// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/platform/temporary_allocator.h"
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"

DECLARE_double(limit_of_temporary_allocation);

namespace paddle {
namespace platform {

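// A no-op operator; the tests below only need it to construct an ExecutionContext.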
class DummyOp : public framework::OperatorBase {
 public:
  DummyOp(const std::string& type, const framework::VariableNameMap& inputs,
          const framework::VariableNameMap& outputs,
          const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

 protected:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& place) const override {}
};

TEST(temporary_allocator, temporary_allocator) {
  platform::CPUPlace cpu_place;
  TemporaryAllocator alloc(cpu_place);
  alloc.Allocate(100);

#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);

  auto allocation = gpu_alloc.Allocate(101);
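  // While the allocation handle is still alive, nothing is queued for deletion,
  // so Release() has nothing to reclaim.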
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
  gpu_alloc.Release([]() {});
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);

  {
    auto allocation = gpu_alloc.Allocate(102);
    PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
  }
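  // Once the handle goes out of scope, the allocation is moved into the
  // temporary allocation queue; Release() then drains it.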
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 1);
  gpu_alloc.Release([]() {});
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
#endif
}

TEST(temporary_allocator, add_callback) {
#ifdef PADDLE_WITH_CUDA
  const double limit = FLAGS_limit_of_temporary_allocation;
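  // Lower the limit so that the 100-byte allocation below exceeds it and is
  // expected to be released, with the callback invoked, as soon as it goes
  // out of scope.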
  FLAGS_limit_of_temporary_allocation = 10;
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);

  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
  auto* dev_ctx =
      static_cast<platform::CUDADeviceContext*>(pool.Get(gpu_place));
  auto stream = dev_ctx->stream();
  bool deleted = false;
  gpu_alloc.SetCallback([stream, &deleted]() {
    PADDLE_ENFORCE(cudaStreamSynchronize(stream));
    PADDLE_ENFORCE(cudaGetLastError());
    deleted = true;
  });
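  // The temporary allocation is freed at the end of this statement, which
  // should trigger the callback set above.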
  { gpu_alloc.Allocate(100); }
  PADDLE_ENFORCE(deleted);
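  // Restore the original limit so that other tests are not affected.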
  FLAGS_limit_of_temporary_allocation = limit;
#endif
}

TEST(temporary_allocator, create_tensor_with_allocationptr) {
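  // Allocate a temporary tensor through an ExecutionContext on CPU (and on GPU
  // when CUDA is available) and check that it holds the expected number of
  // elements.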
  framework::VariableNameMap dummy_vars;
  framework::AttributeMap dummy_attrs;
  DummyOp op("dummy", dummy_vars, dummy_vars, dummy_attrs);
  framework::Scope scope;
  framework::VariableValueMap vars;
  framework::RuntimeContext run_ctx(vars, vars);
  size_t memory_size = 300;
  {
    platform::CPUPlace cpu_place;
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CPUDeviceContext*>(pool.Get(cpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx);

    int numel = memory_size / sizeof(float);
    framework::Tensor tensor =
        ctx.AllocateTmpTensor<float, platform::CPUDeviceContext>(
            framework::make_ddim({numel}), *dev_ctx);
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
  }

#ifdef PADDLE_WITH_CUDA
  {
    platform::CUDAPlace gpu_place(0);
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CUDADeviceContext*>(pool.Get(gpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx);
    int numel = memory_size / sizeof(float);
    framework::Tensor tensor =
        ctx.AllocateTmpTensor<float, platform::CUDADeviceContext>(
            framework::make_ddim({numel}), *dev_ctx);
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
  }
#endif
}

TEST(temporary_allocator, create_tensor_with_allocationptr2) {
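  // Same as the test above, except that the temporary tensor's buffer is shared
  // with a tensor declared outside the inner scope; the outer tensor is checked
  // after the temporary tensor has gone out of scope.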
  framework::VariableNameMap dummy_vars;
  framework::AttributeMap dummy_attrs;
  DummyOp op("dummy", dummy_vars, dummy_vars, dummy_attrs);
  framework::Scope scope;
  framework::VariableValueMap vars;
  framework::RuntimeContext run_ctx(vars, vars);
  size_t memory_size = 400;
  {
    platform::CPUPlace cpu_place;
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CPUDeviceContext*>(pool.Get(cpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx);
    int numel = memory_size / sizeof(float);

    framework::Tensor out_side_tensor;
    {
      framework::Tensor tensor =
          ctx.AllocateTmpTensor<float, platform::CPUDeviceContext>(
              framework::make_ddim({numel}), *dev_ctx);
      PADDLE_ENFORCE_EQ(tensor.numel(), numel);

      out_side_tensor.ShareDataWith(tensor);
    }
    PADDLE_ENFORCE_EQ(out_side_tensor.numel(), numel);
  }

#ifdef PADDLE_WITH_CUDA
  {
    platform::CUDAPlace gpu_place(0);
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CUDADeviceContext*>(pool.Get(gpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx);

    size_t memory_size = 500;
    int numel = memory_size / sizeof(float);
    framework::Tensor out_side_tensor;
    {
      framework::Tensor tensor =
          ctx.AllocateTmpTensor<float, platform::CUDADeviceContext>(
              framework::make_ddim({numel}), *dev_ctx);
      PADDLE_ENFORCE_EQ(tensor.numel(), numel);

      out_side_tensor.ShareDataWith(tensor);
    }
    PADDLE_ENFORCE_EQ(out_side_tensor.numel(), numel);
  }
#endif
}

}  // namespace platform
}  // namespace paddle