// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/platform/temporary_allocator.h"
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"

DECLARE_int64(limit_of_tmp_allocation);
DECLARE_double(times_excess_than_required_tmp_allocation);

namespace paddle {
namespace platform {

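// A minimal operator used only to construct the ExecutionContext needed by the
// tensor-allocation tests below; RunImpl is intentionally empty.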
class DummyOp : public framework::OperatorBase {
 public:
  DummyOp(const std::string& type, const framework::VariableNameMap& inputs,
          const framework::VariableNameMap& outputs,
          const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

 protected:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& place) const override {}
};

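// Basic queue behavior: a temporary allocation enters the allocation queue
// only after it goes out of scope, and Release() drains the queue.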
TEST(temporary_allocator, test_base_function) {
  platform::CPUPlace cpu_place;
  TemporaryAllocator alloc(cpu_place);
  alloc.Allocate(100);

#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);

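  // While the allocation is still alive it is not queued, and Release() on an
  // empty queue leaves the queue size at zero.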
  auto allocation = gpu_alloc.Allocate(101);
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
  gpu_alloc.Release([]() {});
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);

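  // Once the allocation goes out of scope it is moved into the queue, and the
  // next Release() frees it.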
  {
    auto allocation = gpu_alloc.Allocate(102);
    PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
  }
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 1);
  gpu_alloc.Release([]() {});
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
#endif
}

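// With FLAGS_limit_of_tmp_allocation lowered to 10 bytes, an allocation larger
// than the limit is freed right away (invoking the callback) instead of being
// queued for reuse.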
TEST(temporary_allocator, test_flags_function) {
#ifdef PADDLE_WITH_CUDA
  const int64_t limit = FLAGS_limit_of_tmp_allocation;
  FLAGS_limit_of_tmp_allocation = 10;
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);

  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
  auto* dev_ctx =
      static_cast<platform::CUDADeviceContext*>(pool.Get(gpu_place));
  auto stream = dev_ctx->stream();
  bool deleted = false;
  gpu_alloc.SetCallback([stream, &deleted]() {
    PADDLE_ENFORCE(cudaStreamSynchronize(stream));
    PADDLE_ENFORCE(cudaGetLastError());
    deleted = true;
  });
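  // The 100-byte request exceeds the 10-byte limit, so the allocation is
  // released as soon as it goes out of scope and the callback must have fired.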
  { gpu_alloc.Allocate(100); }
  PADDLE_ENFORCE(deleted);
  FLAGS_limit_of_tmp_allocation = limit;
#endif
}

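// A temporary allocation returned to the queue should be handed back for a
// later request of the same size.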
TEST(temporary_allocator, test_reuse_tmp_allocation) {
#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);
  gpu_alloc.SetCallback([]() {});

  void* tmp_allocation_ptr1 = nullptr;
  {
    PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
    auto tmp_allocation1 = gpu_alloc.Allocate(100);
    tmp_allocation_ptr1 = tmp_allocation1->ptr();
  }
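  // The out-of-scope allocation is now queued; the next 100-byte request
  // should get the same underlying pointer back.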
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 1);
  auto tmp_allocation2 = gpu_alloc.Allocate(100);
  void* tmp_allocation_ptr2 = tmp_allocation2->ptr();
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
  PADDLE_ENFORCE_EQ(tmp_allocation_ptr1, tmp_allocation_ptr2);

  auto tmp_allocation3 = gpu_alloc.Allocate(100);
  void* tmp_allocation_ptr3 = tmp_allocation2->ptr();
  PADDLE_ENFORCE_EQ(tmp_allocation_ptr1, tmp_allocation_ptr3);
#endif
}

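// A queued chunk that is somewhat larger than a later request can still be
// reused, subject to FLAGS_times_excess_than_required_tmp_allocation.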
TEST(temporary_allocator, test_times_excess_than_required_tmp_allocation) {
#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);
  gpu_alloc.SetCallback([]() {});
  double excess_fraction = FLAGS_times_excess_than_required_tmp_allocation;
  void* tmp_allocation_ptr1 = nullptr;
  {
    PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
    auto tmp_allocation1 =
        gpu_alloc.Allocate(static_cast<size_t>(100 * excess_fraction - 1));
    tmp_allocation_ptr1 = tmp_allocation1->ptr();
  }
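  // The queued chunk was sized just under 100 * excess_fraction, so the
  // 100-byte request is expected to reuse it.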
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 1);
  auto tmp_allocation2 = gpu_alloc.Allocate(100);
  void* tmp_allocation_ptr2 = tmp_allocation2->ptr();
  PADDLE_ENFORCE_EQ(gpu_alloc.TemporaryAllocationQueueSize(), 0);
  PADDLE_ENFORCE_EQ(tmp_allocation_ptr1, tmp_allocation_ptr2);
#endif
}

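// AllocateTmpTensor should return a tensor with the requested number of
// elements on both CPU and CUDA devices.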
TEST(temporary_allocator, create_tensor_with_allocationptr) {
  framework::VariableNameMap dummy_vars;
  framework::AttributeMap dummy_attrs;
  DummyOp op("dummy", dummy_vars, dummy_vars, dummy_attrs);
  framework::Scope scope;
  framework::VariableValueMap vars;
  framework::RuntimeContext run_ctx(vars, vars);
  size_t memory_size = 300;
  {
    platform::CPUPlace cpu_place;
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CPUDeviceContext*>(pool.Get(cpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr);

    int numel = memory_size / sizeof(float);
    framework::Tensor tensor =
        ctx.AllocateTmpTensor<float, platform::CPUDeviceContext>(
            framework::make_ddim({numel}), *dev_ctx);
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
  }

#ifdef PADDLE_WITH_CUDA
  {
    platform::CUDAPlace gpu_place(0);
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CUDADeviceContext*>(pool.Get(gpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr);
    int numel = memory_size / sizeof(float);
    framework::Tensor tensor =
        ctx.AllocateTmpTensor<float, platform::CUDADeviceContext>(
            framework::make_ddim({numel}), *dev_ctx);
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
  }
#endif
}

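// A tensor created by AllocateTmpTensor can be shared via ShareDataWith with a
// tensor that outlives it; the shared tensor keeps the same number of elements.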
TEST(temporary_allocator, create_tensor_with_allocationptr2) {
  framework::VariableNameMap dummy_vars;
  framework::AttributeMap dummy_attrs;
  DummyOp op("dummy", dummy_vars, dummy_vars, dummy_attrs);
  framework::Scope scope;
  framework::VariableValueMap vars;
  framework::RuntimeContext run_ctx(vars, vars);
  size_t memory_size = 400;
  {
    platform::CPUPlace cpu_place;
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CPUDeviceContext*>(pool.Get(cpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr);
    int numel = memory_size / sizeof(float);

    framework::Tensor out_side_tensor;
    {
      framework::Tensor tensor =
          ctx.AllocateTmpTensor<float, platform::CPUDeviceContext>(
              framework::make_ddim({numel}), *dev_ctx);
      PADDLE_ENFORCE_EQ(tensor.numel(), numel);

      out_side_tensor.ShareDataWith(tensor);
    }
    PADDLE_ENFORCE_EQ(out_side_tensor.numel(), numel);
  }

#ifdef PADDLE_WITH_CUDA
  {
    platform::CUDAPlace gpu_place(0);
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx =
        static_cast<platform::CUDADeviceContext*>(pool.Get(gpu_place));
    framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr);

    size_t memory_size = 500;
    int numel = memory_size / sizeof(float);
    framework::Tensor out_side_tensor;
    {
      framework::Tensor tensor =
          ctx.AllocateTmpTensor<float, platform::CUDADeviceContext>(
              framework::make_ddim({numel}), *dev_ctx);
      PADDLE_ENFORCE_EQ(tensor.numel(), numel);

      out_side_tensor.ShareDataWith(tensor);
    }
    PADDLE_ENFORCE_EQ(out_side_tensor.numel(), numel);
  }
#endif
}

}  // namespace platform
}  // namespace paddle