From a5e1cf5a2eeec59740f5ff5c60dc104b2aa9b520 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Wed, 27 Dec 2017 10:29:29 +0800 Subject: [PATCH] Rename API of DeviceContext Give the DeviceContextPool API conventional names: Create() becomes Init(), the static Get() becomes Instance(), and Borrow(place) becomes Get(place). The multi-place Borrow() overload is removed. --- paddle/framework/init.cc | 2 +- paddle/framework/operator.cc | 4 +-- paddle/operators/array_operator.h | 4 +-- paddle/operators/array_to_lod_tensor_op.cc | 5 ++-- paddle/operators/assign_op.cc | 4 +-- paddle/operators/cond_op.cc | 4 +-- paddle/operators/feed_op.cc | 4 +-- paddle/operators/fetch_op.cc | 4 +-- paddle/operators/fill_constant_op.cc | 4 +-- paddle/operators/fill_op.cc | 5 ++-- paddle/operators/load_op.cc | 4 +-- paddle/operators/lod_tensor_to_array_op.cc | 5 ++-- paddle/operators/merge_lod_tensor_op.cc | 4 +-- paddle/operators/recurrent_op.cc | 9 +++--- .../reorder_lod_tensor_by_rank_op.cc | 4 +-- paddle/operators/save_op.cc | 4 +-- paddle/operators/shrink_rnn_memory_op.cc | 4 +-- paddle/operators/split_lod_tensor_op.cc | 4 +-- .../operators/tensor_array_read_write_op.cc | 10 ++++--- paddle/platform/device_context.cc | 20 +------------ paddle/platform/device_context.h | 12 ++------ paddle/platform/device_context_test.cu | 29 +++++-------------- paddle/platform/nccl_test.cu | 2 +- 23 files changed, 59 insertions(+), 92 deletions(-) diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc index d6601090d5..682cff168d 100644 --- a/paddle/framework/init.cc +++ b/paddle/framework/init.cc @@ -71,7 +71,7 @@ bool InitDevices(const std::vector &devices) { places.emplace_back(platform::CPUPlace()); LOG(WARNING) << "Not specified CPU device, create CPU by Default."; } - platform::DeviceContextPool::Create(places); + platform::DeviceContextPool::Init(places); return true; } diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 886f73e7b8..e8d4be8675 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -388,8 +388,8 @@ void OperatorWithKernel::Run(const Scope& scope, const platform::Place& place) const { 
RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); - platform::DeviceContextPool& pool = platform::DeviceContextPool::Get(); - auto dev_ctx = pool.Borrow(place); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto dev_ctx = pool.Get(place); // check if op[type] has kernel registered. auto& all_op_kernels = AllOpKernels(); diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 060ffac827..e0eef5d9f9 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -35,8 +35,8 @@ class ArrayOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(i_tensor.numel(), 1); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); size_t offset; if (platform::is_gpu_place(i_tensor.place())) { diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc index 0aa04c268b..49366fee8d 100644 --- a/paddle/operators/array_to_lod_tensor_op.cc +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -106,8 +106,9 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { } auto slice = out->Slice(out_offset, out_offset + len); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx, &slice); diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc index 0560040509..7d77be3be1 100644 --- a/paddle/operators/assign_op.cc +++ b/paddle/operators/assign_op.cc @@ -82,8 +82,8 @@ class AssignOp : public framework::OperatorBase { out != nullptr, "The 
Output(Out) should not be null if the Input(X) is set."); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::VisitVarType(*x, AssignFunctor(out, dev_ctx)); } diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc index 455fbd8ca3..e333002bfd 100644 --- a/paddle/operators/cond_op.cc +++ b/paddle/operators/cond_op.cc @@ -195,8 +195,8 @@ void CondOp::MergeDataFromSubnet(const framework::Scope& scope, void CondOp::Run(const Scope& scope, const platform::Place& place) const { // get device context from pool - platform::DeviceContextPool& pool = platform::DeviceContextPool::Get(); - auto& dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& dev_ctx = *pool.Get(place); PrepareDataForSubnet(scope, dev_ctx); std::vector& sub_scopes = GetSubScopes(scope); diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc index cecbb7226a..48da52c3b6 100644 --- a/paddle/operators/feed_op.cc +++ b/paddle/operators/feed_op.cc @@ -49,8 +49,8 @@ class FeedOp : public framework::OperatorBase { auto *out_item = out_var->GetMutable(); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(feed_item, place, dev_ctx, out_item); out_item->set_lod(feed_item.lod()); diff --git a/paddle/operators/fetch_op.cc b/paddle/operators/fetch_op.cc index fa20a06540..387d1e0a74 100644 --- a/paddle/operators/fetch_op.cc +++ b/paddle/operators/fetch_op.cc @@ -52,8 +52,8 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? 
- platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item); dev_ctx.Wait(); diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index fe0706c4a9..dcd43a30c8 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -49,8 +49,8 @@ class FillConstantOp : public framework::OperatorBase { out.mutable_data(dev_place, framework::ToTypeIndex(data_type)); } - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); math::set_constant(dev_ctx, &out, value); } }; diff --git a/paddle/operators/fill_op.cc b/paddle/operators/fill_op.cc index 57b4ec6938..084ba1db62 100644 --- a/paddle/operators/fill_op.cc +++ b/paddle/operators/fill_op.cc @@ -69,8 +69,9 @@ class FillOp : public framework::OperatorBase { if (!force_cpu && platform::is_gpu_place(place)) { // Copy tensor to out - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(tensor, place, dev_ctx, &out); } } diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index 5425375c1f..65f021d919 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -40,8 +40,8 @@ class LoadOp : public framework::OperatorBase { auto *tensor = out_var->GetMutable(); framework::DeserializeFromStream(fin, tensor); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + 
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); if (platform::is_gpu_place(place)) { // copy CPU to GPU diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index ed99915bb7..8d164b4abc 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -88,8 +88,9 @@ class LoDTensorToArrayOp : public framework::OperatorBase { auto slice = out[i].Slice(static_cast(offset), static_cast(offset + len)); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x.Slice(static_cast(each_range.begin), static_cast(each_range.end)), diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc index 2287f34791..3f999e404f 100644 --- a/paddle/operators/merge_lod_tensor_op.cc +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -30,8 +30,8 @@ class MergeLoDTensorOp : public framework::OperatorBase { void Run(const framework::Scope &scope, const platform::Place &dev_place) const override { // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); auto &x = scope.FindVar(Input("X"))->Get(); auto &mask = scope.FindVar(Input("Mask"))->Get(); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 71769e67c7..056fa46949 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -272,8 +272,9 @@ class RecurrentOp : public RecurrentBase { false /*create_local_scope*/); // get device context from pool - platform::DeviceContextPool &pool = 
platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); // Copy inside::output -> outside::output // outside::output[seq_offset: seq_offset + 1] = inside::output @@ -326,8 +327,8 @@ class RecurrentGradOp : public RecurrentBase { auto *program = block->Program(); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); for (size_t step_id = 0; step_id < seq_len; ++step_id) { size_t seq_offset = reverse ? step_id : seq_len - step_id - 1; diff --git a/paddle/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/operators/reorder_lod_tensor_by_rank_op.cc index 1063388e25..8d652ff806 100644 --- a/paddle/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/operators/reorder_lod_tensor_by_rank_op.cc @@ -131,8 +131,8 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { auto x_sliced = x.Slice(x_offset, x_offset + len); auto out_sliced = out->Slice(out_offset, out_offset + len); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x_sliced, out_sliced.place(), dev_ctx, &out_sliced); out_offset += len; return out_offset; diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc index d045a8b5b8..4b1cbe8883 100644 --- a/paddle/operators/save_op.cc +++ b/paddle/operators/save_op.cc @@ -91,8 +91,8 @@ class SaveOp : public framework::OperatorBase { auto &tensor = var->Get(); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + 
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::SerializeToStream(fout, tensor, dev_ctx); } diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index e8a4773547..e5ef0740b6 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -106,8 +106,8 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { dx_tensor.mutable_data(x_tensor.place(), x_tensor.type()); // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); if (dout_var == nullptr) { // dx_tensor fill zero math::set_constant(dev_ctx, &dx_tensor, 0.0f); diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc index 89826ca6ee..2d8787d740 100644 --- a/paddle/operators/split_lod_tensor_op.cc +++ b/paddle/operators/split_lod_tensor_op.cc @@ -45,8 +45,8 @@ class SplitLoDTensorOp : public framework::OperatorBase { auto &x_lod = x.lod(); auto &mask_dim = mask.dims(); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); std::unique_ptr cpu_mask{new framework::LoDTensor()}; if (platform::is_cpu_place(mask.place())) { diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 9529aab573..53e38ec703 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -40,8 +40,9 @@ class WriteToArrayOp : public ArrayOp { if (x_tensor.memory_size() > 0) { auto *out_tensor = &out->at(offset); - platform::DeviceContextPool &pool = 
platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); CopyFrom(x_tensor, place, dev_ctx, out_tensor); out_tensor->set_lod(x_tensor.lod()); @@ -132,8 +133,9 @@ class ReadFromArrayOp : public ArrayOp { auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, place); if (offset < x_array.size()) { - platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); - auto &dev_ctx = *pool.Borrow(place); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor); out_tensor->set_lod(x_array[offset].lod()); } else { diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index e450ef32a4..ea07f2e002 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -17,7 +17,7 @@ namespace platform { DeviceContextPool* DeviceContextPool::pool = nullptr; -const platform::DeviceContext* DeviceContextPool::Borrow( +const platform::DeviceContext* DeviceContextPool::Get( const platform::Place& place) { auto it = device_contexts_.find(place); if (it == device_contexts_.end()) { @@ -28,24 +28,6 @@ const platform::DeviceContext* DeviceContextPool::Borrow( return it->second; } -std::vector DeviceContextPool::Borrow( - const std::vector& places) { - PADDLE_ENFORCE_GT(places.size(), 0); - PADDLE_ENFORCE_LE(places.size(), device_contexts_.size()); - std::vector borrowed_contexts; - for (auto& place : places) { - auto it = device_contexts_.find(place); - if (it != device_contexts_.end()) { - borrowed_contexts.emplace_back(it->second); - } else { - PADDLE_THROW( - "'Place' is not supported, Please re-compile with WITH_GPU " - "option"); - } - } - return borrowed_contexts; -} - DeviceContextPool::DeviceContextPool( const std::vector& places) { 
PADDLE_ENFORCE_GT(places.size(), 0); diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 8ba12e1657..dfef2c16d8 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -109,13 +109,13 @@ class DeviceContextPool { public: explicit DeviceContextPool(const std::vector& places); - static DeviceContextPool& Get() { + static DeviceContextPool& Instance() { PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!"); return *pool; } /*! \brief Create should only called by Init function */ - static DeviceContextPool& Create(const std::vector& places) { + static DeviceContextPool& Init(const std::vector& places) { if (pool == nullptr) { pool = new DeviceContextPool(places); } @@ -123,13 +123,7 @@ class DeviceContextPool { } /*! \brief Return handle of single device context. */ - const platform::DeviceContext* Borrow(const platform::Place& place); - - /*! \brief Return handle of multi-device context. */ - std::vector Borrow( - const std::vector& places); - - ~DeviceContextPool() {} + const platform::DeviceContext* Get(const platform::Place& place); private: static DeviceContextPool* pool; diff --git a/paddle/platform/device_context_test.cu b/paddle/platform/device_context_test.cu index 91011bf71c..ca10cf3463 100644 --- a/paddle/platform/device_context_test.cu +++ b/paddle/platform/device_context_test.cu @@ -71,35 +71,20 @@ TEST(Device, DeviceContextPool) { using paddle::platform::CPUPlace; using paddle::platform::CUDAPlace; - DeviceContextPool& pool = DeviceContextPool::Get(); - auto cpu_dev_ctx1 = pool.Borrow(CPUPlace()); - auto cpu_dev_ctx2 = pool.Borrow(CPUPlace()); - EXPECT_TRUE(cpu_dev_ctx2 == cpu_dev_ctx1); + DeviceContextPool& pool = DeviceContextPool::Instance(); + auto cpu_dev_ctx1 = pool.Get(CPUPlace()); + auto cpu_dev_ctx2 = pool.Get(CPUPlace()); + ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1); std::vector gpu_places; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < 
count; ++i) { - gpu_places.emplace_back(CUDAPlace(i)); - } - auto dev_ctxs = pool.Borrow(gpu_places); - for (size_t i = 0; i < dev_ctxs.size(); ++i) { - auto* dev_ctx = static_cast(dev_ctxs[i]); - - // check same as CUDAPlace(i) - CUDAPlace place = boost::get(dev_ctx->GetPlace()); - EXPECT_EQ(place.GetDeviceId(), static_cast(i)); + auto dev_ctx = pool.Get(CUDAPlace(i)); + ASSERT_NE(dev_ctx, nullptr); } } int main(int argc, char** argv) { - int dev_count = paddle::platform::GetCUDADeviceCount(); - if (dev_count <= 1) { - LOG(WARNING) << "Cannot test multi-gpu DeviceContextPool, because the CUDA " - "device count is " - << dev_count; - return 0; - } - std::vector places; places.emplace_back(paddle::platform::CPUPlace()); @@ -109,7 +94,7 @@ int main(int argc, char** argv) { } VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Create(places); + paddle::platform::DeviceContextPool::Init(places); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu index 8f815863a7..ef6d845874 100644 --- a/paddle/platform/nccl_test.cu +++ b/paddle/platform/nccl_test.cu @@ -144,7 +144,7 @@ int main(int argc, char** argv) { } VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Create(places); + paddle::platform::DeviceContextPool::Init(places); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -- GitLab