refine codes

6c4d1f55 · qijun · e946fc15 · 6c4d1f55 · 6c4d1f55 · 6c4d1f55
7 changed files
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)

-cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB})
-cc_test(executor_test SRCS executor_test.cc DEPS executor)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB})
+if(WITH_GPU)
+    nv_test(executor_test SRCS executor_test.cc DEPS executor)
+else()
+    cc_test(executor_test SRCS executor_test.cc DEPS executor)
+endif()

 cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
 cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -22,9 +22,21 @@ namespace paddle {
 namespace framework {

 Executor::Executor(const std::vector<platform::Place>& places) {
-  devices_.resize(places.size());
+  // One slot per requested place; each slot receives the context that
+  // DeviceContextManager returns for that place.
+  device_contexts_.resize(places.size());
  for (size_t i = 0; i < places.size(); i++) {
-    devices_[i] = platform::GetDevice(places[i]);
+    const platform::Place& place = places[i];
+    if (platform::is_cpu_place(place)) {
+      auto* manager = platform::DeviceContextManager::Get();
+      device_contexts_[i] = manager->GetDeviceContext<platform::CPUPlace>(
+          boost::get<platform::CPUPlace>(place));
+    } else {
+#ifndef PADDLE_ONLY_CPU
+      auto* manager = platform::DeviceContextManager::Get();
+      device_contexts_[i] = manager->GetDeviceContext<platform::GPUPlace>(
+          boost::get<platform::GPUPlace>(place));
+#else
+      // CPU-only builds have no CUDA context type to hand out.
+      PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+#endif
+    }
  }
 }

@@ -34,37 +46,25 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
  // TODO(tonyyang-svail):
  //    - only runs the first block
  //    - only runs on the first device
+  Scope& local_scope = scope->NewScope();
+
  auto& block = pdesc.blocks(0);
-  auto& device = devices_[0];
+  auto& device_context = device_contexts_[0];

  for (auto& var : block.vars()) {
-    scope->NewVar(var.name());
+    local_scope.NewVar(var.name());
  }

  // std::vector<op_ptr> ops;
  for (auto& op_desc : block.ops()) {
    auto op = framework::OpRegistry::CreateOp(op_desc);
-    // op->InferShape(*scope);
-    op->Run(*scope, *device->cpu_device_context);
+    // InferShape is now done inside the Run method.
+    op->Run(local_scope, *device_context);
  }

  // TODO(tonyyang-svail): need to test gpu device
-  //   device_->cpu_device_context->Wait();
-  // #ifndef PADDLE_ONLY_CPU
-  //   if (device_->cuda_device_context) {
-  //     device_->cuda_device_context->Wait();
-  //   }
-  // #endif
-
-  Scope& local_scope = scope->NewScope();
-  local_scope.NewVar();
-  for (auto device : devices_) {
-    device->cpu_device_context->Wait();
-#ifndef PADDLE_ONLY_CPU
-    if (device->cuda_device_context) {
-      device->cuda_device_context->Wait();
-    }
-#endif
+  for (auto device_context : device_contexts_) {
+    device_context->Wait();
  }
 }


--- a/paddle/framework/executor.h
+++ b/paddle/framework/executor.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/framework/op_info.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"

 namespace paddle {
 namespace framework {
@@ -30,7 +30,7 @@ class Executor {
  void Run(const ProgramDesc&, Scope*, std::vector<Tensor>*);

 private:
-  std::vector<platform::Device*> devices_;
+  std::vector<platform::DeviceContext*> device_contexts_;
 };

 }  // namespace framework

--- a/paddle/framework/executor_test.cc
+++ b/paddle/framework/executor_test.cc
@@ -15,8 +15,6 @@ limitations under the License. */
 #include "paddle/framework/executor.h"
 #include "gtest/gtest.h"
 #include "paddle/framework/attribute.h"
-
-#include <gtest/gtest.h>
 #include "paddle/framework/grad_op_builder.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/operator.h"
@@ -26,52 +24,71 @@ USE_OP(elementwise_add);
 using namespace paddle::platform;
 using namespace paddle::framework;

-TEST(Executor, Init) {
-  ProgramDesc pdesc;
-
-  auto root_block = pdesc.add_blocks();
-  root_block->set_idx(0);
-  root_block->set_parent_idx(-1);
-
-  auto a = root_block->add_vars();
-  a->set_name("a");
-  auto a_lt = a->mutable_lod_tensor();
-  a_lt->set_data_type(paddle::framework::DataType::FP32);
-  a_lt->add_dims(640);
-  a_lt->add_dims(640);
-
-  auto b = root_block->add_vars();
-  b->set_name("b");
-  auto b_lt = b->mutable_lod_tensor();
-  b_lt->set_data_type(paddle::framework::DataType::FP32);
-  b_lt->add_dims(640);
-  b_lt->add_dims(640);
-
-  auto c = root_block->add_vars();
-  c->set_name("c");
-  auto c_lt = c->mutable_lod_tensor();
-  c_lt->set_data_type(paddle::framework::DataType::FP32);
-  c_lt->add_dims(640);
-  c_lt->add_dims(640);
-
-  auto op1 = root_block->add_ops();
-  op1->set_type("elementwise_add");
-  auto X = op1->add_inputs();
-  X->set_parameter("X");
-  X->add_arguments("a");
-  auto Y = op1->add_inputs();
-  Y->set_parameter("Y");
-  Y->add_arguments("b");
-
-  CPUPlace cpu_place1, cpu_place2;
+// Test fixture that builds a single-block program before each test:
+// three 640x640 FP32 LoDTensor variables ("a", "b", "c") and one
+// "elementwise_add" op whose inputs X and Y are bound to "a" and "b".
+class ExecutorTester : public ::testing::Test {
+ public:
+  virtual void SetUp() override {
+    auto block = pdesc_.add_blocks();
+    block->set_idx(0);
+    block->set_parent_idx(-1);
+
+    // Declares a 640x640 FP32 LoDTensor variable called `name` in the block.
+    auto declare_var = [block](const char* name) {
+      auto var = block->add_vars();
+      var->set_name(name);
+      auto lod_tensor = var->mutable_lod_tensor();
+      lod_tensor->set_data_type(paddle::framework::DataType::FP32);
+      lod_tensor->add_dims(640);
+      lod_tensor->add_dims(640);
+    };
+    declare_var("a");
+    declare_var("b");
+    declare_var("c");
+
+    auto add_op = block->add_ops();
+    add_op->set_type("elementwise_add");
+    auto x_input = add_op->add_inputs();
+    x_input->set_parameter("X");
+    x_input->add_arguments("a");
+    auto y_input = add_op->add_inputs();
+    y_input->set_parameter("Y");
+    y_input->add_arguments("b");
+  }
+
+ protected:
+  // Passed straight through to Executor::Run; left null by the fixture.
+  std::vector<Tensor>* outputs_{nullptr};
+  ProgramDesc pdesc_;
+  Scope scope_;
+};
+
+// Runs the fixture program on an executor built from two CPU places.
+TEST_F(ExecutorTester, InitCPU) {
  std::vector<Place> places;
+  CPUPlace cpu_place1, cpu_place2;
  places.push_back(cpu_place1);
  places.push_back(cpu_place2);

-  Executor* executor = new Executor(places);
-  Scope s;
-  std::vector<Tensor>* outputs{nullptr};
-  executor->Run(pdesc, &s, outputs);
+  // Stack-allocated so the executor is destroyed even if Run() throws;
+  // the previous new/delete pair leaked it in that case.
+  Executor executor(places);
+  executor.Run(pdesc_, &scope_, outputs_);
+}
+
+#ifndef PADDLE_ONLY_CPU
+// Runs the fixture program on an executor built from GPU places.
+// Only devices that actually exist are requested: asking for GPU 1 on a
+// single-GPU host trips the device-id check in DeviceContextManager.
+TEST_F(ExecutorTester, InitGPU) {
+  // NOTE(review): assumes platform::GetDeviceCount() is reachable through
+  // executor.h, as it is for device_context_manager's own sources - confirm.
+  const int gpu_count = GetDeviceCount();
+  if (gpu_count < 1) {
+    return;  // No GPU present; nothing to exercise.
+  }
+  std::vector<Place> places;
+  places.push_back(GPUPlace(0));
+  if (gpu_count > 1) {
+    places.push_back(GPUPlace(1));
+  }

+  Executor* executor = new Executor(places);
+  executor->Run(pdesc_, &scope_, outputs_);
  delete executor;
 }
+#endif
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
    system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
 nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)

-cc_library(device SRCS device.cc DEPS device_context)
+cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context)

 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
 nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
--- a/paddle/platform/device.cc
+++ b/paddle/platform/device.cc
@@ -12,48 +12,57 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"

 namespace paddle {
 namespace platform {

-template <typename T, typename... Args>
-std::unique_ptr<T> make_unique(Args&&... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+// Starts with no contexts created. In GPU builds, queries the device count
+// and prepares one empty (null) slot per device; the contexts themselves are
+// created on first request by GetDeviceContext.
+DeviceContextManager::DeviceContextManager() : cpu_context_(nullptr) {
+#ifndef PADDLE_ONLY_CPU
+  device_count_ = GetDeviceCount();
+  // resize(), not reserve(): reserve() only grows capacity and leaves size()
+  // at 0, so indexing the slots afterwards would be out of bounds.
+  cuda_contexts_.resize(device_count_, nullptr);
+#endif
 }

-CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) {
-  static std::unique_ptr<CPUDeviceContext> g_cpu_device_context =
-      make_unique<CPUDeviceContext>(place);
-  return g_cpu_device_context.get();
+// CPUPlace specialization: returns the manager's single CPU context,
+// creating it from `place` the first time it is requested.
+template <>
+CPUDeviceContext* DeviceContextManager::GetDeviceContext<
+    CPUPlace, CPUDeviceContext>(const CPUPlace& place) {
+  if (cpu_context_ != nullptr) {
+    return cpu_context_;
+  }
+  cpu_context_ = new CPUDeviceContext(place);
+  return cpu_context_;
 }

 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) {
-  static std::unique_ptr<CUDADeviceContext> g_cuda_device_context =
-      make_unique<CUDADeviceContext>(place);
-  return g_cuda_device_context.get();
+// GPUPlace specialization: returns the context for device `place.device`,
+// creating it the first time that device is requested.
+// Enforces that the device id lies in [0, device_count_).
+template <>
+CUDADeviceContext* DeviceContextManager::GetDeviceContext<
+    GPUPlace, CUDADeviceContext>(const GPUPlace& place) {
+  int gpu_id = place.device;
+  // A negative id would pass a plain upper-bound check and then index the
+  // context table out of bounds, so both ends of the range are enforced.
+  PADDLE_ENFORCE(gpu_id >= 0 && gpu_id < device_count_,
+                 "GPU device id must be in the range [0, device count)");
+  SetDeviceId(gpu_id);
+  if (!cuda_contexts_[gpu_id]) {
+    cuda_contexts_[gpu_id] = new CUDADeviceContext(place);
+  }
+  return cuda_contexts_[gpu_id];
 }
 #endif

-Device* GetDevice(const Place& place) {
-  CPUPlace cpu_place;
+// Releases every context this manager created. Slots that were never
+// requested hold null pointers, and deleting a null pointer is a no-op,
+// so no per-slot guard is needed.
+DeviceContextManager::~DeviceContextManager() {
+  delete cpu_context_;
 #ifndef PADDLE_ONLY_CPU
-  if (is_gpu_place(place)) {
-    GPUPlace gpu_place = boost::get<GPUPlace>(place);
-    static std::unique_ptr<Device> g_device = make_unique<Device>(
-        GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place));
-    return g_device.get();
-  } else {
-    static std::unique_ptr<Device> g_device =
-        make_unique<Device>(GetCPUDeviceContext(cpu_place), nullptr);
-    return g_device.get();
+  for (int gpu_id = 0; gpu_id < device_count_; ++gpu_id) {
+    delete cuda_contexts_[gpu_id];
  }
-#else
-  static std::unique_ptr<Device> g_device =
-      make_unique<Device>(GetCPUDeviceContext(cpu_place));
-  return g_device.get();
 #endif
 }
+
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/device.h
+++ b/paddle/platform/device.h
@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-
 #include "paddle/platform/device_context.h"
-#include "paddle/platform/place.h"

 namespace paddle {
 namespace platform {

-struct Device {
-  CPUDeviceContext* cpu_device_context;
-#ifndef PADDLE_ONLY_CPU
-  CUDADeviceContext* cuda_device_context;
-#endif
+template <typename T>
+struct Converter;  // Maps a place type to its device-context type; only declared here, specialized below.
+
+template <>
+struct Converter<CPUPlace> {
+  using DeviceContextType = CPUDeviceContext;  // CPUPlace -> CPUDeviceContext
+};

 #ifndef PADDLE_ONLY_CPU
-  Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu)
-      : cpu_device_context(cpu), cuda_device_context(gpu) {}
-#else
-  explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {}
-#endif
+template <>
+struct Converter<GPUPlace> {
+  using DeviceContextType = CUDADeviceContext;  // GPUPlace -> CUDADeviceContext (GPU builds only)
 };
+#endif
+
+// Owns the device contexts handed out to callers: one CPU context and, in
+// GPU builds, one CUDA context slot per device. Contexts are created on
+// first request and deleted by the destructor. Normally used through the
+// process-wide instance returned by Get().
+class DeviceContextManager {
+ public:
+  DeviceContextManager();
+  ~DeviceContextManager();
+
+  // The manager owns raw context pointers, so a copy would delete the same
+  // contexts twice; copying is therefore disabled.
+  DeviceContextManager(const DeviceContextManager&) = delete;
+  DeviceContextManager& operator=(const DeviceContextManager&) = delete;
+
+  // Returns the context for `place`, creating it on first use. DeviceType
+  // defaults to the context type that Converter associates with PlaceType.
+  // The returned pointer stays owned by the manager.
+  template <typename PlaceType, typename DeviceType = typename Converter<
+                                    PlaceType>::DeviceContextType>
+  DeviceType* GetDeviceContext(const PlaceType& place);

-CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place);
+  // Returns the shared manager instance (a function-local static).
+  static DeviceContextManager* Get() {
+    static DeviceContextManager inst;
+    return &inst;
+  }

+ private:
+  // Null until the first CPU context is requested.
+  CPUDeviceContext* cpu_context_{nullptr};
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place);
+  int device_count_{0};
+  // Indexed by GPU device id; a null entry means "not created yet".
+  std::vector<CUDADeviceContext*> cuda_contexts_;
 #endif
-
-Device* GetDevice(const platform::Place& place);
+};
 }  // namespace platform
 }  // namespace paddle