pass cpu test; gpu seg fault

0009a30f · Yang Yang · f5e73f4c · 6c4d1f55 · 0009a30f · 0009a30f
7 changed files
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
-cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB})
+cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB})
-cc_test(executor_test SRCS executor_test.cc DEPS executor)
+if(WITH_GPU)
+    nv_test(executor_test SRCS executor_test.cc DEPS executor)
+else()
+    cc_test(executor_test SRCS executor_test.cc DEPS executor)
+endif()
 cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
 cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -24,9 +24,21 @@ namespace paddle {
 namespace framework {
 Executor::Executor(const std::vector<platform::Place>& places) {
-  devices_.resize(places.size());
+  device_contexts_.resize(places.size());
  for (size_t i = 0; i < places.size(); i++) {
-    devices_[i] = platform::GetDevice(places[i]);
+    if (platform::is_cpu_place(places[i])) {
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext<platform::CPUPlace>(
+                                    boost::get<platform::CPUPlace>(places[i]));
+    } else {
+#ifndef PADDLE_ONLY_CPU
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext<platform::GPUPlace>(
+                                    boost::get<platform::GPUPlace>(places[i]));
+#else
+      PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+#endif
+    }
  }
 }
@@ -37,7 +49,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
  //    - only runs on the first device
  //    - test on gpu
  auto& block = pdesc.blocks(0);
-  auto& device = devices_[0];
+  auto& device = device_contexts_[0];
  // TODO(tonyyang-svail):
  //    - runs on a new local scope
@@ -49,17 +61,18 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
  for (auto& op_desc : block.ops()) {
    auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
-    op->Run(*scope, *device->cpu_device_context);
+    op->Run(*scope, *device);
  }
-  // print tensor value
+  // // print tensor value
-  for (auto& var : block.vars()) {
+  // for (auto& var : block.vars()) {
-    std::cout << var.name() << std::endl;
+  //   std::cout << var.name() << std::endl;
-    auto v = scope->FindVar(var.name());
+  //   auto v = scope->FindVar(var.name());
-    const LoDTensor& t = v->Get<LoDTensor>();
+  //   const LoDTensor& t = v->Get<LoDTensor>();
-    for (int i = 0; i < t.numel(); ++i) std::cout << t.data<float>()[i] << " ";
+  //   for (int i = 0; i < t.numel(); ++i)
-    std::cout << std::endl;
+  //     std::cout << t.data<float>()[i] << " ";
-  }
+  //   std::cout << std::endl;
+  // }
 }
 }  // namespace framework

--- a/paddle/framework/executor.h
+++ b/paddle/framework/executor.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/framework/op_info.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"
 namespace paddle {
 namespace framework {
@@ -30,7 +30,7 @@ class Executor {
  void Run(const ProgramDesc&, Scope*, std::vector<Tensor>*);
 private:
-  std::vector<platform::Device*> devices_;
+  std::vector<platform::DeviceContext*> device_contexts_;
 };
 }  // namespace framework

--- a/paddle/framework/executor_test.cc
+++ b/paddle/framework/executor_test.cc
@@ -25,14 +25,13 @@ limitations under the License. */
 USE_OP(elementwise_add);
 USE_OP(gaussian_random);
+using std::string;
 using namespace paddle::platform;
 using namespace paddle::framework;
 typedef paddle::framework::BlockDesc proto_block;
 typedef paddle::framework::OpDesc proto_op;
-using std::string;
 void add_gaussian_random_op(string var_name, proto_block* block) {
  std::vector<int> dim{2, 3};
@@ -59,42 +58,59 @@ void add_gaussian_random_op(string var_name, proto_block* block) {
  Out->add_arguments(var_name);
 }
-TEST(Executor, Init) {
+class ExecutorTester : public ::testing::Test {
-  ProgramDesc pdesc;
+ public:
+  virtual void SetUp() override {
-  auto root_block = pdesc.add_blocks();
+    auto root_block = pdesc_.add_blocks();
-  root_block->set_idx(0);
+    root_block->set_idx(0);
-  root_block->set_parent_idx(-1);
+    root_block->set_parent_idx(-1);
-  add_gaussian_random_op("a", root_block);
+    add_gaussian_random_op("a", root_block);
-  add_gaussian_random_op("b", root_block);
+    add_gaussian_random_op("b", root_block);
-  auto c = root_block->add_vars();
+    auto c = root_block->add_vars();
-  c->set_name("c");
+    c->set_name("c");
-  auto c_lt = c->mutable_lod_tensor();
+    auto c_lt = c->mutable_lod_tensor();
-  c_lt->set_data_type(paddle::framework::DataType::FP32);
+    c_lt->set_data_type(paddle::framework::DataType::FP32);
-  auto op = root_block->add_ops();
+    auto op = root_block->add_ops();
-  op->set_type("elementwise_add");
+    op->set_type("elementwise_add");
-  auto X = op->add_inputs();
+    auto X = op->add_inputs();
-  X->set_parameter("X");
+    X->set_parameter("X");
-  X->add_arguments("a");
+    X->add_arguments("a");
-  auto Y = op->add_inputs();
+    auto Y = op->add_inputs();
-  Y->set_parameter("Y");
+    Y->set_parameter("Y");
-  Y->add_arguments("b");
+    Y->add_arguments("b");
-  auto Out = op->add_outputs();
+    auto Out = op->add_outputs();
-  Out->set_parameter("Out");
+    Out->set_parameter("Out");
-  Out->add_arguments("c");
+    Out->add_arguments("c");
+  }
-  CPUPlace cpu_place1, cpu_place2;
+ protected:
+  std::vector<Tensor>* outputs_{nullptr};
+  ProgramDesc pdesc_;
+  Scope scope_;
+};
+TEST_F(ExecutorTester, InitCPU) {
  std::vector<Place> places;
+  CPUPlace cpu_place1, cpu_place2;
  places.push_back(cpu_place1);
  places.push_back(cpu_place2);
  Executor* executor = new Executor(places);
-  Scope s;
+  executor->Run(pdesc_, &scope_, outputs_);
-  std::vector<Tensor>* outputs{nullptr};
+  delete executor;
-  executor->Run(pdesc, &s, outputs);
+}
+#ifndef PADDLE_ONLY_CPU
+TEST_F(ExecutorTester, InitGPU) {
+  std::vector<Place> places;
+  GPUPlace gpu_place0(0);
+  places.push_back(gpu_place0);
+  Executor* executor = new Executor(places);
+  executor->Run(pdesc_, &scope_, outputs_);
  delete executor;
 }
+#endif
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
    system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
 nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
-cc_library(device SRCS device.cc DEPS device_context)
+cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context)
 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
 nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
--- a/paddle/platform/device.cc
+++ b/paddle/platform/device.cc
@@ -12,48 +12,57 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"
 namespace paddle {
 namespace platform {
-template <typename T, typename... Args>
+// Builds the manager. In GPU builds it records the device count and creates
+// one null slot per device; the destructor and the GPU GetDeviceContext
+// specialization index those slots by device id.
+DeviceContextManager::DeviceContextManager() {
-std::unique_ptr<T> make_unique(Args&&... args) {
+#ifndef PADDLE_ONLY_CPU
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+  device_count_ = GetDeviceCount();
+  // resize(), not reserve(): reserve() only grows capacity and leaves
+  // size() == 0, so writing cuda_contexts_[i] was an out-of-bounds access.
+  // resize() creates the slots, each initialised to nullptr.
+  cuda_contexts_.resize(device_count_, nullptr);
+#endif
 }
-CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) {
+template <>
-  static std::unique_ptr<CPUDeviceContext> g_cpu_device_context =
+CPUDeviceContext* DeviceContextManager::GetDeviceContext<
-      make_unique<CPUDeviceContext>(place);
+    CPUPlace, CPUDeviceContext>(const CPUPlace& place) {
-  return g_cpu_device_context.get();
+  if (!cpu_context_) {
+    cpu_context_ = new CPUDeviceContext(place);
+  }
+  return cpu_context_;
 }
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) {
+// Returns the CUDA device context for `place`, creating it on first request.
+// The context is stored in cuda_contexts_ at index place.device and is
+// released by ~DeviceContextManager. SetDeviceId(gpu_id) is called on every
+// request, before the context is looked up or constructed.
+template <>
-  static std::unique_ptr<CUDADeviceContext> g_cuda_device_context =
+CUDADeviceContext* DeviceContextManager::GetDeviceContext<
-      make_unique<CUDADeviceContext>(place);
+    GPUPlace, CUDADeviceContext>(const GPUPlace& place) {
-  return g_cuda_device_context.get();
+  int gpu_id = place.device;
+  // A negative id would index cuda_contexts_ out of range just like an id
+  // that is too large, so reject it before the slot is touched.
+  PADDLE_ENFORCE(gpu_id >= 0, "GPU device id must be non-negative");
+  PADDLE_ENFORCE(gpu_id < device_count_,
+                 "GPU device id must less than device count");
+  SetDeviceId(gpu_id);
+  if (!cuda_contexts_[gpu_id]) {
+    cuda_contexts_[gpu_id] = new CUDADeviceContext(place);
+  }
+  return cuda_contexts_[gpu_id];
 }
 #endif
-Device* GetDevice(const Place& place) {
+DeviceContextManager::~DeviceContextManager() {
-  CPUPlace cpu_place;
+  if (cpu_context_) {
+    delete cpu_context_;
+  }
 #ifndef PADDLE_ONLY_CPU
-  if (is_gpu_place(place)) {
+  for (int i = 0; i < device_count_; i++) {
-    GPUPlace gpu_place = boost::get<GPUPlace>(place);
+    if (cuda_contexts_[i]) {
-    static std::unique_ptr<Device> g_device = make_unique<Device>(
+      delete cuda_contexts_[i];
-        GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place));
+    }
-    return g_device.get();
-  } else {
-    static std::unique_ptr<Device> g_device =
-        make_unique<Device>(GetCPUDeviceContext(cpu_place), nullptr);
-    return g_device.get();
  }
-#else
-  static std::unique_ptr<Device> g_device =
-      make_unique<Device>(GetCPUDeviceContext(cpu_place));
-  return g_device.get();
 #endif
 }
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/device.h
+++ b/paddle/platform/device.h
@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include "paddle/platform/device_context.h"
-#include "paddle/platform/place.h"
 namespace paddle {
 namespace platform {
-struct Device {
+template <typename T>
-  CPUDeviceContext* cpu_device_context;
+struct Converter;
-#ifndef PADDLE_ONLY_CPU
-  CUDADeviceContext* cuda_device_context;
+template <>
-#endif
+struct Converter<CPUPlace> {
+  using DeviceContextType = CPUDeviceContext;
+};
 #ifndef PADDLE_ONLY_CPU
-  Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu)
+template <>
-      : cpu_device_context(cpu), cuda_device_context(gpu) {}
+struct Converter<GPUPlace> {
-#else
+  using DeviceContextType = CUDADeviceContext;
-  explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {}
-#endif
 };
+#endif
+// Owns the device contexts handed out to callers: one CPU context and, in
+// GPU builds, one CUDA context per device id. Contexts are created on the
+// first GetDeviceContext() request and deleted by the destructor, so the
+// returned pointers must not be deleted by the caller.
+class DeviceContextManager {
+ public:
+  DeviceContextManager();
+  ~DeviceContextManager();
+  // Returns the context matching `place`. DeviceType defaults to the context
+  // type that Converter maps PlaceType to.
+  template <typename PlaceType, typename DeviceType = typename Converter<
+                                    PlaceType>::DeviceContextType>
+  DeviceType* GetDeviceContext(const PlaceType& place);
-CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place);
+  // Returns the shared instance, a function-local static.
+  static DeviceContextManager* Get() {
+    static DeviceContextManager inst;
+    return &inst;
+  }
+ private:
+  // Explicitly null: the constructor never assigns this member, while
+  // GetDeviceContext() and the destructor both test it. Without the
+  // initializer, an instance created outside Get() reads (and may delete)
+  // an indeterminate pointer.
+  CPUDeviceContext* cpu_context_{nullptr};
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place);
+  // Number of slots in cuda_contexts_; set by the constructor.
+  int device_count_{0};
+  // Indexed by GPU device id; a null slot means "not created yet".
+  std::vector<CUDADeviceContext*> cuda_contexts_;
 #endif
+};
-Device* GetDevice(const platform::Place& place);
 }  // namespace platform
 }  // namespace paddle