diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 58e78e9a6a4877f7b3fef61b1715d4e27d6ead79..898b3a990d927583026cd3e9ca8fba9202b72dac 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)

-cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB})
-cc_test(executor_test SRCS executor_test.cc DEPS executor)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB})
+if(WITH_GPU)
+  nv_test(executor_test SRCS executor_test.cc DEPS executor)
+else()
+  cc_test(executor_test SRCS executor_test.cc DEPS executor)
+endif()

 cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
 cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index da387b47bab23432a06db5e769f07a8a028bae4d..ae1dcb6230220872d0509f3e7cce34db56d57969 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -24,9 +24,21 @@ namespace paddle {
 namespace framework {

 Executor::Executor(const std::vector<platform::Place>& places) {
-  devices_.resize(places.size());
+  device_contexts_.resize(places.size());
   for (size_t i = 0; i < places.size(); i++) {
-    devices_[i] = platform::GetDevice(places[i]);
+    if (platform::is_cpu_place(places[i])) {
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext(
+                                    boost::get<platform::CPUPlace>(places[i]));
+    } else {
+#ifndef PADDLE_ONLY_CPU
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext(
+                                    boost::get<platform::GPUPlace>(places[i]));
+#else
+      PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+#endif
+    }
   }
 }

@@ -37,7 +49,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
   // - only runs on the first device
   // - test on gpu
   auto& block = pdesc.blocks(0);
-  auto& device = devices_[0];
+  auto& device = device_contexts_[0];

   // TODO(tonyyang-svail):
   // - runs on a new local scope
@@ -49,17 +61,18 @@

   for (auto& op_desc : block.ops()) {
     auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
-    op->Run(*scope, *device->cpu_device_context);
+    op->Run(*scope, *device);
   }

-  // print tensor value
-  for (auto& var : block.vars()) {
-    std::cout << var.name() << std::endl;
-    auto v = scope->FindVar(var.name());
-    const LoDTensor& t = v->Get<LoDTensor>();
-    for (int i = 0; i < t.numel(); ++i) std::cout << t.data<float>()[i] << " ";
-    std::cout << std::endl;
-  }
+  // // print tensor value
+  // for (auto& var : block.vars()) {
+  //   std::cout << var.name() << std::endl;
+  //   auto v = scope->FindVar(var.name());
+  //   const LoDTensor& t = v->Get<LoDTensor>();
+  //   for (int i = 0; i < t.numel(); ++i)
+  //     std::cout << t.data<float>()[i] << " ";
+  //   std::cout << std::endl;
+  // }
 }

 }  // namespace framework
diff --git a/paddle/framework/executor.h b/paddle/framework/executor.h
index cdb80bc10413d7bc3f4e42065ec4545c625c1b72..795b8ffdab3bf204214d323a76f7ce8df7af8054 100644
--- a/paddle/framework/executor.h
+++ b/paddle/framework/executor.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/framework/op_info.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"

 namespace paddle {
 namespace framework {
@@ -30,7 +30,7 @@ class Executor {
   void Run(const ProgramDesc&, Scope*, std::vector<Tensor>*);

  private:
-  std::vector<platform::Device*> devices_;
+  std::vector<platform::DeviceContext*> device_contexts_;
 };

 }  // namespace framework
diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc
index 300de36b8798adf9573684dd8c68f3be1cdb8673..f746242a6b35dbfa0938608f214a04bd9cc49799 100644
--- a/paddle/framework/executor_test.cc
+++ b/paddle/framework/executor_test.cc
@@ -25,14 +25,13 @@ limitations under the License. */
 USE_OP(elementwise_add);
 USE_OP(gaussian_random);

+using std::string;
 using namespace paddle::platform;
 using namespace paddle::framework;

 typedef paddle::framework::BlockDesc proto_block;
 typedef paddle::framework::OpDesc proto_op;

-using std::string;
-
 void add_gaussian_random_op(string var_name, proto_block* block) {
   std::vector<int> dim{2, 3};

@@ -59,42 +58,59 @@ void add_gaussian_random_op(string var_name, proto_block* block) {
   Out->add_arguments(var_name);
 }

-TEST(Executor, Init) {
-  ProgramDesc pdesc;
-
-  auto root_block = pdesc.add_blocks();
-  root_block->set_idx(0);
-  root_block->set_parent_idx(-1);
-
-  add_gaussian_random_op("a", root_block);
-  add_gaussian_random_op("b", root_block);
-
-  auto c = root_block->add_vars();
-  c->set_name("c");
-  auto c_lt = c->mutable_lod_tensor();
-  c_lt->set_data_type(paddle::framework::DataType::FP32);
-
-  auto op = root_block->add_ops();
-  op->set_type("elementwise_add");
-  auto X = op->add_inputs();
-  X->set_parameter("X");
-  X->add_arguments("a");
-  auto Y = op->add_inputs();
-  Y->set_parameter("Y");
-  Y->add_arguments("b");
-  auto Out = op->add_outputs();
-  Out->set_parameter("Out");
-  Out->add_arguments("c");
+class ExecutorTester : public ::testing::Test {
+ public:
+  virtual void SetUp() override {
+    auto root_block = pdesc_.add_blocks();
+    root_block->set_idx(0);
+    root_block->set_parent_idx(-1);
+
+    add_gaussian_random_op("a", root_block);
+    add_gaussian_random_op("b", root_block);
+
+    auto c = root_block->add_vars();
+    c->set_name("c");
+    auto c_lt = c->mutable_lod_tensor();
+    c_lt->set_data_type(paddle::framework::DataType::FP32);
+
+    auto op = root_block->add_ops();
+    op->set_type("elementwise_add");
+    auto X = op->add_inputs();
+    X->set_parameter("X");
+    X->add_arguments("a");
+    auto Y = op->add_inputs();
+    Y->set_parameter("Y");
+    Y->add_arguments("b");
+    auto Out = op->add_outputs();
+    Out->set_parameter("Out");
+    Out->add_arguments("c");
+  }

-  CPUPlace cpu_place1, cpu_place2;
+ protected:
+  std::vector<Tensor>* outputs_{nullptr};
+  ProgramDesc pdesc_;
+  Scope scope_;
+};
+
+TEST_F(ExecutorTester, InitCPU) {
   std::vector<Place> places;
+  CPUPlace cpu_place1, cpu_place2;
   places.push_back(cpu_place1);
   places.push_back(cpu_place2);

   Executor* executor = new Executor(places);

-  Scope s;
-  std::vector<Tensor>* outputs{nullptr};
-  executor->Run(pdesc, &s, outputs);
+  executor->Run(pdesc_, &scope_, outputs_);
+  delete executor;
+}
+#ifndef PADDLE_ONLY_CPU
+TEST_F(ExecutorTester, InitGPU) {
+  std::vector<Place> places;
+  GPUPlace gpu_place0(0);
+  places.push_back(gpu_place0);
+
+  Executor* executor = new Executor(places);
+  executor->Run(pdesc_, &scope_, outputs_);
   delete executor;
 }
+#endif
diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt
index b581937393520ec6a47991c16c093db65a942162..b4ddf721ddb3776406ad44a13bcf9876a905a0df 100644
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
     system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
 nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)

-cc_library(device SRCS device.cc DEPS device_context)
+cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context)

 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
 nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
diff --git a/paddle/platform/device.cc b/paddle/platform/device.cc
deleted file mode 100644
index 7acd87c8c3db8f774de72251b028be10ef34770c..0000000000000000000000000000000000000000
--- a/paddle/platform/device.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/platform/device.h"
-
-namespace paddle {
-namespace platform {
-
-template <typename T, typename... Args>
-std::unique_ptr<T> make_unique(Args&&... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
-CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) {
-  static std::unique_ptr<CPUDeviceContext> g_cpu_device_context =
-      make_unique<CPUDeviceContext>(place);
-  return g_cpu_device_context.get();
-}
-
-#ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) {
-  static std::unique_ptr<CUDADeviceContext> g_cuda_device_context =
-      make_unique<CUDADeviceContext>(place);
-  return g_cuda_device_context.get();
-}
-#endif
-
-Device* GetDevice(const Place& place) {
-  CPUPlace cpu_place;
-#ifndef PADDLE_ONLY_CPU
-  if (is_gpu_place(place)) {
-    GPUPlace gpu_place = boost::get<GPUPlace>(place);
-    static std::unique_ptr<Device> g_device = make_unique<Device>(
-        GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place));
-    return g_device.get();
-  } else {
-    static std::unique_ptr<Device> g_device =
-        make_unique<Device>(GetCPUDeviceContext(cpu_place), nullptr);
-    return g_device.get();
-  }
-#else
-  static std::unique_ptr<Device> g_device =
-      make_unique<Device>(GetCPUDeviceContext(cpu_place));
-  return g_device.get();
-#endif
-}
-}  // namespace platform
-}  // namespace paddle
diff --git a/paddle/platform/device_context_manager.cc b/paddle/platform/device_context_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..156d317c8a9e24ed7ace95429d0c7dc534210ece
--- /dev/null
+++ b/paddle/platform/device_context_manager.cc
@@ -0,0 +1,68 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/platform/device_context_manager.h"
+
+namespace paddle {
+namespace platform {
+
+DeviceContextManager::DeviceContextManager() {
+#ifndef PADDLE_ONLY_CPU
+  device_count_ = GetDeviceCount();
+  cuda_contexts_.reserve(device_count_);
+  for (int i = 0; i < device_count_; i++) {
+    cuda_contexts_[i] = nullptr;
+  }
+#endif
+}
+
+template <>
+CPUDeviceContext* DeviceContextManager::GetDeviceContext<
+    CPUPlace, CPUDeviceContext>(const CPUPlace& place) {
+  if (!cpu_context_) {
+    cpu_context_ = new CPUDeviceContext(place);
+  }
+  return cpu_context_;
+}
+
+#ifndef PADDLE_ONLY_CPU
+template <>
+CUDADeviceContext* DeviceContextManager::GetDeviceContext<
+    GPUPlace, CUDADeviceContext>(const GPUPlace& place) {
+  int gpu_id = place.device;
+  PADDLE_ENFORCE(gpu_id < device_count_,
+                 "GPU device id must less than device count");
+  SetDeviceId(gpu_id);
+  if (!cuda_contexts_[gpu_id]) {
+    cuda_contexts_[gpu_id] = new CUDADeviceContext(place);
+  }
+  return cuda_contexts_[gpu_id];
+}
+#endif
+
+DeviceContextManager::~DeviceContextManager() {
+  if (cpu_context_) {
+    delete cpu_context_;
+  }
+#ifndef PADDLE_ONLY_CPU
+  for (int i = 0; i < device_count_; i++) {
+    if (cuda_contexts_[i]) {
+      delete cuda_contexts_[i];
+    }
+  }
+#endif
+}
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/platform/device.h b/paddle/platform/device_context_manager.h
similarity index 52%
rename from paddle/platform/device.h
rename to paddle/platform/device_context_manager.h
index b1bb8073cf15dccbcb7ef20a9b412385ad666a4d..da15808a6079bbae30ae324277c5cd657ee82155 100644
--- a/paddle/platform/device.h
+++ b/paddle/platform/device_context_manager.h
@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-
 #include "paddle/platform/device_context.h"
-#include "paddle/platform/place.h"

 namespace paddle {
 namespace platform {

-struct Device {
-  CPUDeviceContext* cpu_device_context;
-#ifndef PADDLE_ONLY_CPU
-  CUDADeviceContext* cuda_device_context;
-#endif
+template <typename T>
+struct Converter;
+
+template <>
+struct Converter<CPUPlace> {
+  using DeviceContextType = CPUDeviceContext;
+};

 #ifndef PADDLE_ONLY_CPU
-  Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu)
-      : cpu_device_context(cpu), cuda_device_context(gpu) {}
-#else
-  explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {}
-#endif
+template <>
+struct Converter<GPUPlace> {
+  using DeviceContextType = CUDADeviceContext;
 };
+#endif
+
+class DeviceContextManager {
+ public:
+  DeviceContextManager();
+  ~DeviceContextManager();
+
+  template <typename PlaceType, typename DeviceType = typename Converter<
+                                    PlaceType>::DeviceContextType>
+  DeviceType* GetDeviceContext(const PlaceType& place);

-CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place);
+  static DeviceContextManager* Get() {
+    static DeviceContextManager inst;
+    return &inst;
+  }

+ private:
+  CPUDeviceContext* cpu_context_;
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place);
+  int device_count_;
+  std::vector<CUDADeviceContext*> cuda_contexts_;
 #endif
-
-Device* GetDevice(const platform::Place& place);
+};
 }  // namespace platform
 }  // namespace paddle
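
Usage sketch (not part of the committed patch): the snippet below illustrates how the new DeviceContextManager singleton and the reworked Executor are meant to be driven, mirroring the tests above. It assumes the signatures as reconstructed in this diff, in particular Run(const ProgramDesc&, Scope*, std::vector<Tensor>*); the helper name RunProgramOnCPU is purely illustrative.

// Illustrative sketch only -- not part of the committed patch.
#include <vector>

#include "paddle/framework/executor.h"
#include "paddle/platform/device_context_manager.h"

namespace paddle {

// Hypothetical helper showing the intended call pattern.
void RunProgramOnCPU(const framework::ProgramDesc& pdesc) {
  // Device contexts are created lazily by the process-wide singleton and
  // owned by it (see ~DeviceContextManager in the new .cc file above).
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext* ctx =
      platform::DeviceContextManager::Get()->GetDeviceContext(cpu_place);
  (void)ctx;  // The Executor fetches contexts itself; shown here for clarity.

  // One Executor per list of places; Run currently executes block 0 on the
  // first device only (per the TODO notes in executor.cc).
  std::vector<platform::Place> places{cpu_place};
  framework::Executor executor(places);
  framework::Scope scope;
  std::vector<framework::Tensor>* outputs{nullptr};
  executor.Run(pdesc, &scope, outputs);
}

}  // namespace paddle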