diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 58e78e9a6a4877f7b3fef61b1715d4e27d6ead79..898b3a990d927583026cd3e9ca8fba9202b72dac 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) -cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB}) -cc_test(executor_test SRCS executor_test.cc DEPS executor) +cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB}) +if(WITH_GPU) + nv_test(executor_test SRCS executor_test.cc DEPS executor) +else() + cc_test(executor_test SRCS executor_test.cc DEPS executor) +endif() cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 94b9b3b350910f0853a95ecd6e0e00af5ac47f8c..717f9bf81a372dec0c5933a874891f52f889b4f8 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -22,9 +22,21 @@ namespace paddle { namespace framework { Executor::Executor(const std::vector& places) { - devices_.resize(places.size()); + device_contexts_.resize(places.size()); for (size_t i = 0; i < places.size(); i++) { - devices_[i] = platform::GetDevice(places[i]); + if (platform::is_cpu_place(places[i])) { + device_contexts_[i] = platform::DeviceContextManager::Get() + ->GetDeviceContext( + boost::get(places[i])); + } else { +#ifndef PADDLE_ONLY_CPU + device_contexts_[i] = platform::DeviceContextManager::Get() + ->GetDeviceContext( + boost::get(places[i])); +#else + PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); +#endif + } } } @@ -34,37 +46,25 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, // TODO(tonyyang-svail): // - only runs the first block // - only runs on the first device + Scope& local_scope = scope->NewScope(); + auto& block = pdesc.blocks(0); - auto& device = devices_[0]; + auto& device_context = device_contexts_[0]; for (auto& var : block.vars()) { - scope->NewVar(var.name()); + local_scope.NewVar(var.name()); } // std::vector ops; for (auto& op_desc : block.ops()) { auto op = framework::OpRegistry::CreateOp(op_desc); - // op->InferShape(*scope); - op->Run(*scope, *device->cpu_device_context); + // InferShape is now doing inside Run method. + op->Run(local_scope, *device_context); } // TODO(tonyyang-svail): need to test gpu device - // device_->cpu_device_context->Wait(); - // #ifndef PADDLE_ONLY_CPU - // if (device_->cuda_device_context) { - // device_->cuda_device_context->Wait(); - // } - // #endif - - Scope& local_scope = scope->NewScope(); - local_scope.NewVar(); - for (auto device : devices_) { - device->cpu_device_context->Wait(); -#ifndef PADDLE_ONLY_CPU - if (device->cuda_device_context) { - device->cuda_device_context->Wait(); - } -#endif + for (auto device_context : device_contexts_) { + device_context->Wait(); } } diff --git a/paddle/framework/executor.h b/paddle/framework/executor.h index cdb80bc10413d7bc3f4e42065ec4545c625c1b72..795b8ffdab3bf204214d323a76f7ce8df7af8054 100644 --- a/paddle/framework/executor.h +++ b/paddle/framework/executor.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/framework/op_info.h" #include "paddle/framework/scope.h" #include "paddle/framework/tensor.h" -#include "paddle/platform/device.h" +#include "paddle/platform/device_context_manager.h" namespace paddle { namespace framework { @@ -30,7 +30,7 @@ class Executor { void Run(const ProgramDesc&, Scope*, std::vector*); private: - std::vector devices_; + std::vector device_contexts_; }; } // namespace framework diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc index 11255af808aaee3e17cec70d41ae30fcbc5d5d18..810ff2a512a4185a5cb9d6921f43ace44fcddf7d 100644 --- a/paddle/framework/executor_test.cc +++ b/paddle/framework/executor_test.cc @@ -15,8 +15,6 @@ limitations under the License. */ #include "paddle/framework/executor.h" #include "gtest/gtest.h" #include "paddle/framework/attribute.h" - -#include #include "paddle/framework/grad_op_builder.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" @@ -26,52 +24,71 @@ USE_OP(elementwise_add); using namespace paddle::platform; using namespace paddle::framework; -TEST(Executor, Init) { - ProgramDesc pdesc; - - auto root_block = pdesc.add_blocks(); - root_block->set_idx(0); - root_block->set_parent_idx(-1); - - auto a = root_block->add_vars(); - a->set_name("a"); - auto a_lt = a->mutable_lod_tensor(); - a_lt->set_data_type(paddle::framework::DataType::FP32); - a_lt->add_dims(640); - a_lt->add_dims(640); - - auto b = root_block->add_vars(); - b->set_name("b"); - auto b_lt = b->mutable_lod_tensor(); - b_lt->set_data_type(paddle::framework::DataType::FP32); - b_lt->add_dims(640); - b_lt->add_dims(640); - - auto c = root_block->add_vars(); - c->set_name("c"); - auto c_lt = c->mutable_lod_tensor(); - c_lt->set_data_type(paddle::framework::DataType::FP32); - c_lt->add_dims(640); - c_lt->add_dims(640); - - auto op1 = root_block->add_ops(); - op1->set_type("elementwise_add"); - auto X = op1->add_inputs(); - X->set_parameter("X"); - X->add_arguments("a"); - auto Y = op1->add_inputs(); - Y->set_parameter("Y"); - Y->add_arguments("b"); - - CPUPlace cpu_place1, cpu_place2; +class ExecutorTester : public ::testing::Test { + public: + virtual void SetUp() override { + auto root_block = pdesc_.add_blocks(); + root_block->set_idx(0); + root_block->set_parent_idx(-1); + + auto a = root_block->add_vars(); + a->set_name("a"); + auto a_lt = a->mutable_lod_tensor(); + a_lt->set_data_type(paddle::framework::DataType::FP32); + a_lt->add_dims(640); + a_lt->add_dims(640); + + auto b = root_block->add_vars(); + b->set_name("b"); + auto b_lt = b->mutable_lod_tensor(); + b_lt->set_data_type(paddle::framework::DataType::FP32); + b_lt->add_dims(640); + b_lt->add_dims(640); + + auto c = root_block->add_vars(); + c->set_name("c"); + auto c_lt = c->mutable_lod_tensor(); + c_lt->set_data_type(paddle::framework::DataType::FP32); + c_lt->add_dims(640); + c_lt->add_dims(640); + + auto op1 = root_block->add_ops(); + op1->set_type("elementwise_add"); + auto X = op1->add_inputs(); + X->set_parameter("X"); + X->add_arguments("a"); + auto Y = op1->add_inputs(); + Y->set_parameter("Y"); + Y->add_arguments("b"); + } + + protected: + std::vector* outputs_{nullptr}; + ProgramDesc pdesc_; + Scope scope_; +}; + +TEST_F(ExecutorTester, InitCPU) { std::vector places; + CPUPlace cpu_place1, cpu_place2; places.push_back(cpu_place1); places.push_back(cpu_place2); Executor* executor = new Executor(places); - Scope s; - std::vector* outputs{nullptr}; - executor->Run(pdesc, &s, outputs); + executor->Run(pdesc_, &scope_, outputs_); + delete executor; +} + +#ifndef PADDLE_ONLY_CPU +TEST_F(ExecutorTester, InitGPU) { + std::vector places; + GPUPlace gpu_place0(0); + GPUPlace gpu_place1(1); + places.push_back(gpu_place0); + places.push_back(gpu_place1); + Executor* executor = new Executor(places); + executor->Run(pdesc_, &scope_, outputs_); delete executor; } +#endif diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index b581937393520ec6a47991c16c093db65a942162..b4ddf721ddb3776406ad44a13bcf9876a905a0df 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) -cc_library(device SRCS device.cc DEPS device_context) +cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context) diff --git a/paddle/platform/device.cc b/paddle/platform/device.cc deleted file mode 100644 index 7acd87c8c3db8f774de72251b028be10ef34770c..0000000000000000000000000000000000000000 --- a/paddle/platform/device.cc +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/platform/device.h" - -namespace paddle { -namespace platform { - -template -std::unique_ptr make_unique(Args&&... args) { - return std::unique_ptr(new T(std::forward(args)...)); -} - -CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) { - static std::unique_ptr g_cpu_device_context = - make_unique(place); - return g_cpu_device_context.get(); -} - -#ifndef PADDLE_ONLY_CPU -CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) { - static std::unique_ptr g_cuda_device_context = - make_unique(place); - return g_cuda_device_context.get(); -} -#endif - -Device* GetDevice(const Place& place) { - CPUPlace cpu_place; -#ifndef PADDLE_ONLY_CPU - if (is_gpu_place(place)) { - GPUPlace gpu_place = boost::get(place); - static std::unique_ptr g_device = make_unique( - GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place)); - return g_device.get(); - } else { - static std::unique_ptr g_device = - make_unique(GetCPUDeviceContext(cpu_place), nullptr); - return g_device.get(); - } -#else - static std::unique_ptr g_device = - make_unique(GetCPUDeviceContext(cpu_place)); - return g_device.get(); -#endif -} -} // namespace platform -} // namespace paddle diff --git a/paddle/platform/device_context_manager.cc b/paddle/platform/device_context_manager.cc new file mode 100644 index 0000000000000000000000000000000000000000..156d317c8a9e24ed7ace95429d0c7dc534210ece --- /dev/null +++ b/paddle/platform/device_context_manager.cc @@ -0,0 +1,68 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/device_context_manager.h" + +namespace paddle { +namespace platform { + +DeviceContextManager::DeviceContextManager() { +#ifndef PADDLE_ONLY_CPU + device_count_ = GetDeviceCount(); + cuda_contexts_.reserve(device_count_); + for (int i = 0; i < device_count_; i++) { + cuda_contexts_[i] = nullptr; + } +#endif +} + +template <> +CPUDeviceContext* DeviceContextManager::GetDeviceContext< + CPUPlace, CPUDeviceContext>(const CPUPlace& place) { + if (!cpu_context_) { + cpu_context_ = new CPUDeviceContext(place); + } + return cpu_context_; +} + +#ifndef PADDLE_ONLY_CPU +template <> +CUDADeviceContext* DeviceContextManager::GetDeviceContext< + GPUPlace, CUDADeviceContext>(const GPUPlace& place) { + int gpu_id = place.device; + PADDLE_ENFORCE(gpu_id < device_count_, + "GPU device id must less than device count"); + SetDeviceId(gpu_id); + if (!cuda_contexts_[gpu_id]) { + cuda_contexts_[gpu_id] = new CUDADeviceContext(place); + } + return cuda_contexts_[gpu_id]; +} +#endif + +DeviceContextManager::~DeviceContextManager() { + if (cpu_context_) { + delete cpu_context_; + } +#ifndef PADDLE_ONLY_CPU + for (int i = 0; i < device_count_; i++) { + if (cuda_contexts_[i]) { + delete cuda_contexts_[i]; + } + } +#endif +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/device.h b/paddle/platform/device_context_manager.h similarity index 52% rename from paddle/platform/device.h rename to paddle/platform/device_context_manager.h index b1bb8073cf15dccbcb7ef20a9b412385ad666a4d..da15808a6079bbae30ae324277c5cd657ee82155 100644 --- a/paddle/platform/device.h +++ b/paddle/platform/device_context_manager.h @@ -13,33 +13,46 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - #include "paddle/platform/device_context.h" -#include "paddle/platform/place.h" namespace paddle { namespace platform { -struct Device { - CPUDeviceContext* cpu_device_context; -#ifndef PADDLE_ONLY_CPU - CUDADeviceContext* cuda_device_context; -#endif +template +struct Converter; + +template <> +struct Converter { + using DeviceContextType = CPUDeviceContext; +}; #ifndef PADDLE_ONLY_CPU - Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu) - : cpu_device_context(cpu), cuda_device_context(gpu) {} -#else - explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {} -#endif +template <> +struct Converter { + using DeviceContextType = CUDADeviceContext; }; +#endif + +class DeviceContextManager { + public: + DeviceContextManager(); + ~DeviceContextManager(); + + template ::DeviceContextType> + DeviceType* GetDeviceContext(const PlaceType& place); -CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place); + static DeviceContextManager* Get() { + static DeviceContextManager inst; + return &inst; + } + private: + CPUDeviceContext* cpu_context_; #ifndef PADDLE_ONLY_CPU -CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place); + int device_count_; + std::vector cuda_contexts_; #endif - -Device* GetDevice(const platform::Place& place); +}; } // namespace platform } // namespace paddle