Commit 6c4d1f55 authored by qijun

refine codes

Parent e946fc15
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
 
-cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB})
-cc_test(executor_test SRCS executor_test.cc DEPS executor)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB})
+if(WITH_GPU)
+  nv_test(executor_test SRCS executor_test.cc DEPS executor)
+else()
+  cc_test(executor_test SRCS executor_test.cc DEPS executor)
+endif()
 
 cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
 cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -22,9 +22,21 @@ namespace paddle {
 namespace framework {
 
 Executor::Executor(const std::vector<platform::Place>& places) {
-  devices_.resize(places.size());
+  device_contexts_.resize(places.size());
   for (size_t i = 0; i < places.size(); i++) {
-    devices_[i] = platform::GetDevice(places[i]);
+    if (platform::is_cpu_place(places[i])) {
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext<platform::CPUPlace>(
+                                    boost::get<platform::CPUPlace>(places[i]));
+    } else {
+#ifndef PADDLE_ONLY_CPU
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext<platform::GPUPlace>(
+                                    boost::get<platform::GPUPlace>(places[i]));
+#else
+      PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+#endif
+    }
   }
 }
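
For context, a minimal usage sketch (not part of the commit) of how the refactored constructor is driven: each Place in the vector is resolved to a cached DeviceContext by the singleton manager, and a CPU-only build rejects GPUPlace at construction time. The function name BuildExecutor is illustrative only.

// Sketch: constructing an Executor over one CPU place and, when built with
// GPU support, one GPU place. Assumes the headers shown in this commit.
#include <vector>
#include "paddle/framework/executor.h"
#include "paddle/platform/place.h"

void BuildExecutor() {
  std::vector<paddle::platform::Place> places;
  places.push_back(paddle::platform::CPUPlace());
#ifndef PADDLE_ONLY_CPU
  places.push_back(paddle::platform::GPUPlace(0));  // GPU device id 0
#endif
  // The constructor fills device_contexts_ via DeviceContextManager::Get().
  paddle::framework::Executor executor(places);
}
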
@@ -34,37 +46,25 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
   // TODO(tonyyang-svail):
   //   - only runs the first block
   //   - only runs on the first device
+  Scope& local_scope = scope->NewScope();
+
   auto& block = pdesc.blocks(0);
-  auto& device = devices_[0];
+  auto& device_context = device_contexts_[0];
 
   for (auto& var : block.vars()) {
-    scope->NewVar(var.name());
+    local_scope.NewVar(var.name());
   }
 
-  // std::vector<op_ptr> ops;
   for (auto& op_desc : block.ops()) {
     auto op = framework::OpRegistry::CreateOp(op_desc);
-    // op->InferShape(*scope);
-    op->Run(*scope, *device->cpu_device_context);
+    // InferShape is now done inside the Run method.
+    op->Run(local_scope, *device_context);
   }
 
-  // TODO(tonyyang-svail): need to test gpu device
-  // device_->cpu_device_context->Wait();
-  // #ifndef PADDLE_ONLY_CPU
-  //   if (device_->cuda_device_context) {
-  //     device_->cuda_device_context->Wait();
-  //   }
-  // #endif
-
-  Scope& local_scope = scope->NewScope();
-  local_scope.NewVar();
-  for (auto device : devices_) {
-    device->cpu_device_context->Wait();
-#ifndef PADDLE_ONLY_CPU
-    if (device->cuda_device_context) {
-      device->cuda_device_context->Wait();
-    }
-#endif
+  for (auto device_context : device_contexts_) {
+    device_context->Wait();
   }
 }
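
The final loop replaces the old per-member waits on the Device struct; it only works because draining is uniform across context types. A rough sketch of the contract this assumes (illustrative, not the commit's code; the real DeviceContext classes live in paddle/platform/device_context.h):

// Sketch: a single virtual Wait() on the base class lets CPU and CUDA
// contexts be drained by one loop over DeviceContext pointers.
class DeviceContext {
 public:
  virtual ~DeviceContext() {}
  virtual void Wait() {}  // CPU contexts have no queued work to drain
};

class CUDADeviceContext : public DeviceContext {
 public:
  void Wait() override {
    // e.g. a cudaStreamSynchronize(stream_) in the real implementation
  }
};
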
--- a/paddle/framework/executor.h
+++ b/paddle/framework/executor.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/framework/op_info.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"
 
 namespace paddle {
 namespace framework {
@@ -30,7 +30,7 @@ class Executor {
   void Run(const ProgramDesc&, Scope*, std::vector<Tensor>*);
 
  private:
-  std::vector<platform::Device*> devices_;
+  std::vector<platform::DeviceContext*> device_contexts_;
 };
 
 }  // namespace framework
--- a/paddle/framework/executor_test.cc
+++ b/paddle/framework/executor_test.cc
@@ -15,8 +15,6 @@ limitations under the License. */
 
 #include "paddle/framework/executor.h"
-#include "gtest/gtest.h"
-#include "paddle/framework/attribute.h"
+#include <gtest/gtest.h>
 #include "paddle/framework/grad_op_builder.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/operator.h"
@@ -26,52 +24,71 @@ USE_OP(elementwise_add);
 using namespace paddle::platform;
 using namespace paddle::framework;
 
-TEST(Executor, Init) {
-  ProgramDesc pdesc;
-
-  auto root_block = pdesc.add_blocks();
-  root_block->set_idx(0);
-  root_block->set_parent_idx(-1);
-
-  auto a = root_block->add_vars();
-  a->set_name("a");
-  auto a_lt = a->mutable_lod_tensor();
-  a_lt->set_data_type(paddle::framework::DataType::FP32);
-  a_lt->add_dims(640);
-  a_lt->add_dims(640);
-
-  auto b = root_block->add_vars();
-  b->set_name("b");
-  auto b_lt = b->mutable_lod_tensor();
-  b_lt->set_data_type(paddle::framework::DataType::FP32);
-  b_lt->add_dims(640);
-  b_lt->add_dims(640);
-
-  auto c = root_block->add_vars();
-  c->set_name("c");
-  auto c_lt = c->mutable_lod_tensor();
-  c_lt->set_data_type(paddle::framework::DataType::FP32);
-  c_lt->add_dims(640);
-  c_lt->add_dims(640);
-
-  auto op1 = root_block->add_ops();
-  op1->set_type("elementwise_add");
-  auto X = op1->add_inputs();
-  X->set_parameter("X");
-  X->add_arguments("a");
-  auto Y = op1->add_inputs();
-  Y->set_parameter("Y");
-  Y->add_arguments("b");
-
-  CPUPlace cpu_place1, cpu_place2;
+class ExecutorTester : public ::testing::Test {
+ public:
+  virtual void SetUp() override {
+    auto root_block = pdesc_.add_blocks();
+    root_block->set_idx(0);
+    root_block->set_parent_idx(-1);
+
+    auto a = root_block->add_vars();
+    a->set_name("a");
+    auto a_lt = a->mutable_lod_tensor();
+    a_lt->set_data_type(paddle::framework::DataType::FP32);
+    a_lt->add_dims(640);
+    a_lt->add_dims(640);
+
+    auto b = root_block->add_vars();
+    b->set_name("b");
+    auto b_lt = b->mutable_lod_tensor();
+    b_lt->set_data_type(paddle::framework::DataType::FP32);
+    b_lt->add_dims(640);
+    b_lt->add_dims(640);
+
+    auto c = root_block->add_vars();
+    c->set_name("c");
+    auto c_lt = c->mutable_lod_tensor();
+    c_lt->set_data_type(paddle::framework::DataType::FP32);
+    c_lt->add_dims(640);
+    c_lt->add_dims(640);
+
+    auto op1 = root_block->add_ops();
+    op1->set_type("elementwise_add");
+    auto X = op1->add_inputs();
+    X->set_parameter("X");
+    X->add_arguments("a");
+    auto Y = op1->add_inputs();
+    Y->set_parameter("Y");
+    Y->add_arguments("b");
+  }
+
+ protected:
+  std::vector<Tensor>* outputs_{nullptr};
+  ProgramDesc pdesc_;
+  Scope scope_;
+};
+
+TEST_F(ExecutorTester, InitCPU) {
   std::vector<Place> places;
+  CPUPlace cpu_place1, cpu_place2;
   places.push_back(cpu_place1);
   places.push_back(cpu_place2);
 
   Executor* executor = new Executor(places);
-  Scope s;
-  std::vector<Tensor>* outputs{nullptr};
-  executor->Run(pdesc, &s, outputs);
+  executor->Run(pdesc_, &scope_, outputs_);
   delete executor;
 }
+
+#ifndef PADDLE_ONLY_CPU
+TEST_F(ExecutorTester, InitGPU) {
+  std::vector<Place> places;
+  GPUPlace gpu_place0(0);
+  GPUPlace gpu_place1(1);
+
+  places.push_back(gpu_place0);
+  places.push_back(gpu_place1);
+
+  Executor* executor = new Executor(places);
+  executor->Run(pdesc_, &scope_, outputs_);
+  delete executor;
+}
+#endif
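
One thing SetUp() above never does is bind an output slot to op1, even though variable "c" is declared, so the op's result is effectively dropped. A hypothetical extension of SetUp() that routes the result into "c" might look like the fragment below; the parameter name "Out" is an assumption about elementwise_add's output slot, not something this commit shows.

// Hypothetical addition inside SetUp(): bind the op's output to "c".
auto out = op1->add_outputs();
out->set_parameter("Out");  // assumed output slot name for elementwise_add
out->add_arguments("c");
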
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
     system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
 nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
 
-cc_library(device SRCS device.cc DEPS device_context)
+cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context)
 
 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
 nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
--- a/paddle/platform/device.cc
+++ b/paddle/platform/device_context_manager.cc
@@ -12,48 +12,57 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"
 
 namespace paddle {
 namespace platform {
 
-template <typename T, typename... Args>
-std::unique_ptr<T> make_unique(Args&&... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
+DeviceContextManager::DeviceContextManager() {
+#ifndef PADDLE_ONLY_CPU
+  device_count_ = GetDeviceCount();
+  cuda_contexts_.reserve(device_count_);
+  for (int i = 0; i < device_count_; i++) {
+    cuda_contexts_[i] = nullptr;
+  }
+#endif
+}
 
-CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) {
-  static std::unique_ptr<CPUDeviceContext> g_cpu_device_context =
-      make_unique<CPUDeviceContext>(place);
-  return g_cpu_device_context.get();
-}
+template <>
+CPUDeviceContext* DeviceContextManager::GetDeviceContext<
+    CPUPlace, CPUDeviceContext>(const CPUPlace& place) {
+  if (!cpu_context_) {
+    cpu_context_ = new CPUDeviceContext(place);
+  }
+  return cpu_context_;
+}
 
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) {
-  static std::unique_ptr<CUDADeviceContext> g_cuda_device_context =
-      make_unique<CUDADeviceContext>(place);
-  return g_cuda_device_context.get();
-}
+template <>
+CUDADeviceContext* DeviceContextManager::GetDeviceContext<
+    GPUPlace, CUDADeviceContext>(const GPUPlace& place) {
+  int gpu_id = place.device;
+  PADDLE_ENFORCE(gpu_id < device_count_,
+                 "GPU device id must be less than the device count");
+  SetDeviceId(gpu_id);
+  if (!cuda_contexts_[gpu_id]) {
+    cuda_contexts_[gpu_id] = new CUDADeviceContext(place);
+  }
+  return cuda_contexts_[gpu_id];
+}
 #endif
 
-Device* GetDevice(const Place& place) {
-  CPUPlace cpu_place;
+DeviceContextManager::~DeviceContextManager() {
+  if (cpu_context_) {
+    delete cpu_context_;
+  }
 #ifndef PADDLE_ONLY_CPU
-  if (is_gpu_place(place)) {
-    GPUPlace gpu_place = boost::get<GPUPlace>(place);
-    static std::unique_ptr<Device> g_device = make_unique<Device>(
-        GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place));
-    return g_device.get();
-  } else {
-    static std::unique_ptr<Device> g_device =
-        make_unique<Device>(GetCPUDeviceContext(cpu_place), nullptr);
-    return g_device.get();
-  }
-#else
-  static std::unique_ptr<Device> g_device =
-      make_unique<Device>(GetCPUDeviceContext(cpu_place));
-  return g_device.get();
-#endif
+  for (int i = 0; i < device_count_; i++) {
+    if (cuda_contexts_[i]) {
+      delete cuda_contexts_[i];
+    }
+  }
+#endif
 }
 
 }  // namespace platform
 }  // namespace paddle
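
Usage-wise, the manager is a lazy cache: the first lookup for a place constructs the context, and later lookups return the same pointer. Note in passing that the constructor as shown indexes cuda_contexts_[i] after only a reserve(), which does not change the vector's size, and never null-initializes cpu_context_; the sketch below assumes both have been made well-defined (e.g. a resize(device_count_, nullptr) and a cpu_context_{nullptr} initializer). The function name UseManager is illustrative only.

// Sketch: repeated lookups for the same place hit the cached context
// rather than constructing a new one.
#include "paddle/platform/device_context_manager.h"

void UseManager() {
  using namespace paddle::platform;
  auto* manager = DeviceContextManager::Get();
  CPUDeviceContext* c1 = manager->GetDeviceContext<CPUPlace>(CPUPlace());
  CPUDeviceContext* c2 = manager->GetDeviceContext<CPUPlace>(CPUPlace());
  // c1 == c2: the manager owns a single CPUDeviceContext for the process.
}
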
--- a/paddle/platform/device.h
+++ b/paddle/platform/device_context_manager.h
@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+
 #include "paddle/platform/device_context.h"
 #include "paddle/platform/place.h"
 
 namespace paddle {
 namespace platform {
 
-struct Device {
-  CPUDeviceContext* cpu_device_context;
-#ifndef PADDLE_ONLY_CPU
-  CUDADeviceContext* cuda_device_context;
-#endif
+template <typename T>
+struct Converter;
+
+template <>
+struct Converter<CPUPlace> {
+  using DeviceContextType = CPUDeviceContext;
+};
 
 #ifndef PADDLE_ONLY_CPU
-  Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu)
-      : cpu_device_context(cpu), cuda_device_context(gpu) {}
-#else
-  explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {}
-#endif
-};
+template <>
+struct Converter<GPUPlace> {
+  using DeviceContextType = CUDADeviceContext;
+};
+#endif
 
-CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place);
+class DeviceContextManager {
+ public:
+  DeviceContextManager();
+  ~DeviceContextManager();
+
+  template <typename PlaceType, typename DeviceType = typename Converter<
+                                     PlaceType>::DeviceContextType>
+  DeviceType* GetDeviceContext(const PlaceType& place);
+
+  static DeviceContextManager* Get() {
+    static DeviceContextManager inst;
+    return &inst;
+  }
+
+ private:
+  CPUDeviceContext* cpu_context_;
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place);
+  int device_count_;
+  std::vector<CUDADeviceContext*> cuda_contexts_;
 #endif
-
-Device* GetDevice(const platform::Place& place);
+};
 
 }  // namespace platform
 }  // namespace paddle