Commit 0009a30f authored by Yang Yang

pass cpu test; gpu seg fault

paddle/framework/CMakeLists.txt
@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
-cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB})
-cc_test(executor_test SRCS executor_test.cc DEPS executor)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB})
+if(WITH_GPU)
+  nv_test(executor_test SRCS executor_test.cc DEPS executor)
+else()
+  cc_test(executor_test SRCS executor_test.cc DEPS executor)
+endif()
 cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
 cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
paddle/framework/executor.cc
@@ -24,9 +24,21 @@ namespace paddle {
 namespace framework {
 
 Executor::Executor(const std::vector<platform::Place>& places) {
-  devices_.resize(places.size());
+  device_contexts_.resize(places.size());
   for (size_t i = 0; i < places.size(); i++) {
-    devices_[i] = platform::GetDevice(places[i]);
+    if (platform::is_cpu_place(places[i])) {
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext<platform::CPUPlace>(
+                                    boost::get<platform::CPUPlace>(places[i]));
+    } else {
+#ifndef PADDLE_ONLY_CPU
+      device_contexts_[i] = platform::DeviceContextManager::Get()
+                                ->GetDeviceContext<platform::GPUPlace>(
+                                    boost::get<platform::GPUPlace>(places[i]));
+#else
+      PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+#endif
+    }
   }
 }
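Note: a minimal usage sketch of the new constructor (CPU-only; it assumes platform::Place is the boost::variant of place types used at this point in the codebase, so CPUPlace converts implicitly):

#include <vector>
#include "paddle/framework/executor.h"

void BuildExecutor() {
  // Each place gets one cached DeviceContext, resolved through the
  // DeviceContextManager singleton inside the Executor constructor.
  paddle::platform::CPUPlace cpu;
  std::vector<paddle::platform::Place> places;
  places.push_back(cpu);
  paddle::framework::Executor executor(places);
}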
@@ -37,7 +49,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
   //   - only runs on the first device
   //   - test on gpu
   auto& block = pdesc.blocks(0);
-  auto& device = devices_[0];
+  auto& device = device_contexts_[0];
 
   // TODO(tonyyang-svail):
   //   - runs on a new local scope
@@ -49,17 +61,18 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
   for (auto& op_desc : block.ops()) {
     auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
-    op->Run(*scope, *device->cpu_device_context);
+    op->Run(*scope, *device);
   }
 
-  // print tensor value
-  for (auto& var : block.vars()) {
-    std::cout << var.name() << std::endl;
-    auto v = scope->FindVar(var.name());
-    const LoDTensor& t = v->Get<LoDTensor>();
-    for (int i = 0; i < t.numel(); ++i) std::cout << t.data<float>()[i] << " ";
-    std::cout << std::endl;
-  }
+  // // print tensor value
+  // for (auto& var : block.vars()) {
+  //   std::cout << var.name() << std::endl;
+  //   auto v = scope->FindVar(var.name());
+  //   const LoDTensor& t = v->Get<LoDTensor>();
+  //   for (int i = 0; i < t.numel(); ++i)
+  //     std::cout << t.data<float>()[i] << " ";
+  //   std::cout << std::endl;
+  // }
 }
 
 }  // namespace framework
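Note: the tensor dump is commented out rather than deleted. For a GPU place, t.data<float>() returns a pointer into device memory, which the host cannot dereference directly; that is consistent with the "gpu seg fault" in the commit message. A host-safe dump stages the data on the CPU first; a minimal sketch using the raw CUDA runtime (not Paddle API):

#include <cuda_runtime.h>
#include <iostream>
#include <vector>

// Copies a device buffer to the host before printing; dereferencing
// dev_ptr directly on the host would fault.
void PrintDeviceBuffer(const float* dev_ptr, int64_t numel) {
  std::vector<float> host(numel);
  cudaMemcpy(host.data(), dev_ptr, numel * sizeof(float),
             cudaMemcpyDeviceToHost);  // synchronous copy
  for (int64_t i = 0; i < numel; ++i) std::cout << host[i] << " ";
  std::cout << std::endl;
}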
paddle/framework/executor.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/framework/op_info.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"
 
 namespace paddle {
 namespace framework {
@@ -30,7 +30,7 @@ class Executor {
   void Run(const ProgramDesc&, Scope*, std::vector<Tensor>*);
 
  private:
-  std::vector<platform::Device*> devices_;
+  std::vector<platform::DeviceContext*> device_contexts_;
 };
 
 }  // namespace framework
paddle/framework/executor_test.cc
@@ -25,14 +25,13 @@ limitations under the License. */
 USE_OP(elementwise_add);
 USE_OP(gaussian_random);
 
-using std::string;
 using namespace paddle::platform;
 using namespace paddle::framework;
 
 typedef paddle::framework::BlockDesc proto_block;
 typedef paddle::framework::OpDesc proto_op;
+using std::string;
 
 void add_gaussian_random_op(string var_name, proto_block* block) {
   std::vector<int> dim{2, 3};
@@ -59,42 +58,59 @@ void add_gaussian_random_op(string var_name, proto_block* block) {
   Out->add_arguments(var_name);
 }
 
-TEST(Executor, Init) {
-  ProgramDesc pdesc;
-
-  auto root_block = pdesc.add_blocks();
-  root_block->set_idx(0);
-  root_block->set_parent_idx(-1);
-
-  add_gaussian_random_op("a", root_block);
-  add_gaussian_random_op("b", root_block);
-
-  auto c = root_block->add_vars();
-  c->set_name("c");
-  auto c_lt = c->mutable_lod_tensor();
-  c_lt->set_data_type(paddle::framework::DataType::FP32);
-
-  auto op = root_block->add_ops();
-  op->set_type("elementwise_add");
-  auto X = op->add_inputs();
-  X->set_parameter("X");
-  X->add_arguments("a");
-  auto Y = op->add_inputs();
-  Y->set_parameter("Y");
-  Y->add_arguments("b");
-  auto Out = op->add_outputs();
-  Out->set_parameter("Out");
-  Out->add_arguments("c");
-
-  CPUPlace cpu_place1, cpu_place2;
-  std::vector<Place> places;
-
-  places.push_back(cpu_place1);
-  places.push_back(cpu_place2);
-
-  Executor* executor = new Executor(places);
-
-  Scope s;
-  std::vector<Tensor>* outputs{nullptr};
-  executor->Run(pdesc, &s, outputs);
-
-  delete executor;
-}
+class ExecutorTester : public ::testing::Test {
+ public:
+  virtual void SetUp() override {
+    auto root_block = pdesc_.add_blocks();
+    root_block->set_idx(0);
+    root_block->set_parent_idx(-1);
+
+    add_gaussian_random_op("a", root_block);
+    add_gaussian_random_op("b", root_block);
+
+    auto c = root_block->add_vars();
+    c->set_name("c");
+    auto c_lt = c->mutable_lod_tensor();
+    c_lt->set_data_type(paddle::framework::DataType::FP32);
+
+    auto op = root_block->add_ops();
+    op->set_type("elementwise_add");
+    auto X = op->add_inputs();
+    X->set_parameter("X");
+    X->add_arguments("a");
+    auto Y = op->add_inputs();
+    Y->set_parameter("Y");
+    Y->add_arguments("b");
+    auto Out = op->add_outputs();
+    Out->set_parameter("Out");
+    Out->add_arguments("c");
+  }
+
+ protected:
+  std::vector<Tensor>* outputs_{nullptr};
+  ProgramDesc pdesc_;
+  Scope scope_;
+};
+
+TEST_F(ExecutorTester, InitCPU) {
+  std::vector<Place> places;
+  CPUPlace cpu_place1, cpu_place2;
+  places.push_back(cpu_place1);
+  places.push_back(cpu_place2);
+
+  Executor* executor = new Executor(places);
+  executor->Run(pdesc_, &scope_, outputs_);
+  delete executor;
+}
+
+#ifndef PADDLE_ONLY_CPU
+TEST_F(ExecutorTester, InitGPU) {
+  std::vector<Place> places;
+  GPUPlace gpu_place0(0);
+  places.push_back(gpu_place0);
+
+  Executor* executor = new Executor(places);
+  executor->Run(pdesc_, &scope_, outputs_);
+  delete executor;
+}
+#endif
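Note: both tests leak the Executor if Run() throws before the delete. A possible tidy-up, not part of the commit, assuming C++14 for std::make_unique and using a hypothetical test name:

#include <memory>

TEST_F(ExecutorTester, InitCPURaii) {
  std::vector<Place> places;
  places.push_back(CPUPlace());
  // unique_ptr releases the Executor on every exit path.
  auto executor = std::make_unique<Executor>(places);
  executor->Run(pdesc_, &scope_, outputs_);
}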
paddle/platform/CMakeLists.txt
@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
     system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
 nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
 
-cc_library(device SRCS device.cc DEPS device_context)
+cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context)
 
 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
 nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
paddle/platform/device_context_manager.cc (renamed from paddle/platform/device.cc)
@@ -12,48 +12,57 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/platform/device.h"
+#include "paddle/platform/device_context_manager.h"
 
 namespace paddle {
 namespace platform {
 
-template <typename T, typename... Args>
-std::unique_ptr<T> make_unique(Args&&... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+DeviceContextManager::DeviceContextManager() {
+#ifndef PADDLE_ONLY_CPU
+  device_count_ = GetDeviceCount();
+  cuda_contexts_.reserve(device_count_);
+  for (int i = 0; i < device_count_; i++) {
+    cuda_contexts_[i] = nullptr;
+  }
+#endif
 }
 
-CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) {
-  static std::unique_ptr<CPUDeviceContext> g_cpu_device_context =
-      make_unique<CPUDeviceContext>(place);
-  return g_cpu_device_context.get();
+template <>
+CPUDeviceContext* DeviceContextManager::GetDeviceContext<
+    CPUPlace, CPUDeviceContext>(const CPUPlace& place) {
+  if (!cpu_context_) {
+    cpu_context_ = new CPUDeviceContext(place);
+  }
+  return cpu_context_;
 }
 
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) {
-  static std::unique_ptr<CUDADeviceContext> g_cuda_device_context =
-      make_unique<CUDADeviceContext>(place);
-  return g_cuda_device_context.get();
+template <>
+CUDADeviceContext* DeviceContextManager::GetDeviceContext<
+    GPUPlace, CUDADeviceContext>(const GPUPlace& place) {
+  int gpu_id = place.device;
+  PADDLE_ENFORCE(gpu_id < device_count_,
+                 "GPU device id must less than device count");
+  SetDeviceId(gpu_id);
+  if (!cuda_contexts_[gpu_id]) {
+    cuda_contexts_[gpu_id] = new CUDADeviceContext(place);
+  }
+  return cuda_contexts_[gpu_id];
 }
 #endif
 
-Device* GetDevice(const Place& place) {
-  CPUPlace cpu_place;
+DeviceContextManager::~DeviceContextManager() {
+  if (cpu_context_) {
+    delete cpu_context_;
+  }
 #ifndef PADDLE_ONLY_CPU
-  if (is_gpu_place(place)) {
-    GPUPlace gpu_place = boost::get<GPUPlace>(place);
-    static std::unique_ptr<Device> g_device = make_unique<Device>(
-        GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place));
-    return g_device.get();
-  } else {
-    static std::unique_ptr<Device> g_device =
-        make_unique<Device>(GetCPUDeviceContext(cpu_place), nullptr);
-    return g_device.get();
+  for (int i = 0; i < device_count_; i++) {
+    if (cuda_contexts_[i]) {
+      delete cuda_contexts_[i];
+    }
   }
-#else
-  static std::unique_ptr<Device> g_device =
-      make_unique<Device>(GetCPUDeviceContext(cpu_place));
-  return g_device.get();
 #endif
 }
 
 }  // namespace platform
 }  // namespace paddle
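Note: in the constructor, reserve() only raises capacity and leaves size() at zero, so the cuda_contexts_[i] writes there (and the later cuda_contexts_[gpu_id] accesses) index past-the-end elements. That is undefined behavior and a plausible culprit for the "gpu seg fault" in the commit message; resize() is the call that actually creates the elements. A minimal demonstration of the pitfall:

#include <cassert>
#include <vector>

void ReserveVsResize() {
  std::vector<int*> v;
  v.reserve(4);          // capacity grows, but size() stays 0
  // v[0] = nullptr;     // undefined behavior: no element exists yet
  v.resize(4, nullptr);  // creates 4 elements, each one a nullptr
  assert(v.size() == 4);
}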
paddle/platform/device_context_manager.h (renamed from paddle/platform/device.h)
@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
 
 #include "paddle/platform/device_context.h"
-#include "paddle/platform/place.h"
 
 namespace paddle {
 namespace platform {
 
-struct Device {
-  CPUDeviceContext* cpu_device_context;
-#ifndef PADDLE_ONLY_CPU
-  CUDADeviceContext* cuda_device_context;
-#endif
+template <typename T>
+struct Converter;
+
+template <>
+struct Converter<CPUPlace> {
+  using DeviceContextType = CPUDeviceContext;
+};
 
 #ifndef PADDLE_ONLY_CPU
-  Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu)
-      : cpu_device_context(cpu), cuda_device_context(gpu) {}
-#else
-  explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {}
-#endif
+template <>
+struct Converter<GPUPlace> {
+  using DeviceContextType = CUDADeviceContext;
 };
+#endif
+
+class DeviceContextManager {
+ public:
+  DeviceContextManager();
+  ~DeviceContextManager();
+
+  template <typename PlaceType, typename DeviceType = typename Converter<
+                                     PlaceType>::DeviceContextType>
+  DeviceType* GetDeviceContext(const PlaceType& place);
 
-CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place);
+  static DeviceContextManager* Get() {
+    static DeviceContextManager inst;
+    return &inst;
+  }
+
+ private:
+  CPUDeviceContext* cpu_context_;
 #ifndef PADDLE_ONLY_CPU
-CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place);
+  int device_count_;
+  std::vector<CUDADeviceContext*> cuda_contexts_;
 #endif
-
-Device* GetDevice(const platform::Place& place);
+};
 
 }  // namespace platform
 }  // namespace paddle
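Note: Converter maps a place type to its context type at compile time, so callers get a correctly typed pointer with no casts; the header does rely on std::vector without including <vector> itself. A minimal usage sketch:

#include "paddle/platform/device_context_manager.h"

void Demo() {
  paddle::platform::CPUPlace cpu;
  // PlaceType deduces to CPUPlace and DeviceType defaults to
  // Converter<CPUPlace>::DeviceContextType, i.e. CPUDeviceContext.
  auto* ctx = paddle::platform::DeviceContextManager::Get()
                  ->GetDeviceContext(cpu);
  static_cast<void>(ctx);  // contexts are cached and owned by the manager
}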