提交 0009a30f 编写于 作者: Y Yang Yang

Pass the CPU executor test; the GPU executor test still segfaults

......@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto ${GLOB_OP_LIB})
cc_test(executor_test SRCS executor_test.cc DEPS executor)
cc_library(executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto ${GLOB_OP_LIB})
if(WITH_GPU)
nv_test(executor_test SRCS executor_test.cc DEPS executor)
else()
cc_test(executor_test SRCS executor_test.cc DEPS executor)
endif()
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
......@@ -24,9 +24,21 @@ namespace paddle {
namespace framework {
Executor::Executor(const std::vector<platform::Place>& places) {
devices_.resize(places.size());
device_contexts_.resize(places.size());
for (size_t i = 0; i < places.size(); i++) {
devices_[i] = platform::GetDevice(places[i]);
if (platform::is_cpu_place(places[i])) {
device_contexts_[i] = platform::DeviceContextManager::Get()
->GetDeviceContext<platform::CPUPlace>(
boost::get<platform::CPUPlace>(places[i]));
} else {
#ifndef PADDLE_ONLY_CPU
device_contexts_[i] = platform::DeviceContextManager::Get()
->GetDeviceContext<platform::GPUPlace>(
boost::get<platform::GPUPlace>(places[i]));
#else
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
#endif
}
}
}
......@@ -37,7 +49,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
// - only runs on the first device
// - test on gpu
auto& block = pdesc.blocks(0);
auto& device = devices_[0];
auto& device = device_contexts_[0];
// TODO(tonyyang-svail):
// - runs on a new local scope
......@@ -49,17 +61,18 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
for (auto& op_desc : block.ops()) {
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(*scope, *device->cpu_device_context);
op->Run(*scope, *device);
}
// print tensor value
for (auto& var : block.vars()) {
std::cout << var.name() << std::endl;
auto v = scope->FindVar(var.name());
const LoDTensor& t = v->Get<LoDTensor>();
for (int i = 0; i < t.numel(); ++i) std::cout << t.data<float>()[i] << " ";
std::cout << std::endl;
}
// // print tensor value
// for (auto& var : block.vars()) {
// std::cout << var.name() << std::endl;
// auto v = scope->FindVar(var.name());
// const LoDTensor& t = v->Get<LoDTensor>();
// for (int i = 0; i < t.numel(); ++i)
// std::cout << t.data<float>()[i] << " ";
// std::cout << std::endl;
// }
}
} // namespace framework
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/framework/op_info.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device.h"
#include "paddle/platform/device_context_manager.h"
namespace paddle {
namespace framework {
......@@ -30,7 +30,7 @@ class Executor {
void Run(const ProgramDesc&, Scope*, std::vector<Tensor>*);
private:
std::vector<platform::Device*> devices_;
std::vector<platform::DeviceContext*> device_contexts_;
};
} // namespace framework
......
......@@ -25,14 +25,13 @@ limitations under the License. */
USE_OP(elementwise_add);
USE_OP(gaussian_random);
using std::string;
using namespace paddle::platform;
using namespace paddle::framework;
typedef paddle::framework::BlockDesc proto_block;
typedef paddle::framework::OpDesc proto_op;
using std::string;
void add_gaussian_random_op(string var_name, proto_block* block) {
std::vector<int> dim{2, 3};
......@@ -59,10 +58,10 @@ void add_gaussian_random_op(string var_name, proto_block* block) {
Out->add_arguments(var_name);
}
TEST(Executor, Init) {
ProgramDesc pdesc;
auto root_block = pdesc.add_blocks();
class ExecutorTester : public ::testing::Test {
public:
virtual void SetUp() override {
auto root_block = pdesc_.add_blocks();
root_block->set_idx(0);
root_block->set_parent_idx(-1);
......@@ -85,16 +84,33 @@ TEST(Executor, Init) {
auto Out = op->add_outputs();
Out->set_parameter("Out");
Out->add_arguments("c");
}
CPUPlace cpu_place1, cpu_place2;
protected:
std::vector<Tensor>* outputs_{nullptr};
ProgramDesc pdesc_;
Scope scope_;
};
TEST_F(ExecutorTester, InitCPU) {
std::vector<Place> places;
CPUPlace cpu_place1, cpu_place2;
places.push_back(cpu_place1);
places.push_back(cpu_place2);
Executor* executor = new Executor(places);
Scope s;
std::vector<Tensor>* outputs{nullptr};
executor->Run(pdesc, &s, outputs);
executor->Run(pdesc_, &scope_, outputs_);
delete executor;
}
#ifndef PADDLE_ONLY_CPU
TEST_F(ExecutorTester, InitGPU) {
  // Builds an Executor for GPU 0 and runs the fixture's program on it.
  std::vector<Place> places;
  GPUPlace gpu_place0(0);
  places.push_back(gpu_place0);

  // Automatic storage instead of new/delete: the executor is destroyed even
  // if Run() throws, so a failing run no longer leaks it.
  Executor executor(places);
  executor.Run(pdesc_, &scope_, outputs_);
}
#endif
......@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
cc_library(device SRCS device.cc DEPS device_context)
cc_library(device_context_manager SRCS device_context_manager.cc DEPS device_context)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
......@@ -12,48 +12,57 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/device.h"
#include "paddle/platform/device_context_manager.h"
namespace paddle {
namespace platform {
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
// Sets up an empty manager: no CPU context yet and, on GPU builds, one null
// slot per device reported by GetDeviceCount(). Contexts are created lazily
// by GetDeviceContext().
DeviceContextManager::DeviceContextManager() : cpu_context_(nullptr) {
#ifndef PADDLE_ONLY_CPU
  device_count_ = GetDeviceCount();
  // resize(), not reserve(): reserve() only grows capacity and leaves size()
  // at 0, so writing cuda_contexts_[i] afterwards is out of bounds.
  cuda_contexts_.resize(device_count_, nullptr);
#endif
}
CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) {
static std::unique_ptr<CPUDeviceContext> g_cpu_device_context =
make_unique<CPUDeviceContext>(place);
return g_cpu_device_context.get();
// CPUPlace specialization: hands out the manager's single CPU context,
// constructing it from `place` the first time it is requested.
template <>
CPUDeviceContext* DeviceContextManager::GetDeviceContext<
    CPUPlace, CPUDeviceContext>(const CPUPlace& place) {
  if (cpu_context_) {
    return cpu_context_;
  }
  cpu_context_ = new CPUDeviceContext(place);
  return cpu_context_;
}
#ifndef PADDLE_ONLY_CPU
CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) {
static std::unique_ptr<CUDADeviceContext> g_cuda_device_context =
make_unique<CUDADeviceContext>(place);
return g_cuda_device_context.get();
// GPUPlace specialization: returns the CUDA context stored for
// place.device, constructing it on the first request for that device.
// Fails a PADDLE_ENFORCE check if the device id is outside
// [0, device_count_).
template <>
CUDADeviceContext* DeviceContextManager::GetDeviceContext<
    GPUPlace, CUDADeviceContext>(const GPUPlace& place) {
  const int gpu_id = place.device;
  // A negative id would pass the upper-bound check below and then index
  // cuda_contexts_ out of range, so reject it explicitly.
  PADDLE_ENFORCE(gpu_id >= 0, "GPU device id must not be negative");
  PADDLE_ENFORCE(gpu_id < device_count_,
                 "GPU device id must less than device count");
  // SetDeviceId runs before the context is constructed (presumably so the
  // context is created on that device -- confirm against SetDeviceId).
  SetDeviceId(gpu_id);
  if (!cuda_contexts_[gpu_id]) {
    cuda_contexts_[gpu_id] = new CUDADeviceContext(place);
  }
  return cuda_contexts_[gpu_id];
}
#endif
Device* GetDevice(const Place& place) {
CPUPlace cpu_place;
DeviceContextManager::~DeviceContextManager() {
if (cpu_context_) {
delete cpu_context_;
}
#ifndef PADDLE_ONLY_CPU
if (is_gpu_place(place)) {
GPUPlace gpu_place = boost::get<GPUPlace>(place);
static std::unique_ptr<Device> g_device = make_unique<Device>(
GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place));
return g_device.get();
} else {
static std::unique_ptr<Device> g_device =
make_unique<Device>(GetCPUDeviceContext(cpu_place), nullptr);
return g_device.get();
for (int i = 0; i < device_count_; i++) {
if (cuda_contexts_[i]) {
delete cuda_contexts_[i];
}
}
#else
static std::unique_ptr<Device> g_device =
make_unique<Device>(GetCPUDeviceContext(cpu_place));
return g_device.get();
#endif
}
} // namespace platform
} // namespace paddle
......@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/platform/device_context.h"
#include "paddle/platform/place.h"
namespace paddle {
namespace platform {
struct Device {
CPUDeviceContext* cpu_device_context;
#ifndef PADDLE_ONLY_CPU
CUDADeviceContext* cuda_device_context;
#endif
template <typename T>
struct Converter;
template <>
struct Converter<CPUPlace> {
using DeviceContextType = CPUDeviceContext;
};
#ifndef PADDLE_ONLY_CPU
Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu)
: cpu_device_context(cpu), cuda_device_context(gpu) {}
#else
explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {}
#endif
template <>
struct Converter<GPUPlace> {
using DeviceContextType = CUDADeviceContext;
};
#endif
class DeviceContextManager {
public:
DeviceContextManager();
~DeviceContextManager();
template <typename PlaceType, typename DeviceType = typename Converter<
PlaceType>::DeviceContextType>
DeviceType* GetDeviceContext(const PlaceType& place);
CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place);
static DeviceContextManager* Get() {
static DeviceContextManager inst;
return &inst;
}
private:
CPUDeviceContext* cpu_context_;
#ifndef PADDLE_ONLY_CPU
CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place);
int device_count_;
std::vector<CUDADeviceContext*> cuda_contexts_;
#endif
Device* GetDevice(const platform::Place& place);
};
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册