diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index db82c56da763306f84fb7c9288d6c7b722e68f30..24879ee78f1dbd0755d4319af95cc974222fc89b 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/operator.h"
 #include "paddle/framework/scope.h"
+#include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
 #include "paddle/pybind/tensor_bind.h"
 #include "pybind11/numpy.h"
@@ -131,17 +132,20 @@ All parameter, weight, gradient are variables in Paddle.
       .def("temp", pd::OperatorBase::TMP_VAR_NAME);
 
   py::class_<paddle::platform::DeviceContext>(m, "DeviceContext")
-      .def_static(
-          "create",
-          [](paddle::platform::Place& place) -> paddle::platform::DeviceContext* {
-            if (paddle::platform::is_gpu_place(place)) {
-              return new paddle::platform::GPUDeviceContext(place);
-            } else if (paddle::platform::is_cpu_place(place)) {
-              return new paddle::platform::CPUDeviceContext();
-            }
-          });
+      .def_static("cpu_context",
+                  []() -> paddle::platform::DeviceContext* {
+                    return new paddle::platform::CPUDeviceContext();
+                  })
+      .def_static("gpu_context",
+                  [](paddle::platform::Place& place)
+                      -> paddle::platform::DeviceContext* {
+#ifdef PADDLE_ONLY_CPU
+                    // Fail loudly instead of handing Python a null context.
+                    PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
+                    return nullptr;  // unreachable; silences missing-return warning
+#else
+                    return new paddle::platform::CUDADeviceContext(place);
+#endif
+                  });
 
-  py::class_<paddle::platform::GPUPlace>(m, "GPUPlace")
-      .def(py::init<>());
+  py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());
 
   py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>());
diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h
index 1af7c0a3026807b38a9f9cd964293ad0f52bc5b4..a94c89d328b35cb199762411fa9ac74b3f9e45fe 100644
--- a/paddle/pybind/tensor_bind.h
+++ b/paddle/pybind/tensor_bind.h
@@ -13,6 +13,7 @@ limitations under the License.
 */
 #pragma once
+#include <cstring>
 #include "paddle/framework/tensor.h"
 #include "paddle/memory/memcpy.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
@@ -57,9 +58,9 @@ struct CastToPyBufferImpl {
       strides[i - 1] = sizeof(CUR_TYPE) * prod;
       prod *= dims_outside[i - 1];
     }
-    Tensor dst_tensor;
+    framework::Tensor dst_tensor;
     if (paddle::platform::is_gpu_place(tensor.holder_->place())) {
-      dst_tensor.CopyFrom(tensor, platform::CPUPlace());
-    } else if (paddle::platform::is_gpu_place(tensor.holder_->place())) {
+      dst_tensor.CopyFrom<CUR_TYPE>(tensor, platform::CPUPlace());
+    } else if (paddle::platform::is_cpu_place(tensor.holder_->place())) {
       dst_tensor = tensor;
     }
@@ -96,18 +97,13 @@ void PyTensorSetFromArray(
   auto *dst = self.mutable_data<T>(self.place());
 
   if (paddle::platform::is_cpu_place(self.place())) {
-    paddle::memory::Copy<platform::CPUPlace, platform::CPUPlace>(
-        place, dst, place, array.data(), sizeof(T) * array.size());
-  } else if (paddle::platform::is_gpu_place(place)) {
+    std::memcpy(dst, array.data(), sizeof(T) * array.size());
+  } else if (paddle::platform::is_gpu_place(self.place())) {
 #ifdef PADDLE_ONLY_CPU
     PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
 #else
-    paddle::memory::Copy<platform::GPUPlace, platform::CPUPlace>(
-        place,
-        dst,
-        paddle::platform::CPUPlace(),
-        array.data(),
-        sizeof(T) * array.size());
+    GpuMemcpySync(
+        dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice);
 #endif
   }
 }