diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index 7a4839044008338dda43f75b5ee6def500b78270..07d0906ea709da8c53242dc403c59382231d79d1 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -128,13 +128,21 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { if (platform::is_cpu_place(place)) { holder_.reset(new PlaceholderImpl( boost::get(place), size, type)); - } else if (platform::is_gpu_place(place)) { + } else if (platform::is_gpu_place(place) || + platform::is_cuda_pinned_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); + PADDLE_THROW( + "'CUDAPlace' or 'CUDAPinnedPlace' is not supported in CPU only " + "device."); } #else - holder_.reset(new PlaceholderImpl( - boost::get(place), size, type)); + if (platform::is_gpu_place(place)) { + holder_.reset(new PlaceholderImpl( + boost::get(place), size, type)); + } else if (platform::is_cuda_pinned_place(place)) { + holder_.reset(new PlaceholderImpl( + boost::get(place), size, type)); + } } #endif offset_ = 0; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index e7fa450832d9624a3b279a061c7ef83cb5fbec63..046721970aafdc541f4749a856fcf6433647f1c1 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -125,6 +125,12 @@ PYBIND11_PLUGIN(core) { .def("set", PyCUDATensorSetFromArray) .def("set", PyCUDATensorSetFromArray) .def("set", PyCUDATensorSetFromArray) + .def("set", PyCUDAPinnedTensorSetFromArray) + .def("set", PyCUDAPinnedTensorSetFromArray) + .def("set", PyCUDAPinnedTensorSetFromArray) + .def("set", PyCUDAPinnedTensorSetFromArray) + .def("set", PyCUDAPinnedTensorSetFromArray) + .def("set", PyCUDAPinnedTensorSetFromArray) #endif .def("shape", [](Tensor &self) { return vectorize(self.dims()); }) .def("set_float_element", TensorSetElement) @@ -367,8 +373,8 @@ All parameter, weight, gradient are variables in Paddle. self = gpu_place; }) .def("set_place", [](platform::Place &self, - const platform::CUDAPinnedPlace &gpu_place) { - self = gpu_place; + const platform::CUDAPinnedPlace &cuda_pinned_place) { + self = cuda_pinned_place; }); py::class_(m, "Operator") diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 6f8c597f8e610594851c318c122563523e4e7ea6..f52ffc9ef3ff7dbbd6097949bf2fe2bfd2c33d7f 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -14,6 +14,8 @@ limitations under the License. */ #pragma once #include +#include +#include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device_context.h" @@ -208,6 +210,38 @@ void PyCUDATensorSetFromArray( sizeof(uint16_t) * array.size(), cudaMemcpyHostToDevice, dev_ctx->stream()); } + +template +void PyCUDAPinnedTensorSetFromArray( + framework::Tensor &self, + py::array_t array, + const paddle::platform::CUDAPinnedPlace &place) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back(static_cast(array.shape()[i])); + } + + self.Resize(framework::make_ddim(dims)); + auto *dst = self.mutable_data(place); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + +template <> +void PyCUDAPinnedTensorSetFromArray( + framework::Tensor &self, + py::array_t array, + const paddle::platform::CUDAPinnedPlace &place) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back(static_cast(array.shape()[i])); + } + + self.Resize(framework::make_ddim(dims)); + auto *dst = self.mutable_data(place); + std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size()); +} #endif } // namespace pybind diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 5ea4d977f4d8d9eb56b1fefa16f429df6e2a15bb..f01d638efddd471d5667fded183b90c2d7d0a856 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -31,7 +31,7 @@ import regularizer import average from param_attr import ParamAttr, WeightNormParamAttr from data_feeder import DataFeeder -from core import LoDTensor, CPUPlace, CUDAPlace +from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace from distribute_transpiler import DistributeTranspiler from distribute_transpiler_simple import SimpleDistributeTranspiler from concurrency import (Go, make_channel, channel_send, channel_recv, @@ -57,6 +57,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [ 'LoDTensor', 'CPUPlace', 'CUDAPlace', + 'CUDAPinnedPlace', 'Tensor', 'ParamAttr', 'WeightNormParamAttr',