From 358261f0bdf2ce887a3ff77218694828a6527ede Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 12:41:11 +0000 Subject: [PATCH] fix gpu build error --- paddle/pybind/pybind.cc | 22 ++++++----- paddle/pybind/tensor_bind.h | 37 ++++++++++++------- .../paddle/v2/framework/tests/op_test_util.py | 3 +- .../paddle/v2/framework/tests/test_fc_op.py | 7 ++-- .../paddle/v2/framework/tests/test_tensor.py | 11 +++--- 5 files changed, 47 insertions(+), 33 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 2cc26a926e3..27a80f7ffa3 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -64,23 +64,25 @@ PYBIND11_PLUGIN(core) { self.Resize(pd::make_ddim(dim)); }) .def("alloc_float", - [](pd::Tensor& self, paddle::platform::Place& place) { + [](pd::Tensor& self, paddle::platform::GPUPlace& place) { self.mutable_data(place); }) .def("alloc_float", - [](pd::Tensor& self) { - self.mutable_data(paddle::platform::CPUPlace()); + [](pd::Tensor& self, paddle::platform::CPUPlace& place) { + self.mutable_data(place); }) .def("alloc_int", - [](pd::Tensor& self, paddle::platform::Place& place) { + [](pd::Tensor& self, paddle::platform::CPUPlace& place) { self.mutable_data(place); }) .def("alloc_int", - [](pd::Tensor& self) { - self.mutable_data(paddle::platform::CPUPlace()); + [](pd::Tensor& self, paddle::platform::GPUPlace& place) { + self.mutable_data(place); }) - .def("set", paddle::pybind::PyTensorSetFromArray) - .def("set", paddle::pybind::PyTensorSetFromArray) + .def("set", paddle::pybind::PyCPUTensorSetFromArray) + .def("set", paddle::pybind::PyCUDATensorSetFromArray) + .def("set", paddle::pybind::PyCPUTensorSetFromArray) + .def("set", paddle::pybind::PyCUDATensorSetFromArray) .def("shape", [](pd::Tensor& self) { return pd::vectorize(self.dims()); }); @@ -144,9 +146,9 @@ All parameter, weight, gradient are variables in Paddle. }) #endif ; // NOLINT - py::class_(m, "GPUPlace").def(py::init()); + py::class_(m, "GPUPlace").def(py::init()); - py::class_(m, "CPUPlace").def(py::init<>()); + py::class_(m, "CPUPlace").def(py::init<>()); py::class_> operator_base( m, "Operator"); diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index fdf8861b68c..86eff97d722 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -61,7 +61,7 @@ struct CastToPyBufferImpl { framework::Tensor dst_tensor; if (paddle::platform::is_gpu_place(tensor.holder_->place())) { dst_tensor.CopyFrom(tensor, platform::CPUPlace()); - } else if (paddle::platform::is_gpu_place(tensor.holder_->place())) { + } else if (paddle::platform::is_cpu_place(tensor.holder_->place())) { dst_tensor = tensor; } return py::buffer_info( @@ -84,9 +84,10 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { } template -void PyTensorSetFromArray( +void PyCPUTensorSetFromArray( framework::Tensor &self, - py::array_t array) { + py::array_t array, + paddle::platform::CPUPlace &place) { std::vector dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { @@ -94,18 +95,26 @@ void PyTensorSetFromArray( } self.Resize(framework::make_ddim(dims)); - auto *dst = self.mutable_data(self.place()); - - if (paddle::platform::is_cpu_place(self.place())) { - std::memcpy(dst, array.data(), sizeof(T) * array.size()); - } else if (paddle::platform::is_gpu_place(self.place())) { -#ifdef PADDLE_ONLY_CPU - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); -#else - platform::GpuMemcpySync( - dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); -#endif + auto *dst = self.mutable_data(place); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + +template +void PyCUDATensorSetFromArray( + framework::Tensor &self, + py::array_t array, + paddle::platform::GPUPlace &place) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back((int)array.shape()[i]); } + + self.Resize(framework::make_ddim(dims)); + auto *dst = self.mutable_data(place); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); + paddle::platform::GpuMemcpySync( + dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); } } // namespace pybind diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index 7b62313f8ac..35ee9555850 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -25,6 +25,7 @@ class OpTestMeta(type): self.assertIsNotNone(func) scope = core.Scope(None) + place = core.CPUPlace() kwargs = dict() for in_name in func.all_input_args: @@ -33,7 +34,7 @@ class OpTestMeta(type): var = scope.create_var(in_name).get_tensor() arr = getattr(self, in_name) var.set_dims(arr.shape) - var.set(arr) + var.set(arr, place) else: kwargs[in_name] = "@EMPTY@" diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py index 59e7e61249e..d5fd590892f 100644 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ b/python/paddle/v2/framework/tests/test_fc_op.py @@ -7,17 +7,18 @@ import paddle.v2.framework.create_op_creation_methods as creation class TestFc(unittest.TestCase): def test_fc(self): scope = core.Scope(None) + place = core.CPUPlace() x = scope.create_var("X") x_tensor = x.get_tensor() x_tensor.set_dims([1000, 784]) - x_tensor.alloc_float() + x_tensor.alloc_float(place) w = scope.create_var("W") w_tensor = w.get_tensor() w_tensor.set_dims([784, 100]) - w_tensor.alloc_float() + w_tensor.alloc_float(place) - w_tensor.set(numpy.random.random((784, 100)).astype("float32")) + w_tensor.set(numpy.random.random((784, 100)).astype("float32"), place) # Set a real numpy array here. # x_tensor.set(numpy.array([])) diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py index b72aff3b9cd..54b627b38c2 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -7,16 +7,16 @@ class TestScope(unittest.TestCase): def test_int_tensor(self): scope = core.Scope(None) var = scope.create_var("test_tensor") + place = core.CPUPlace() tensor = var.get_tensor() tensor.set_dims([1000, 784]) - tensor.alloc_int() - + tensor.alloc_int(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) tensor_array[3, 9] = 1 tensor_array[19, 11] = 2 - tensor.set(tensor_array) + tensor.set(tensor_array, place) tensor_array_2 = numpy.array(tensor) self.assertEqual(1.0, tensor_array_2[3, 9]) @@ -25,16 +25,17 @@ class TestScope(unittest.TestCase): def test_float_tensor(self): scope = core.Scope(None) var = scope.create_var("test_tensor") + place = core.CPUPlace() tensor = var.get_tensor() tensor.set_dims([1000, 784]) - tensor.alloc_float() + tensor.alloc_float(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) tensor_array[3, 9] = 1.0 tensor_array[19, 11] = 2.0 - tensor.set(tensor_array) + tensor.set(tensor_array, place) tensor_array_2 = numpy.array(tensor) self.assertAlmostEqual(1.0, tensor_array_2[3, 9]) -- GitLab