From bde90be71bc2758b464960c8e2631ee177c1d9a7 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 17 Jul 2017 18:10:18 +0800
Subject: [PATCH] Read/Write a Tensor in Python

Basically following
http://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html

* Use the buffer protocol to return a view of the Tensor; it can be
  cast to a numpy array in Python.
* Set a numpy array to a tensor.
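
The intended Python-side usage looks roughly like this (a sketch only;
the new test_tensor.py below is the authoritative example -- every name
used here, i.e. core, Scope, create_var, get_tensor, set_dims,
alloc_float, and set, is a binding added by this patch, while the
variable names and shape are merely illustrative):

    import numpy
    import paddle.v2.framework.core as core

    scope = core.Scope(None)
    tensor = scope.create_var("t").get_tensor()

    tensor.set_dims([2, 3])    # shape must be set before allocating
    tensor.alloc_float()       # allocate CPU memory for float data

    arr = numpy.array(tensor)  # numpy copies the data out through the
                               # buffer-protocol view of the tensor
    arr[0, 0] = 1.0
    tensor.set(arr)            # memcpy the numpy array back into the tensor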
---
 paddle/framework/tensor.h                       |   9 +-
 paddle/pybind/pybind.cc                         | 142 +++++++++++++++++-
 .../paddle/v2/framework/tests/test_tensor.py    |  45 ++++++
 3 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 python/paddle/v2/framework/tests/test_tensor.py

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 29bad7a00a4..891cf736418 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <cstdint>
 #include <memory>
 #include <type_traits>
+#include <typeindex>
 #include "paddle/framework/ddim.h"
 #include "paddle/framework/enforce.h"
 #include "paddle/memory/memory.h"
@@ -127,6 +128,10 @@ class Tensor {
 
   DDim dims() const { return dims_; }
 
+  platform::Place place() const { return holder_->place(); }
+
+  std::type_index type() const { return holder_->type(); }
+
  private:
   // Placeholder hides type T, so it doesn't appear as a template
   // parameter of Variable.
@@ -135,6 +140,7 @@ class Tensor {
     virtual void* ptr() const = 0;
     virtual platform::Place place() const = 0;
     virtual size_t size() const = 0;
+    virtual std::type_index type() const = 0;
   };
 
   template <typename T>
@@ -159,7 +165,8 @@ class Tensor {
 
     virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
     virtual size_t size() const { return size_; }
-    virtual platform::Place place() const { return place_; }
+    virtual paddle::platform::Place place() const { return place_; }
+    virtual std::type_index type() const { return std::type_index(typeid(T)); }
 
     std::unique_ptr<T, Deleter<PlaceType>> ptr_;
     platform::Place place_;  // record the place of ptr_.
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index b5ead21fd01..8222323e36a 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include <Python.h>
 #include <paddle/framework/op_registry.h>
 #include <paddle/framework/scope.h>
+#include <pybind11/numpy.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <fstream>
@@ -25,9 +26,143 @@
 namespace pd = paddle::framework;
 
 USE_OP(add_two);
+
+struct PlaceDebugString : public boost::static_visitor<std::string> {
+  std::string operator()(const paddle::platform::GPUPlace& place) const {
+    return "GPU(" + std::to_string(place.device) + ")";
+  }
+
+  std::string operator()(const paddle::platform::CPUPlace& place) const {
+    return "CPU";
+  }
+};
+
+template <typename T>
+struct TensorToPyBuffer {
+  pd::Tensor& self_;
+  explicit TensorToPyBuffer(pd::Tensor& self) : self_(self) {}
+
+  bool CanCast() const { return std::type_index(typeid(T)) == self_.type(); }
+
+  py::buffer_info Cast() const {
+    auto dim_vec = pd::vectorize(self_.dims());
+    std::vector<size_t> dims_outside;
+    std::vector<size_t> strides;
+    dims_outside.resize(dim_vec.size());
+    strides.resize(dim_vec.size());
+
+    size_t prod = 1;
+    for (size_t i = dim_vec.size(); i != 0; --i) {
+      dims_outside[i - 1] = (size_t)dim_vec[i - 1];
+      strides[i - 1] = sizeof(T) * prod;
+      prod *= dims_outside[i - 1];
+    }
+
+    return py::buffer_info(self_.mutable_data<T>(self_.place()),
+                           sizeof(T),
+                           py::format_descriptor<T>::format(),
+                           (size_t)pd::arity(self_.dims()),
+                           dims_outside,
+                           strides);
+  }
+};
+
+template <bool less, size_t I, typename... ARGS>
+struct CastToPyBufferImpl;
+
+template <size_t I, typename... ARGS>
+struct CastToPyBufferImpl<false, I, ARGS...> {
+  py::buffer_info operator()(pd::Tensor& tensor) {
+    PADDLE_THROW("This type of tensor cannot be exposed to Python");
+    return py::buffer_info();
+  }
+};
+
+template <size_t I, typename... ARGS>
+struct CastToPyBufferImpl<true, I, ARGS...> {
+  using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
+  py::buffer_info operator()(pd::Tensor& tensor) {
+    TensorToPyBuffer<CUR_TYPE> cast_object(tensor);
+    if (cast_object.CanCast()) {
+      return cast_object.Cast();
+    } else {
+      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
+      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
+    }
+  }
+};
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
+  for (size_t i = 0; i < vec.size(); ++i) {
+    os << vec[i];
+    if (i + 1 != vec.size()) {
+      os << ", ";
+    }
+  }
+  return os;
+}
+
+py::buffer_info CastToPyBuffer(pd::Tensor& tensor) {
+  auto buffer_info = CastToPyBufferImpl<true, 0, float, int>()(tensor);
+  return buffer_info;
+}
+
+template <typename T>
+void PyTensorSet(
+    pd::Tensor& self,
+    py::array_t<T, py::array::c_style | py::array::forcecast> array) {
+  std::vector<int> dims;
+  dims.reserve(array.ndim());
+  for (size_t i = 0; i < array.ndim(); ++i) {
+    dims.push_back((int)array.shape()[i]);
+  }
+
+  self.set_dims(pd::make_ddim(dims));
+  auto* dst = self.mutable_data<T>(paddle::platform::CPUPlace());
+  std::memcpy(dst, array.data(), sizeof(T) * array.size());
+}
+
 PYBIND11_PLUGIN(core) {
   py::module m("core", "C++ core of Paddle Paddle");
 
+  py::class_<paddle::platform::Place>(
+      m, "Place", R"DOC(Device Place Class.)DOC")
+      .def("__str__",
+           [](const paddle::platform::Place& self) {
+             return boost::apply_visitor(PlaceDebugString(), self);
+           })
+      .def("is_gpu",
+           [](const paddle::platform::Place& self) {
+             return paddle::platform::is_gpu_place(self);
+           })
+      .def("is_cpu", [](const paddle::platform::Place& self) {
+        return paddle::platform::is_cpu_place(self);
+      });
+
+  py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol())
+      .def("get_place", &pd::Tensor::place)
+      .def_buffer([](pd::Tensor& self) -> py::buffer_info {
+        PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()),
+                       "Only CPU tensor can cast to numpy array");
+        return CastToPyBuffer(self);
+      })
+      .def("get_dims",
+           [](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
+      .def("set_dims",
+           [](pd::Tensor& self, const std::vector<int>& dim) {
+             self.set_dims(pd::make_ddim(dim));
+           })
+      .def("alloc_float",
+           [](pd::Tensor& self) {
+             self.mutable_data<float>(paddle::platform::CPUPlace());
+           })
+      .def("alloc_int",
+           [](pd::Tensor& self) {
+             self.mutable_data<int>(paddle::platform::CPUPlace());
+           })
+      .def("set", PyTensorSet<float>)
+      .def("set", PyTensorSet<int>);
+
   py::class_<pd::Variable>(m, "Variable", R"DOC(Variable Class.
 
 All parameter, weight, gradient are variables in Paddle.
@@ -38,7 +173,12 @@ All parameter, weight, gradient are variables in Paddle.
         *var.GetMutable<int>() = val;
       })
       .def("get_int",
-           [](const pd::Variable& var) -> int { return var.Get<int>(); });
+           [](const pd::Variable& var) -> int { return var.Get<int>(); })
+      .def("get_tensor",
+           [](pd::Variable& self) -> pd::Tensor* {
+             return self.GetMutable<pd::Tensor>();
+           },
+           py::return_value_policy::reference);
 
   py::class_<pd::Scope, std::shared_ptr<pd::Scope>>(m, "Scope")
       .def(py::init<const std::shared_ptr<pd::Scope>&>())
diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py
new file mode 100644
index 00000000000..b72aff3b9cd
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_tensor.py
@@ -0,0 +1,45 @@
+import paddle.v2.framework.core as core
+import unittest
+import numpy
+
+
+class TestTensor(unittest.TestCase):
+    def test_int_tensor(self):
+        scope = core.Scope(None)
+        var = scope.create_var("test_tensor")
+        tensor = var.get_tensor()
+
+        tensor.set_dims([1000, 784])
+        tensor.alloc_int()
+
+        tensor_array = numpy.array(tensor)
+        self.assertEqual((1000, 784), tensor_array.shape)
+        tensor_array[3, 9] = 1
+        tensor_array[19, 11] = 2
+        tensor.set(tensor_array)
+
+        tensor_array_2 = numpy.array(tensor)
+        self.assertEqual(1, tensor_array_2[3, 9])
+        self.assertEqual(2, tensor_array_2[19, 11])
+
+    def test_float_tensor(self):
+        scope = core.Scope(None)
+        var = scope.create_var("test_tensor")
+        tensor = var.get_tensor()
+
+        tensor.set_dims([1000, 784])
+        tensor.alloc_float()
+
+        tensor_array = numpy.array(tensor)
+        self.assertEqual((1000, 784), tensor_array.shape)
+        tensor_array[3, 9] = 1.0
+        tensor_array[19, 11] = 2.0
+        tensor.set(tensor_array)
+
+        tensor_array_2 = numpy.array(tensor)
+        self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
+        self.assertAlmostEqual(2.0, tensor_array_2[19, 11])
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab