From d5ff79e55ea329396f4c61686a318b7c77520b93 Mon Sep 17 00:00:00 2001
From: Youwei Song <youwei0314@gmail.com>
Date: Wed, 27 Nov 2019 16:23:56 +0800
Subject: [PATCH] Support numpy bridge (enabled by default in dygraph mode)
 (#20983)

* add numpy bridge

* fix template compile

* add unittest, add default
test=develop

* fix unittest
test=develop

* fix unittest
test=develop

* zero_copy=True for to_variable,
test=develop

* bug fix
test=develop

* disable deprecated NumPy API
test=develop

* use better design of NumpyAllocator
test=develop

* fix Py_None check
test=develop

* reset c++ tracer when jump out dygraph guard
test=develop

* refine PADDLE_ENFORCE_xx format
test=develop

* bug fix of tracer switch
test=develop

* update decref
test=develop
---
 paddle/fluid/framework/tensor.cc              |  6 ++
 paddle/fluid/framework/tensor.h               |  3 +
 paddle/fluid/pybind/pybind.cc                 |  9 ++-
 paddle/fluid/pybind/tensor_py.h               | 61 +++++++++++++++----
 python/paddle/fluid/dygraph/base.py           | 20 ++++--
 python/paddle/fluid/framework.py              |  2 +
 .../unittests/test_imperative_numpy_bridge.py | 39 ++++++++++++
 7 files changed, 120 insertions(+), 20 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py

diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc
index 956049ca6a7..df1e0fb6d5b 100644
--- a/paddle/fluid/framework/tensor.cc
+++ b/paddle/fluid/framework/tensor.cc
@@ -114,5 +114,11 @@ void Tensor::ResetHolder(std::shared_ptr<memory::Allocation> holder) {
   holder_ = holder;
 }
 
+void Tensor::ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
+                                 const proto::VarType::Type type) {
+  ResetHolder(holder);
+  type_ = type;
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index ea28302b35a..2c89afcf887 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -169,6 +169,9 @@ class Tensor {
 
   void ResetHolder(std::shared_ptr<memory::Allocation> holder);
 
+  void ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
+                           const proto::VarType::Type type);
+
  private:
   /*! holds the memory block if allocated. */
   std::shared_ptr<memory::Allocation> holder_;
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 86635f15fc7..f01ecfd6ca4 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -459,17 +459,20 @@ PYBIND11_MODULE(core_noavx, m) {
            })
       .def("_clear", &Tensor::clear)
       .def("set", SetTensorFromPyArray<paddle::platform::CPUPlace>,
-           py::arg("array"), py::arg("place"))
+           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
       .def("set", SetTensorFromPyArray<paddle::platform::CUDAPlace>,
-           py::arg("array"), py::arg("place"))
+           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
       .def("set", SetTensorFromPyArray<paddle::platform::CUDAPinnedPlace>,
-           py::arg("array"), py::arg("place"), R"DOC(
+           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false,
+           R"DOC(
         Set the data of LoDTensor on place with given numpy array.
         
         Args:
           lod (numpy.ndarray): The data to set.
           place (CPUPlace|CUDAPlace|CUDAPinnedPlace): The place where the 
           LoDTensor is to be set.
+          zero_copy (bool, optional): Whether to share memory with the input numpy array.
+          This parameter only works with CPUPlace. Default: False.
 
         Returns:
             None.
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index a6078088944..0e04b20a10e 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+
 #include <Python.h>
 #include <algorithm>
 #include <memory>
@@ -64,6 +65,31 @@ namespace pybind {
 
 namespace details {
 
+template <typename T>
+class PYBIND11_HIDDEN NumpyAllocation : public memory::Allocation {
+ public:
+  explicit NumpyAllocation(const py::array &arr)
+      : Allocation(const_cast<void *>(arr.data()), sizeof(T) * (arr.size()),
+                   paddle::platform::CPUPlace()),
+        arr_(arr.ptr()) {
+    PADDLE_ENFORCE_NOT_NULL(arr_, platform::errors::InvalidArgument(
+                                      "The underlying PyObject pointer of "
+                                      "numpy array cannot be nullptr"));
+    PADDLE_ENFORCE_NE(
+        arr_, Py_None,
+        platform::errors::PreconditionNotMet(
+            "The underlying PyObject pointer of numpy array cannot be None"));
+    Py_INCREF(arr_);
+  }
+  ~NumpyAllocation() override {
+    py::gil_scoped_acquire gil;
+    Py_DECREF(arr_);
+  }
+
+ private:
+  PyObject *arr_;
+};
+
 template <typename T>
 struct ValidDTypeToPyArrayChecker {
   static constexpr bool kValue = false;
@@ -141,19 +167,26 @@ template <typename T, typename P>
 void SetTensorFromPyArrayT(
     framework::Tensor *self,
     const py::array_t<T, py::array::c_style | py::array::forcecast> &array,
-    const P &place) {
+    const P &place, bool zero_copy) {
   std::vector<int64_t> dims;
   dims.reserve(array.ndim());
   for (decltype(array.ndim()) i = 0; i < array.ndim(); ++i) {
     dims.push_back(static_cast<int>(array.shape()[i]));
   }
   self->Resize(framework::make_ddim(dims));
-  auto dst = self->mutable_data<T>(place);
 
   if (paddle::platform::is_cpu_place(place)) {
-    std::memcpy(dst, array.data(), array.nbytes());
+    if (zero_copy) {
+      auto holder = std::make_shared<details::NumpyAllocation<T>>(array);
+      auto type = framework::ToDataType(std::type_index(typeid(T)));
+      self->ResetHolderWithType(holder, type);
+    } else {
+      auto dst = self->mutable_data<T>(place);
+      std::memcpy(dst, array.data(), array.nbytes());
+    }
   } else {
 #ifdef PADDLE_WITH_CUDA
+    auto dst = self->mutable_data<T>(place);
     if (paddle::platform::is_cuda_pinned_place(place)) {
       std::memcpy(dst, array.data(), array.nbytes());
     } else if (paddle::platform::is_gpu_place(place)) {
@@ -173,27 +206,29 @@ void SetTensorFromPyArrayT(
 
 template <typename P>
 void SetTensorFromPyArray(framework::Tensor *self, const py::object &obj,
-                          const P &place) {
+                          const P &place, bool zero_copy) {
   auto array = obj.cast<py::array>();
   if (py::isinstance<py::array_t<float>>(array)) {
-    SetTensorFromPyArrayT<float, P>(self, array, place);
+    SetTensorFromPyArrayT<float, P>(self, array, place, zero_copy);
   } else if (py::isinstance<py::array_t<int>>(array)) {
-    SetTensorFromPyArrayT<int, P>(self, array, place);
+    SetTensorFromPyArrayT<int, P>(self, array, place, zero_copy);
   } else if (py::isinstance<py::array_t<int64_t>>(array)) {
-    SetTensorFromPyArrayT<int64_t, P>(self, array, place);
+    SetTensorFromPyArrayT<int64_t, P>(self, array, place, zero_copy);
   } else if (py::isinstance<py::array_t<double>>(array)) {
-    SetTensorFromPyArrayT<double, P>(self, array, place);
+    SetTensorFromPyArrayT<double, P>(self, array, place, zero_copy);
   } else if (py::isinstance<py::array_t<int8_t>>(array)) {
-    SetTensorFromPyArrayT<int8_t, P>(self, array, place);
+    SetTensorFromPyArrayT<int8_t, P>(self, array, place, zero_copy);
   } else if (py::isinstance<py::array_t<uint8_t>>(array)) {
-    SetTensorFromPyArrayT<uint8_t, P>(self, array, place);
+    SetTensorFromPyArrayT<uint8_t, P>(self, array, place, zero_copy);
   } else if (py::isinstance<py::array_t<paddle::platform::float16>>(array)) {
-    SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place);
+    SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place,
+                                                        zero_copy);
   } else if (py::isinstance<py::array_t<uint16_t>>(array)) {
     // TODO(cql): temporary keeping uint16, should be depracated later
-    SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place);
+    SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place,
+                                                        zero_copy);
   } else if (py::isinstance<py::array_t<bool>>(array)) {
-    SetTensorFromPyArrayT<bool, P>(self, array, place);
+    SetTensorFromPyArrayT<bool, P>(self, array, place, zero_copy);
   } else {
     PADDLE_THROW(
         "Incompatible data or style type: tensor.set() supports bool, float16, "
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index 42d2de2626d..ed9e9abb28f 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -138,7 +138,6 @@ def guard(place=None):
     train = framework.Program()
     startup = framework.Program()
     tracer = Tracer()
-    core._switch_tracer(tracer)
 
     if place is None:
         if core.is_compiled_with_cuda():
@@ -173,7 +172,7 @@ def _print_debug_msg(limit=5, is_test=False):
 
 
 @framework.dygraph_only
-def to_variable(value, block=None, name=None):
+def to_variable(value, block=None, name=None, zero_copy=None):
     """
     The API will create a ``Variable`` object from numpy\.ndarray or Variable object.
 
@@ -181,6 +180,7 @@ def to_variable(value, block=None, name=None):
         value(ndarray): The numpy\.ndarray object that needs to be converted, it can be multi-dimension, and the data type is one of numpy\.{float16, float32, float64, int16, int32, int64, uint8, uint16}.
         block(fluid.Block, optional): Which block this variable will be in. Default: None.
         name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`
+        zero_copy(bool, optional): Whether to share memory with the input numpy array. This parameter only works with CPUPlace and will be set to True when it is None. Default: None.
 
     Returns:
         Variable: ``Tensor`` created from the specified numpy\.ndarray object, data type and shape is the same as ``value`` .
@@ -192,9 +192,14 @@ def to_variable(value, block=None, name=None):
         import numpy as np
         import paddle.fluid as fluid
 
-        with fluid.dygraph.guard():
+        with fluid.dygraph.guard(fluid.CPUPlace()):
             x = np.ones([2, 2], np.float32)
+            y = fluid.dygraph.to_variable(x, zero_copy=False)
+            x[0][0] = -1
+            y[0][0].numpy()  # array([1.], dtype=float32)
             y = fluid.dygraph.to_variable(x)
+            x[0][0] = 0
+            y[0][0].numpy()  # array([0.], dtype=float32)
 
     """
     if isinstance(value, np.ndarray):
@@ -212,7 +217,14 @@ def to_variable(value, block=None, name=None):
             stop_gradient=True)
         var = py_var._ivar.value()
         tensor = var.get_tensor()
-        tensor.set(value, framework._current_expected_place())
+        if isinstance(framework._current_expected_place(),
+                      framework.core.CPUPlace):
+            if zero_copy is None:
+                zero_copy = True
+            tensor.set(value, framework._current_expected_place(), zero_copy)
+        else:
+            assert not zero_copy, "zero_copy mode can only be used with CPUPlace"
+            tensor.set(value, framework._current_expected_place(), False)
         return py_var
     elif isinstance(value, framework.Variable):
         return value
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 469a151eb41..ec6f8198e42 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -4781,9 +4781,11 @@ def _dygraph_guard(tracer):
     global _dygraph_tracer_
     tmp_trace = _dygraph_tracer_
     _dygraph_tracer_ = tracer
+    core._switch_tracer(tracer)
 
     yield
 
+    core._switch_tracer(tmp_trace)
     _dygraph_tracer_ = tmp_trace
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py
new file mode 100644
index 00000000000..da01be8159a
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import paddle.fluid as fluid
+
+
+class TestImperativeNumpyBridge(unittest.TestCase):
+    def test_tensor_from_numpy(self):
+        data_np = np.array([[2, 3, 1]]).astype('float32')
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            var = fluid.dygraph.to_variable(data_np, zero_copy=True)
+            self.assertTrue(np.array_equal(var.numpy(), data_np))
+            data_np[0][0] = 4
+            self.assertEqual(data_np[0][0], 4)
+            self.assertEqual(var[0][0].numpy()[0], 4)
+            self.assertTrue(np.array_equal(var.numpy(), data_np))
+            var2 = fluid.dygraph.to_variable(data_np, zero_copy=False)
+            self.assertTrue(np.array_equal(var2.numpy(), data_np))
+            data_np[0][0] = -1
+            self.assertEqual(data_np[0][0], -1)
+            self.assertNotEqual(var2[0][0].numpy()[0], -1)
+            self.assertFalse(np.array_equal(var2.numpy(), data_np))
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab