From fe291daf684e7a2d3c24c9cfebf013eaf5892b28 Mon Sep 17 00:00:00 2001
From: Jiabin Yang <360788950@qq.com>
Date: Wed, 23 Mar 2022 15:23:10 +0800
Subject: [PATCH] Support sharding (#40637)

* support sharding api
* support multi api for sharding in eager
* support multi api for sharding in eager
* fix test
* fix test coverage
---
 paddle/fluid/pybind/eager_method.cc          | 47 +++++++++++++-
 paddle/fluid/pybind/pybind.cc                |  2 +
 paddle/phi/api/include/tensor.h              |  4 +-
 paddle/phi/api/lib/tensor_method.cc          | 12 ++--
 .../fluid/dygraph/varbase_patch_methods.py   | 32 ++++++++++
 .../tests/unittests/test_egr_python_api.py   | 61 ++++++++++++++++---
 6 files changed, 139 insertions(+), 19 deletions(-)

diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 52a43c4ebe8..5b9b91ef89b 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -948,8 +948,8 @@ static PyObject* tensor_register_reduce_hook(TensorObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
-static PyObject* set_grad_type(TensorObject* self, PyObject* args,
-                               PyObject* kwargs) {
+static PyObject* tensor__set_grad_type(TensorObject* self, PyObject* args,
+                                       PyObject* kwargs) {
   EAGER_TRY
   auto var_type = pybind::CastPyArg2ProtoType(PyTuple_GET_ITEM(args, 0), 0);
   auto grad_tensor =
@@ -963,6 +963,42 @@ static PyObject* set_grad_type(TensorObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* tensor__clear(TensorObject* self, PyObject* args,
+                               PyObject* kwargs) {
+  EAGER_TRY
+  self->tensor.reset();
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* tensor__copy_gradient_from(TensorObject* self, PyObject* args,
+                                            PyObject* kwargs) {
+  EAGER_TRY
+  auto src = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0);
+  if (self->tensor.is_initialized()) {
+    PADDLE_ENFORCE_EQ(self->tensor.dtype(), src.dtype(),
+                      platform::errors::PreconditionNotMet(
+                          "Tensor %s has different data type with Tensor %s",
+                          self->tensor.name(), src.name()));
+    PADDLE_ENFORCE_EQ(self->tensor.impl()->type_info().id(),
+                      src.impl()->type_info().id(),
+                      platform::errors::PreconditionNotMet(
+                          "Tensor %s has different type with Tensor %s, Tensor "
+                          "ShareGradientDataWith cannot be performed!",
+                          self->tensor.name(), src.name()));
+  }
+  VLOG(6) << "Tensor copy gradient from: " << src.name();
+  auto* p_grad = egr::EagerUtils::mutable_grad(self->tensor);
+  if (p_grad) {
+    PADDLE_ENFORCE_EQ(src.initialized(), true,
+                      platform::errors::InvalidArgument(
+                          "Tensor %s has not been initialized", src.name()));
+    p_grad->set_impl(src.impl());
+  }
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
 static PyObject* tensor_method_get_non_zero_indices(TensorObject* self,
                                                     PyObject* args,
                                                     PyObject* kwargs) {
@@ -1117,7 +1153,12 @@ PyMethodDef variable_methods[] = {
     {"_register_backward_hook",
      (PyCFunction)(void (*)(void))tensor_register_reduce_hook,
      METH_VARARGS | METH_KEYWORDS, NULL},
-    {"_set_grad_type", (PyCFunction)(void (*)(void))set_grad_type,
+    {"_set_grad_type", (PyCFunction)(void (*)(void))tensor__set_grad_type,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_clear", (PyCFunction)(void (*)(void))tensor__clear,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_copy_gradient_from",
+     (PyCFunction)(void (*)(void))tensor__copy_gradient_from,
      METH_VARARGS | METH_KEYWORDS, NULL},
     /***the method of sparse tensor****/
     {"non_zero_indices",
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index f5c853fb4b8..84c711f9b87 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -829,6 +829,8 @@ PYBIND11_MODULE(core_noavx, m) {
            [](const framework::Tensor &self) {
              return reinterpret_cast<uintptr_t>(self.data());
            })
+      .def("_slice", &framework::Tensor::Slice)
+      .def("_numel", &framework::Tensor::numel)
       .def("_is_initialized",
            [](const framework::Tensor &self) { return self.IsInitialized(); })
       .def("_get_dims",
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index 6fab6643f39..b881b5bac21 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -427,9 +427,7 @@ class PADDLE_API Tensor final {
    * @param blocking, Should we copy this in sync way.
    * @return void
    */
-  void copy_(const Tensor& src,
-             const phi::Place& target_place,
-             const bool blocking);
+  void copy_(const Tensor& src, const phi::Place& target_place, bool blocking);
   /**
    * @brief Cast datatype from one to another
    *
diff --git a/paddle/phi/api/lib/tensor_method.cc b/paddle/phi/api/lib/tensor_method.cc
index c6214052f7b..c502747c4f9 100644
--- a/paddle/phi/api/lib/tensor_method.cc
+++ b/paddle/phi/api/lib/tensor_method.cc
@@ -84,26 +84,26 @@ void Tensor::copy_(const Tensor &src,
   if (is_initialized()) {
     PADDLE_ENFORCE_EQ(dtype(),
                       src.dtype(),
-                      platform::errors::PreconditionNotMet(
+                      phi::errors::PreconditionNotMet(
                           "Tensor %s has different data type with Tensor %s, "
                           "Tensor Copy cannot be performed!",
                           name(),
                           src.name()));
     PADDLE_ENFORCE_EQ(impl()->type_info().id(),
                       src.impl()->type_info().id(),
-                      platform::errors::PreconditionNotMet(
+                      phi::errors::PreconditionNotMet(
                           "Tensor %s has different type with Tensor %s, Tensor "
                           "Copy cannot be performed!",
                           name(),
                           src.name()));
     PADDLE_ENFORCE_EQ(target_place,
                       inner_place(),
-                      platform::errors::PreconditionNotMet(
+                      phi::errors::PreconditionNotMet(
                           "Place is different of dst tensor and args %s, which "
                           "current tensor holds %s "
                           "Copy cannot be performed!",
-                          target_place.DebugString(),
-                          inner_place().DebugString()));
+                          target_place,
+                          inner_place()));
     kernel_key_set.backend_set =
         kernel_key_set.backend_set |
         BackendSet(phi::TransToPhiBackend(inner_place()));
@@ -177,7 +177,7 @@ void Tensor::copy_(const Tensor &src,
                   blocking,
                   static_cast<phi::DenseTensor *>(impl_.get()));
   } else {
-    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+    PADDLE_THROW(phi::errors::InvalidArgument(
         "We currently only support dense tensor copy for now and if u need to "
         "copy selected rows please raise a issue."));
   }
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index 2ca923f8634..878fc1c68e4 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -797,6 +797,34 @@ def monkey_patch_varbase():
     def value(self):
         return self
 
+    @framework.dygraph_only
+    def _slice(self, begin_idx, end_idx):
+        return core.eager.Tensor(self.get_tensor()._slice(begin_idx, end_idx))
+
+    @framework.dygraph_only
+    def _numel(self):
+        return self.get_tensor()._numel()
+
+    @framework.dygraph_only
+    def cpu(self):
+        if self.place.is_cpu_place():
+            return self
+        else:
+            res = self._copy_to(core.CPUPlace(), True)
+            res.stop_gradient = self.stop_gradient
+            res.persistable = self.persistable
+            return res
+
+    @framework.dygraph_only
+    def cuda(self, device_id, blocking):
+        if self.place.is_gpu_place():
+            return self
+        else:
+            res = self._copy_to(core.CUDAPlace(device_id), True)
+            res.stop_gradient = self.stop_gradient
+            res.persistable = self.persistable
+            return res
+
     if core._in_eager_mode() and not hasattr(core, "eager"):
         return
 
@@ -820,6 +848,10 @@ def monkey_patch_varbase():
         setattr(core.eager.Tensor, "_set_grad_ivar", _set_grad_ivar)
         setattr(core.eager.Tensor, "clone", clone)
         setattr(core.eager.Tensor, "value", value)
+        setattr(core.eager.Tensor, "cpu", cpu)
+        setattr(core.eager.Tensor, "cuda", cuda)
+        setattr(core.eager.Tensor, "_slice", _slice)
+        setattr(core.eager.Tensor, "_numel", _numel)
     else:
         setattr(core.VarBase, "__name__", "Tensor")
         setattr(core.VarBase, "grad", grad)
diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
index 8166598677a..ce771a572e2 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -634,20 +634,39 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
             if core.is_compiled_with_cuda():
                 tensor3 = tensor2._copy_to(core.CUDAPlace(0), True)
                 self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
-                self.assertTrue(tensor3.persistable, True)
-                self.assertTrue(tensor3.stop_gradient, True)
+                self.assertEqual(tensor3.persistable, True)
+                self.assertEqual(tensor3.stop_gradient, True)
                 self.assertTrue(tensor3.place.is_gpu_place())
-                tensor4 = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
-                tensor5 = tensor4._copy_to(core.CUDAPlace(0), True)
+
+                tensor4 = tensor2.cuda(0, True)
+                self.assertTrue(np.array_equal(tensor4.numpy(), arr2))
+                self.assertEqual(tensor4.persistable, True)
+                self.assertEqual(tensor4.stop_gradient, False)
+                self.assertTrue(tensor4.place.is_gpu_place())
+
+                tensor5 = tensor4.cpu()
+                self.assertTrue(np.array_equal(tensor5.numpy(), arr2))
+                self.assertEqual(tensor5.persistable, True)
+                self.assertEqual(tensor5.stop_gradient, False)
+                self.assertTrue(tensor5.place.is_cpu_place())
+
+                tensor10 = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
+                tensor11 = tensor10._copy_to(core.CUDAPlace(0), True)
                 self.assertTrue(
-                    np.array_equal(tensor4.numpy(), tensor5.numpy()))
+                    np.array_equal(tensor10.numpy(), tensor11.numpy()))
             else:
                 tensor3 = tensor2._copy_to(core.CPUPlace(), True)
                 self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
-                self.assertTrue(tensor3.persistable, True)
-                self.assertTrue(tensor3.stop_gradient, True)
+                self.assertEqual(tensor3.persistable, True)
+                self.assertEqual(tensor3.stop_gradient, True)
                 self.assertTrue(tensor3.place.is_cpu_place())
 
+                tensor4 = tensor2.cpu()
+                self.assertTrue(np.array_equal(tensor4.numpy(), arr2))
+                self.assertEqual(tensor4.persistable, True)
+                self.assertEqual(tensor4.stop_gradient, False)
+                self.assertTrue(tensor4.place.is_cpu_place())
+
     def test_share_buffer_to(self):
         with _test_eager_guard():
             arr = np.ones([4, 16, 16, 32]).astype('float32')
@@ -784,6 +803,34 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
             self.assertEqual(egr_tensor.shape, [4, 16, 16, 32])
             self.assertTrue(np.array_equal(egr_tensor.numpy(), new_arr))
 
+    def test_sharding_related_api(self):
+        with _test_eager_guard():
+            arr0 = np.random.rand(4, 16, 16, 32).astype('float32')
+            egr_tensor1 = core.eager.Tensor(arr0,
+                                            core.CPUPlace(), True, False,
+                                            "numpy_tensor1", False)
+            self.assertEqual(egr_tensor1._numel(), 32768)
+            self.assertEqual(egr_tensor1._slice(0, 2)._numel(), 16384)
+
+    def test_copy_gradient_from(self):
+        with _test_eager_guard():
+            np_x = np.random.random((2, 2))
+            np_y = np.random.random((2, 2))
+            x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
+            y = paddle.to_tensor(np_y, dtype="float64")
+            out = x + x
+            out.backward()
+            x._copy_gradient_from(y)
+            self.assertTrue(np.array_equal(x.grad.numpy(), np_y))
+
+    def test_clear(self):
+        with _test_eager_guard():
+            np_x = np.random.random((3, 8, 8))
+            x = paddle.to_tensor(np_x, dtype="float64")
+            self.assertTrue(x._is_initialized())
+            x._clear()
+            self.assertFalse(x._is_initialized())
+
 
 class EagerParamBaseUsageTestCase(unittest.TestCase):
     def test_print(self):
-- 
GitLab
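
For reference, the new eager-mode helpers introduced by this patch (cpu, cuda, _slice, _numel, _clear, _copy_gradient_from) can be exercised roughly as in the unit tests above. The following is a minimal sketch, not part of the commit itself; the tensor shapes, values, and the CUDA device id are illustrative only:

    import numpy as np
    import paddle
    from paddle.fluid import core
    from paddle.fluid.framework import _test_eager_guard

    with _test_eager_guard():
        t = paddle.to_tensor(np.random.rand(4, 16).astype('float32'))

        # _numel()/_slice() forward to the framework::Tensor bindings
        # added in pybind.cc (_numel, _slice).
        assert t._numel() == 64
        assert t._slice(0, 2)._numel() == 32

        # cpu()/cuda() copy the tensor to the target place and preserve the
        # stop_gradient / persistable flags; in this patch cuda() takes an
        # explicit device id and blocking flag.
        t_cpu = t.cpu()
        if core.is_compiled_with_cuda():
            t_gpu = t.cuda(0, True)

        # _copy_gradient_from() shares another tensor's storage as this
        # tensor's grad; _clear() releases the tensor's underlying storage.
        x = paddle.to_tensor(np.ones((2, 2)), stop_gradient=False)
        y = paddle.to_tensor(np.zeros((2, 2)))
        (x + x).backward()
        x._copy_gradient_from(y)
        x._clear()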