From c1931beb72ff8429ec44230c014777840fb231b0 Mon Sep 17 00:00:00 2001
From: Jiabin Yang <360788950@qq.com>
Date: Thu, 17 Mar 2022 10:26:08 +0800
Subject: [PATCH] fix copy_ problem by doing it with phi copy (#40521)

* fix copy_ problem by doing it with phi copy

* improve test coverage

* refactor copy with sr kernel
---
 .../data_structure_tests/eager_tensor_test.cc |  53 +++++++++
 paddle/fluid/pybind/eager_method.cc           |  15 ++-
 paddle/fluid/pybind/eager_properties.cc       |   2 +-
 paddle/phi/api/include/tensor.h               |   4 +-
 paddle/phi/api/lib/CMakeLists.txt             |   2 +-
 paddle/phi/api/lib/tensor_method.cc           | 102 ++++++++++++++++--
 .../phi/kernels/selected_rows/copy_kernel.cc  |  49 +++++++++
 .../phi/kernels/selected_rows/copy_kernel.h   |  31 ++++++
 8 files changed, 248 insertions(+), 10 deletions(-)
 create mode 100644 paddle/phi/kernels/selected_rows/copy_kernel.cc
 create mode 100644 paddle/phi/kernels/selected_rows/copy_kernel.h

diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
index 1683f4ed5fb..c8b2d22dcf9 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
@@ -17,6 +17,14 @@
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy_sr, CPU, ALL_LAYOUT);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy_sr, GPU, ALL_LAYOUT);
+#endif
 
 namespace eager_test {
 using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
@@ -151,5 +159,50 @@ TEST(EagerVariable, Constructor) {
   CHECK_EQ(dt3_tmp_ptr[1], 10.0f);
   t4.reset();
   CHECK(t4.defined() == false);
+
+  VLOG(6) << "Check Tensor Copy_";
+  std::vector<int64_t> rows = {1, 2};
+  std::vector<int64_t> dims = {2};
+  paddle::experimental::Tensor t7(std::make_shared<phi::SelectedRows>(rows, 2));
+  std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
+      ->mutable_value()
+      ->Resize(phi::make_ddim(dims));
+  auto* dt7_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
+                          ->mutable_value()
+                          ->mutable_data<float>(paddle::platform::CPUPlace());
+  dt7_tmp_ptr[0] = 6.0f;
+  dt7_tmp_ptr[1] = 11.0f;
+
+  paddle::experimental::Tensor t8;
+  paddle::experimental::Tensor t5;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  paddle::experimental::Tensor t6;
+  paddle::experimental::Tensor t9;
+  VLOG(6) << "Check Tensor Copy_ Selected Rows";
+  t8.copy_(t7, paddle::platform::CUDAPlace(0), true);
+  t9.copy_(t8, paddle::platform::CPUPlace(), true);
+  auto* dt9_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())
+                          ->value()
+                          .data<float>();
+  CHECK_EQ(dt9_tmp_ptr[0], 6.0f);
+  CHECK_EQ(dt9_tmp_ptr[1], 11.0f);
+  CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())->height(),
+           2);
+
+  VLOG(6) << "Check Tensor Copy_ Dense Tensor";
+  t5.copy_(t3, paddle::platform::CUDAPlace(0), true);
+  t6.copy_(t5, paddle::platform::CPUPlace(), true);
+  auto* dt6_tmp_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(t6.impl())->data<float>();
+  CHECK_EQ(dt6_tmp_ptr[0], 5.0f);
+  CHECK_EQ(dt6_tmp_ptr[1], 10.0f);
+#else
+  t5.copy_(t3, paddle::platform::CPUPlace(), true);
+  auto* dt5_tmp_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(t5.impl())->data<float>();
+  CHECK_EQ(dt5_tmp_ptr[0], 5.0f);
+  CHECK_EQ(dt5_tmp_ptr[1], 10.0f);
+#endif
+  VLOG(6) << "Finish";
 }
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 082ec382c79..7f8fcd351fe 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -226,6 +226,19 @@ static PyObject* tensor_method__copy_to(TensorObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* tensor_method_cpu(TensorObject* self, PyObject* args,
+                                   PyObject* kwargs) {
+  EAGER_TRY
+  auto cp_tensor =
+      self->tensor.copy_to(phi::TransToPhiBackend(phi::CPUPlace()), true);
+  egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
+  egr::EagerUtils::autograd_meta(&cp_tensor)
+      ->SetPersistable(
+          egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
+  return ToPyObject(cp_tensor);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* tensor_method_reconstruct_from_(TensorObject* self,
                                                  PyObject* args,
                                                  PyObject* kwargs) {
@@ -264,7 +277,7 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
         egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
   }
 
-  self->tensor.copy_(src_tensor, blocking);
+  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
 
   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index 2572866b8f5..ff8980d727e 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -96,7 +96,7 @@ int tensor_properties_set_grad(TensorObject* self, PyObject* value,
                          "Detected NULL grad"
                          "Please check if you have manually cleared"
                          "the grad inside autograd_meta"));
-  grad->copy_(src, true);
+  grad->copy_(src, self->tensor.inner_place(), true);
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_ZERO
 }
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index c268742fa56..1312710a80f 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -397,7 +397,9 @@ class PADDLE_API Tensor final {
    * @param blocking, Should we copy this in sync way.
    * @return void
    */
-  void copy_(const Tensor& src, const bool blocking);
+  void copy_(const Tensor& src,
+             const phi::Place& target_place,
+             const bool blocking);
   /**
    * @brief Cast datatype from one to another
    *
diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt
index 42bf7a8103f..4cbca072362 100644
--- a/paddle/phi/api/lib/CMakeLists.txt
+++ b/paddle/phi/api/lib/CMakeLists.txt
@@ -148,4 +148,4 @@ cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw ph
 cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl)
 cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl)
 
-cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api)
+cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta)
diff --git a/paddle/phi/api/lib/tensor_method.cc b/paddle/phi/api/lib/tensor_method.cc
index 885e29b27fa..cc797507e68 100644
--- a/paddle/phi/api/lib/tensor_method.cc
+++ b/paddle/phi/api/lib/tensor_method.cc
@@ -19,9 +19,12 @@ limitations under the License. */
 #include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/phi/core/tensor_base.h"
 
+#include "paddle/phi/api/lib/api_gen_utils.h"
+#include "paddle/phi/api/lib/kernel_dispatch.h"
+#include "paddle/phi/infermeta/unary.h"
+
 namespace paddle {
 namespace experimental {
-
 // declare cast api
 Tensor cast(const Tensor &x, DataType out_dtype);
 Tensor copy_to(const Tensor &x, Backend backend, bool blocking);
@@ -67,12 +70,18 @@ template PADDLE_API Tensor Tensor::copy_to<phi::dtype::complex<double>>(
 template PADDLE_API Tensor
 Tensor::copy_to<phi::dtype::float16>(const PlaceType &target_place) const;
 
-void Tensor::copy_(const Tensor &src, bool blocking) {
+void Tensor::copy_(const Tensor &src,
+                   const phi::Place &target_place,
+                   bool blocking) {
   if (!src.is_initialized()) {
+    VLOG(8) << "Src is empty, skip copy";
     return;
   }
+  // Prepare copy kernel key and outputs
+  auto kernel_key_set = ParseKernelKeyByInputArgs(src);
+  KernelType kernel_type = ParseKernelTypeByInputArgs(src);
   VLOG(3) << "Deep copy Tensor from " << src.name() << " to " << name();
-  if (defined()) {
+  if (is_initialized()) {
     PADDLE_ENFORCE_EQ(dtype(),
                       src.dtype(),
                       platform::errors::PreconditionNotMet(
@@ -87,10 +96,91 @@ void Tensor::copy_(const Tensor &src, bool blocking) {
                           "Copy cannot be performed!",
                           name(),
                           src.name()));
+    PADDLE_ENFORCE_EQ(target_place,
+                      inner_place(),
+                      platform::errors::PreconditionNotMet(
+                          "The target place %s is different from the place %s "
+                          "that the destination tensor currently holds. "
+                          "Copy cannot be performed!",
+                          target_place.DebugString(),
+                          inner_place().DebugString()));
+    kernel_key_set.backend_set =
+        kernel_key_set.backend_set |
+        BackendSet(phi::TransToPhiBackend(inner_place()));
+  } else {
+    // Deep Copy AutoGrad info from src to self.
+    *autograd_meta_ = *(src.autograd_meta_);
+  }
+
+  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+  auto *dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+
+  if (kernel_type == KernelType::DENSE_TENSOR_KENREL) {
+    auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+        "copy", {kernel_backend, kernel_layout, kernel_data_type});
+    VLOG(6) << "copy API kernel key: " << kernel_key;
+    VLOG(6) << "copy API kernel: " << kernel;
+    using kernel_signature = void (*)(const platform::DeviceContext &,
+                                      const phi::DenseTensor &,
+                                      phi::Place,
+                                      bool,
+                                      phi::DenseTensor *);
+    SetKernelOutput(kernel_backend, this);
+    phi::MetaTensor meta_out(impl_.get());
+    phi::UnchangedInferMeta(
+        MakeMetaTensor(
+            *(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
+        &meta_out);
+    auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 (*(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
+                 target_place,
+                 blocking,
+                 static_cast<phi::DenseTensor *>(impl_.get()));
+  } else if (kernel_type == KernelType::SELECTED_ROWS_KENREL) {
+    auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+        "copy_sr", {kernel_backend, kernel_layout, kernel_data_type});
+    VLOG(6) << "copy API kernel key: " << kernel_key;
+    VLOG(6) << "copy API kernel: " << kernel;
+    using kernel_signature = void (*)(const platform::DeviceContext &,
+                                      const phi::SelectedRows &,
+                                      phi::Place,
+                                      bool,
+                                      phi::SelectedRows *);
+    SetSelectedRowsKernelOutput(kernel_backend, this);
+    phi::MetaTensor meta_out(impl_.get());
+    phi::UnchangedInferMeta(
+        MakeMetaTensor(
+            *(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
+        &meta_out);
+    auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 (*(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
+                 target_place,
+                 blocking,
+                 static_cast<phi::SelectedRows *>(impl_.get()));
+  } else {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "We currently only support copying DenseTensor and SelectedRows; "
+        "if you need to copy other tensor types, please raise an issue."));
   }
-  auto copy_tensor =
-      src.copy_to(phi::TransToPhiBackend(src.inner_place()), blocking);
-  set_impl(copy_tensor.impl());
 }
 
 }  // namespace experimental
diff --git a/paddle/phi/kernels/selected_rows/copy_kernel.cc b/paddle/phi/kernels/selected_rows/copy_kernel.cc
new file mode 100644
index 00000000000..cf71ab0583f
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/copy_kernel.cc
@@ -0,0 +1,49 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/selected_rows/copy_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/copy_kernel.h"
+namespace phi {
+namespace sr {
+
+template <typename Context>
+void Copy(const Context& dev_ctx,
+          const SelectedRows& src,
+          Place dst_place,
+          bool blocking,
+          SelectedRows* dst) {
+  if (src.value().Holder() != dst->value().Holder() ||
+      src.value().data() != dst->value().data()) {
+    dst->set_rows(src.rows());
+    dst->set_height(src.height());
+  }
+  phi::Copy<Context>(
+      dev_ctx, src.value(), dst_place, blocking, dst->mutable_value());
+}
+
+}  // namespace sr
+}  // namespace phi
+
+PD_REGISTER_GENERAL_KERNEL(
+    copy_sr, CPU, ALL_LAYOUT, phi::sr::Copy<phi::CPUContext>, ALL_DTYPE) {}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_REGISTER_GENERAL_KERNEL(
+    copy_sr, GPU, ALL_LAYOUT, phi::sr::Copy<phi::GPUContext>, ALL_DTYPE) {}
+#endif
diff --git a/paddle/phi/kernels/selected_rows/copy_kernel.h b/paddle/phi/kernels/selected_rows/copy_kernel.h
new file mode 100644
index 00000000000..4aa848bea2a
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/copy_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/selected_rows.h"
+#include "paddle/phi/core/sparse_csr_tensor.h"
+
+namespace phi {
+namespace sr {
+
+template <typename Context>
+void Copy(const Context& dev_ctx,
+          const SelectedRows& src,
+          Place dst_place,
+          bool blocking,
+          SelectedRows* dst);
+
+}  // namespace sr
+}  // namespace phi
-- 
GitLab