From f637e3d20d76248c214b1764e524fd8f9fa0ece3 Mon Sep 17 00:00:00 2001
From: Chen Weihang <chenwhpro@163.com>
Date: Thu, 21 Apr 2022 09:57:22 +0800
Subject: [PATCH] [Cherry-pick] Polish custom op details (#42008)

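* Polish Tensor API details (#41971): Tensor::dims() and Tensor::place()
  now return const references, and Place arguments (Tensor::copy_to and
  the generated APIs) are passed by const reference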

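* [CustomOp] Fix custom op pinned input error (#41972): when built with
  CUDA or HIP, a GPU-pinned input tensor is copied to the current GPU
  device before it is passed to the custom kernel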

* fix custom op pinned input error

* fix compile error

* Fix inference custom op (#41999): ship phi/core/macros.h and
  utils/flat_hash_map.h with the inference library headers

* resolve conflict
---
 cmake/inference_lib.cmake                     |  8 +++++-
 cmake/phi_header.cmake                        |  3 +-
 paddle/fluid/framework/custom_operator.cc     | 12 ++++++++
 paddle/phi/api/include/tensor.h               |  6 ++--
 paddle/phi/api/lib/tensor.cc                  |  4 +--
 paddle/phi/api/lib/tensor_method.cc           |  4 +--
 .../custom_op/test_custom_relu_op_setup.py    | 28 +++++++++++++++++++
 python/paddle/utils/code_gen/api_base.py      | 10 +++----
 .../paddle/utils/code_gen/strings_api_gen.py  |  2 +-
 9 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index 1b38f208716..0574bfb9eea 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -248,15 +248,21 @@ copy(inference_lib_dist
 copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/phi/common/*.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common/)
+copy(inference_lib_dist
+        SRCS  ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h
+        DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/)
 copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/any.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
 copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
-        copy(inference_lib_dist
+copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/none.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
+copy(inference_lib_dist
+        SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h
+        DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
 copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/extension.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
diff --git a/cmake/phi_header.cmake b/cmake/phi_header.cmake
index c9b7e465337..b23b4086b18 100644
--- a/cmake/phi_header.cmake
+++ b/cmake/phi_header.cmake
@@ -36,7 +36,8 @@ phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experiment
 phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext)
 phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include)
 phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common)
+phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core)
 
 # In order to be compatible with the original behavior, the header file name needs to be changed
 file(RENAME ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h
-            ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h)
\ No newline at end of file
+            ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h)
diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc
index fbcd920905c..3f28b2e8c73 100644
--- a/paddle/fluid/framework/custom_operator.cc
+++ b/paddle/fluid/framework/custom_operator.cc
@@ -33,6 +33,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/phi_utils.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/dynload/dynamic_loader.h"
 #include "paddle/fluid/string/string_helper.h"
 #include "paddle/phi/api/all.h"
@@ -160,7 +161,18 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                             "Input tensor (%s) is not initialized.", in_name));
       paddle::experimental::Tensor custom_in;
       custom_in.set_impl(std::make_shared<phi::DenseTensor>(*x));
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      if (custom_in.is_gpu_pinned()) {
+        VLOG(3) << "Custom Operator: custom input is gpu pinned tensor";
+        auto gpu_place = phi::GPUPlace(platform::GetCurrentDeviceId());
+        auto custom_gpu_in = custom_in.copy_to(gpu_place, true);
+        kernel_ctx.EmplaceBackInput(std::move(custom_gpu_in));
+      } else {
+        kernel_ctx.EmplaceBackInput(std::move(custom_in));
+      }
+#else
       kernel_ctx.EmplaceBackInput(std::move(custom_in));
+#endif
     }
   }
 
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index e4a97e2c16f..2b0aea9e1ec 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -166,7 +166,7 @@ class PADDLE_API Tensor final {
    *
    * @return phi::DDim
    */
-  phi::DDim dims() const;
+  const phi::DDim& dims() const;
 
   /**
    * @brief Return the shape (dimensions) of Tensor.
@@ -260,7 +260,7 @@ class PADDLE_API Tensor final {
    *
    * @return Place
    */
-  Place place() const;
+  const Place& place() const;
 
   /**
    * @brief Determine whether the tensor device is CPU
@@ -421,7 +421,7 @@ class PADDLE_API Tensor final {
    * @param blocking, Should we copy this in sync way.
    * @return Tensor
    */
-  Tensor copy_to(Place place, bool blocking) const;
+  Tensor copy_to(const Place& place, bool blocking) const;
 
   /**
    * @brief Transfer the source Tensor to current Tensor.
diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc
index f1aa48a2a4d..be0a937c91e 100644
--- a/paddle/phi/api/lib/tensor.cc
+++ b/paddle/phi/api/lib/tensor.cc
@@ -110,7 +110,7 @@ int64_t Tensor::numel() const { return impl_->numel(); }
 
 int64_t Tensor::size() const { return impl_->numel(); }
 
-phi::DDim Tensor::dims() const { return impl_->dims(); }
+const phi::DDim &Tensor::dims() const { return impl_->dims(); }
 
 std::vector<int64_t> Tensor::shape() const {
   auto dims = impl_->dims();
@@ -158,7 +158,7 @@ bool Tensor::is_string_tensor() const {
 }
 /* Part 3: Device and Backend methods */
 
-Place Tensor::place() const {
+const Place &Tensor::place() const {
   PADDLE_ENFORCE_NOT_NULL(
       impl_,
       phi::errors::PermissionDenied(
diff --git a/paddle/phi/api/lib/tensor_method.cc b/paddle/phi/api/lib/tensor_method.cc
index 463b72d0dbf..5285392b4a6 100644
--- a/paddle/phi/api/lib/tensor_method.cc
+++ b/paddle/phi/api/lib/tensor_method.cc
@@ -27,13 +27,13 @@ namespace paddle {
 namespace experimental {
 // declare cast api
 Tensor cast(const Tensor &x, DataType out_dtype);
-Tensor copy_to(const Tensor &x, Place place, bool blocking);
+Tensor copy_to(const Tensor &x, const Place &place, bool blocking);
 
 Tensor Tensor::cast(DataType target_type) const {
   return experimental::cast(*this, target_type);
 }
 
-Tensor Tensor::copy_to(Place place, bool blocking) const {
+Tensor Tensor::copy_to(const Place &place, bool blocking) const {
   return experimental::copy_to(*this, place, blocking);
 }
 
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
index 582b14c82b5..55c9571d44f 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
@@ -20,6 +20,7 @@ import paddle
 import paddle.static as static
 import subprocess
 import numpy as np
+from paddle.vision.transforms import Compose, Normalize
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 from paddle.fluid.framework import _test_eager_guard
 
@@ -329,6 +330,33 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                     "custom op dx grad: {},\n paddle api dx grad: {}".format(
                         dx_grad, pd_dx_grad))
 
+    def test_with_dataloader(self):
+        for device in self.devices:
+            paddle.set_device(device)
+            # data loader
+            transform = Compose(
+                [Normalize(
+                    mean=[127.5], std=[127.5], data_format='CHW')])
+            train_dataset = paddle.vision.datasets.MNIST(
+                mode='train', transform=transform)
+            train_loader = paddle.io.DataLoader(
+                train_dataset,
+                batch_size=64,
+                shuffle=True,
+                drop_last=True,
+                num_workers=0)
+
+            for batch_id, (image, _) in enumerate(train_loader()):
+                out = self.custom_ops[0](image)
+                pd_out = paddle.nn.functional.relu(image)
+                self.assertTrue(
+                    np.array_equal(out, pd_out),
+                    "custom op out: {},\n paddle api out: {}".format(out,
+                                                                     pd_out))
+
+                if batch_id == 5:
+                    break
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py
index de30b45c31e..e8d067483d8 100644
--- a/python/paddle/utils/code_gen/api_base.py
+++ b/python/paddle/utils/code_gen/api_base.py
@@ -103,7 +103,7 @@ class BaseAPI(object):
             'double': 'double',
             'bool': 'bool',
             'str': 'const std::string&',
-            'Place': 'Place',
+            'Place': 'const Place&',
             'DataLayout': 'DataLayout',
             'DataType': 'DataType',
             'int64_t[]': 'const std::vector<int64_t>&',
@@ -118,7 +118,7 @@ class BaseAPI(object):
             'float': 'paddle::optional<float>',
             'double': 'paddle::optional<double>',
             'bool': 'paddle::optional<bool>',
-            'Place': 'paddle::optional<Place>',
+            'Place': 'paddle::optional<const Place&>',
             'DataLayout': 'paddle::optional<DataLayout>',
             'DataType': 'paddle::optional<DataType>'
         }
@@ -329,7 +329,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
                 assert len(
                     vars_list
                 ) == 2, f"{self.api} api: The number of params to set backend with '>' only allows 2, but received {len(vars_list)}."
-                assert (vars_list[0].strip() in self.attrs['names']) and (self.attrs['attr_info'][vars_list[0].strip()][0] == 'Place'), \
+                assert (vars_list[0].strip() in self.attrs['names']) and (self.attrs['attr_info'][vars_list[0].strip()][0] == 'const Place&'), \
                     f"{self.api} api: When use '>' to set kernel backend, the first param should be a attribute with Place type."
                 backend_select_code = f"""
   kernel_backend = ParseBackendWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
@@ -361,7 +361,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
         attr_layout_count = 0
         attr_data_type_count = 0
         for attr_name in attrs['names']:
-            if attrs['attr_info'][attr_name][0] == 'Place':
+            if attrs['attr_info'][attr_name][0] == 'const Place&':
                 assert kernel['backend'] is not None, \
                     f"{api} api: When there is a parameter with 'Place' type in attributes, you must set backend of kernel manually."
                 attr_backend_count = attr_backend_count + 1
@@ -421,7 +421,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
 
         if len(input_names) == 0:
             assert attr_backend_count > 0 and attr_data_type_count > 0, \
-                f"{api} api: When there is no input tensor, the args must have 'Backend' and 'DataType'."
+                f"{api} api: When there is no input tensor, the args must have 'Place' and 'DataType'."
 
         kernel_select_args = ""
         for input_name in input_names:
diff --git a/python/paddle/utils/code_gen/strings_api_gen.py b/python/paddle/utils/code_gen/strings_api_gen.py
index d7117e9d540..061ea6c3cee 100644
--- a/python/paddle/utils/code_gen/strings_api_gen.py
+++ b/python/paddle/utils/code_gen/strings_api_gen.py
@@ -225,7 +225,7 @@ PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.args_s
                 assert len(
                     vars_list
                 ) == 2, f"{api} api: The number of params to set backend with '>' only allows 2, but received {len(vars_list)}."
-                assert (vars_list[0].strip() in attrs['names']) and (attrs['attr_info'][vars_list[0].strip()][0] == 'Place'), \
+                assert (vars_list[0].strip() in attrs['names']) and (attrs['attr_info'][vars_list[0].strip()][0] == 'const Place&'), \
                     f"{api} api: When use '>' to set kernel backend, the first param should be a attribute with Place type."
                 kernel_select_code = kernel_select_code + f"""
   kernel_backend = ParseBackendWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
-- 
GitLab