Unverified commit c2e77ba3 authored by Leo Chen, committed by GitHub

move share_buffer kernel to phi (#48858)

* move share_buffer kernel to phi

* fix ut

* add source file

* fix Windows links
Parent 8f1e24d5
@@ -81,6 +81,7 @@ yaml_types_mapping = {
'str': 'std::string',
'str[]': 'std::vector<std::string>',
'float[]': 'std::vector<float>',
'bool[]': 'std::vector<bool>',
'Place': 'paddle::Place',
'DataLayout': 'phi::DataLayout',
'DataType': 'paddle::experimental::DataType',
......
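For context, this table is what the eager code generator uses to translate yaml attribute types into C++ types, so the new entry lets an op declare a bool[] attribute and have it emitted as std::vector<bool> in generated code. A minimal sketch of the effect, borrowing the attribute name from the share_buffer yaml entry later in this diff:

// yaml attribute:          bool[] share_dims_and_dtype = {}
// generated C++ parameter: std::vector<bool> share_dims_and_dtype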
@@ -3258,6 +3258,10 @@ void OperatorWithKernel::BuildPhiKernelContext(
phi_kernel_context->EmplaceBackAttr(
PADDLE_GET_CONST(std::vector<int>, attr_iter->second));
break;
case phi::AttributeType::BOOLS:
phi_kernel_context->EmplaceBackAttr(
PADDLE_GET_CONST(std::vector<bool>, attr_iter->second));
break;
case phi::AttributeType::DATA_TYPE: {
auto data_type = framework::TransToPhiDataType(
static_cast<framework::proto::VarType::Type>(
......
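The new BOOLS case is the bridge between the fluid attribute and the phi kernel. A hedged sketch of the round trip, using only names that appear elsewhere in this diff:

// Op side:  AddAttr<std::vector<bool>>("share_dims_and_dtype", ...) declares the
//           attribute on the fluid ShareBufferOp (see the op definition below).
// Bridge:   BuildPhiKernelContext matches phi::AttributeType::BOOLS and forwards
//           PADDLE_GET_CONST(std::vector<bool>, attr_iter->second) via EmplaceBackAttr.
// Kernel:   phi::ShareBufferKernel receives the value as const std::vector<bool>&.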
@@ -207,7 +207,7 @@ elseif(WITH_ROCM)
else()
cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3)
endif()
cc_test(share_buffer_op_cpp_test SRCS share_buffer_op_test.cc DEPS lod_tensor device_context share_buffer_op)
cc_test(share_buffer_op_cpp_test SRCS share_buffer_op_test.cc DEPS lod_tensor device_context generated_op)
cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS})
if (WITH_PYTHON)
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/share_buffer_op.h"
namespace paddle {
namespace operators {
class ShareBufferOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
// dtype is not important
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.GetPlace());
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const framework::OpKernelType& expected_kernel_type) const override {
return expected_kernel_type;
}
};
class ShareBufferOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor), The input tensors of share buffer op")
.AsDuplicable();
AddOutput("Out", "(Tensor), The output tensors of share buffer op")
.AsDuplicable();
AddOutput("XOut",
"(Tensor), The output tensors which are the same as X. It is "
"used to build the graph dependency")
.AsDuplicable();
AddAttr<std::vector<bool>>("share_dims_and_dtype",
"Whether to share dims and data type")
.SetDefault(std::vector<bool>());
AddComment(
R"DOC(Operator used to perform inplace memory reuse. It should be not exposed to Python APIs.)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(share_buffer, ops::ShareBufferOp, ops::ShareBufferOpMaker);
// dtype is not important
REGISTER_OP_CPU_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#ifdef PADDLE_WITH_ASCEND_CL
REGISTER_OP_NPU_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_OP_XPU_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace operators {
template <typename T>
class ShareBufferOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto inputs = ctx.MultiInput<phi::DenseTensor>("X");
auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out");
size_t n = inputs.size();
PADDLE_ENFORCE_EQ(
n,
outputs.size(),
platform::errors::PermissionDenied("Variable number not match."));
const auto &share_dims_and_dtype =
ctx.Attr<std::vector<bool>>("share_dims_and_dtype");
if (!share_dims_and_dtype.empty()) {
PADDLE_ENFORCE_EQ(n,
share_dims_and_dtype.size(),
platform::errors::PermissionDenied(
                            "The size of attribute share_dims_and_dtype does "
                            "not match the number of input variables."));
}
const std::vector<std::string> *input_args = nullptr,
*output_args = nullptr;
if (VLOG_IS_ON(10)) {
input_args = &ctx.GetOp().Inputs("X");
output_args = &ctx.GetOp().Outputs("Out");
}
for (size_t i = 0; i < n; ++i) {
if (inputs[i] == nullptr || outputs[i] == nullptr) {
continue;
}
outputs[i]->ShareBufferWith(*inputs[i]);
VLOG(10) << "Share tensor buffer " << (*input_args)[i] << " -> "
<< (*output_args)[i];
if (!share_dims_and_dtype.empty() && share_dims_and_dtype[i]) {
outputs[i]->Resize(inputs[i]->dims());
outputs[i]->ShareDataTypeWith(*inputs[i]);
}
}
}
};
} // namespace operators
} // namespace paddle
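The core operation of this kernel is aliasing, not copying. A minimal sketch of the phi::DenseTensor calls involved, assuming src is an allocated tensor and share_this_one is a hypothetical stand-in for the per-tensor attribute value:

phi::DenseTensor dst;
dst.ShareBufferWith(src);      // dst now aliases src's allocation; no data copy
if (share_this_one) {
  dst.Resize(src.dims());      // optionally propagate the shape
  dst.ShareDataTypeWith(src);  // optionally propagate the dtype
}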
@@ -18,8 +18,15 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h"
USE_OP(share_buffer);
USE_OP_ITSELF(share_buffer);
PD_DECLARE_KERNEL(share_buffer, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(share_buffer, GPU, ALL_LAYOUT);
#endif
namespace paddle {
namespace framework {
......
@@ -166,6 +166,7 @@ class BaseAPI:
'float[]': 'const std::vector<float>&',
'double': 'double',
'bool': 'bool',
'bool[]': 'const std::vector<bool>&',
'str': 'const std::string&',
'str[]': 'const std::vector<std::string>&',
'Place': 'const Place&',
......
@@ -1019,6 +1019,13 @@
extra :
attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"]
- op : share_buffer
inputs :
x : X
outputs :
out : Out
xout : XOut
- op : shuffle_channel
backward : shuffle_channel_grad
extra :
......
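This op_compat entry maps the phi-side argument names onto the legacy fluid slots, so the new signature (x, out, xout) lines up with the op's original inputs and outputs. A sketch of the correspondence:

// phi argument -> fluid op slot
//   x    -> X     (duplicable input list)
//   out  -> Out   (duplicable output list)
//   xout -> XOut  (output list mirroring X, used to build the graph dependency)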
@@ -952,3 +952,11 @@
kernel :
func : unfold
backward : unfold_grad
- op : share_buffer
args : (Tensor[] x, bool[] share_dims_and_dtype={})
output : Tensor[](out){x.size()}, Tensor[](xout){x.size()}
infer_meta :
func : ShareBufferInferMeta
kernel :
func : share_buffer
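Here the {x.size()} suffix sizes both output lists to match the input list, and bool[] ... = {} defaults the attribute to empty. Combined with the type mappings added earlier, this entry resolves to the phi kernel signature declared at the end of this diff:

template <typename Context>
void ShareBufferKernel(const Context &dev_ctx,
                       const std::vector<const DenseTensor *> &x,
                       const std::vector<bool> &share_dims_and_dtype,
                       std::vector<DenseTensor *> out,
                       std::vector<DenseTensor *> xout);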
@@ -2668,6 +2668,30 @@ void UnchangedMultiInferMeta(const std::vector<const MetaTensor*>& x,
}
}
void ShareBufferInferMeta(const std::vector<const MetaTensor*>& xs,
const std::vector<bool>& share_dims_and_dtype,
std::vector<MetaTensor*> outs,
std::vector<MetaTensor*> xouts) {
if (share_dims_and_dtype.empty()) {
return;
}
PADDLE_ENFORCE_EQ(xs.size(),
share_dims_and_dtype.size(),
phi::errors::PermissionDenied(
"The input(X) and attribute share_dims_and_dtype "
"should have the same size, but got size of input(X) "
"is %d and size of share_dims_and_dtype is %d.",
xs.size(),
share_dims_and_dtype.size()));
for (size_t i = 0; i < xs.size(); ++i) {
if (share_dims_and_dtype[i]) {
outs[i]->set_dims(xs[i]->dims());
outs[i]->set_dtype(xs[i]->dtype());
}
}
}
void UpdateLossScalingInferMeta(const std::vector<const MetaTensor*>& xs,
const MetaTensor& found_infinite,
const MetaTensor& prev_loss_scaling,
......
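A worked example of the propagation rule above, with hypothetical shapes and dtypes:

// xs                   = { x0: dims [2, 3], float32;  x1: dims [4], int64 }
// share_dims_and_dtype = { true, false }
// => outs[0] gets dims [2, 3] and dtype float32; outs[1] is left unchanged.
// The buffer aliasing itself happens in the kernel, not at infer-meta time.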
@@ -479,6 +479,11 @@ void StackInferMeta(const std::vector<const MetaTensor*>& x,
void UnchangedMultiInferMeta(const std::vector<const MetaTensor*>& x,
std::vector<MetaTensor*> out);
void ShareBufferInferMeta(const std::vector<const MetaTensor*>& x,
const std::vector<bool>& share_dims_and_dtype,
std::vector<MetaTensor*> out,
std::vector<MetaTensor*> xout);
void UpdateLossScalingInferMeta(const std::vector<const MetaTensor*>& xs,
const MetaTensor& found_infinite,
const MetaTensor& prev_loss_scaling,
......
@@ -14,6 +14,8 @@
#pragma once
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/share_buffer_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h"
namespace phi {
template <typename Context>
void ShareBufferKernel(const Context &dev_ctx,
const std::vector<const DenseTensor *> &x,
const std::vector<bool> &share_dims_and_dtype,
std::vector<DenseTensor *> out,
std::vector<DenseTensor *> xout) {
PADDLE_ENFORCE_EQ(
x.size(),
out.size(),
phi::errors::PermissionDenied(
"The input(X) and Output(out) should have the same size, but got "
"size of Input(X) is %d and size of Output(out) is %d.",
x.size(),
out.size()));
for (size_t i = 0; i < x.size(); ++i) {
if (x[i] == nullptr || out[i] == nullptr) {
continue;
}
out[i]->ShareBufferWith(*x[i]);
VLOG(10) << "Share tensor buffer ";
}
}
} // namespace phi
PD_REGISTER_GENERAL_KERNEL(share_buffer,
CPU,
ALL_LAYOUT,
phi::ShareBufferKernel<phi::CPUContext>,
ALL_DTYPE) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_GENERAL_KERNEL(share_buffer,
GPU,
ALL_LAYOUT,
phi::ShareBufferKernel<phi::GPUContext>,
ALL_DTYPE) {}
#endif
#ifdef PADDLE_WITH_XPU
PD_REGISTER_GENERAL_KERNEL(share_buffer,
XPU,
ALL_LAYOUT,
phi::ShareBufferKernel<phi::XPUContext>,
ALL_DTYPE) {}
#endif
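Each PD_REGISTER_GENERAL_KERNEL above pairs with a consumer-side declaration: a translation unit that links against the kernel declares it rather than re-registering it, exactly as the updated C++ test earlier in this diff does:

#include "paddle/phi/core/kernel_registry.h"

PD_DECLARE_KERNEL(share_buffer, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(share_buffer, GPU, ALL_LAYOUT);
#endif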
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,7 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/share_buffer_op.h"
#pragma once
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename Context>
void ShareBufferKernel(const Context &dev_ctx,
const std::vector<const DenseTensor *> &x,
const std::vector<bool> &share_dims_and_dtype,
std::vector<DenseTensor *> out,
std::vector<DenseTensor *> xout);
} // namespace phi