From 3f57ef7a1fedd598d9d171261df66c50b0fa5222 Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Sun, 3 Apr 2022 17:22:56 +0800
Subject: [PATCH] [Phi]Concat grad (#41112)

* add concat_grad kernel

* fix error

* remove comment code

* fix outs nullptr error

* change to phi header

* add concat_grad declare for standalone_executor_test
---
 .../new_executor/standalone_executor_test.cc  |  3 +-
 paddle/fluid/operators/concat_op.cc           | 15 ----
 paddle/fluid/operators/concat_op.cu.cc        | 36 ---------
 paddle/fluid/operators/concat_op.h            | 56 --------------
 paddle/phi/kernels/concat_grad_kernel.h       | 30 ++++++++
 paddle/phi/kernels/cpu/concat_grad_kernel.cc  | 35 +++++++++
 paddle/phi/kernels/gpu/concat_grad_kernel.cu  | 37 ++++++++++
 .../kernels/impl/concat_grad_kernel_impl.h    | 69 +++++++++++++++++++
 paddle/phi/ops/compat/concat_sig.cc           | 14 ++++
 9 files changed, 187 insertions(+), 108 deletions(-)
 delete mode 100644 paddle/fluid/operators/concat_op.cu.cc
 create mode 100644 paddle/phi/kernels/concat_grad_kernel.h
 create mode 100644 paddle/phi/kernels/cpu/concat_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/gpu/concat_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/impl/concat_grad_kernel_impl.h

diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
index b5670565e2a..fbcbb2ca23b 100644
--- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc
+++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
@@ -46,7 +46,7 @@ USE_OP_ITSELF(elementwise_add_grad);
 USE_OP_ITSELF(matmul_grad);
 USE_OP_ITSELF(square);
 USE_OP_ITSELF(transpose2_grad);
-USE_OP(concat_grad);
+USE_OP_ITSELF(concat_grad);
 USE_OP_ITSELF(elementwise_mul_grad);
 USE_OP_ITSELF(sigmoid_grad);
 USE_OP_ITSELF(tanh_grad);
@@ -67,6 +67,7 @@ PD_DECLARE_KERNEL(transpose, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(reshape, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(concat_grad, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(add_raw, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc
index 059fafa3e7f..a467f2dbee7 100644
--- a/paddle/fluid/operators/concat_op.cc
+++ b/paddle/fluid/operators/concat_op.cc
@@ -216,18 +216,3 @@ REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad,
                   ops::ConcatDoubleGradOpMaker<paddle::framework::OpDesc>,
                   ops::ConcatDoubleGradOpMaker<paddle::imperative::OpBase>,
                   ops::ConcatOpGradNoNeedBufferVarInferer);
-
-REGISTER_OP_CPU_KERNEL(
-    concat_grad,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, bool>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, uint8_t>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
-                          paddle::platform::float16>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
-                          paddle::platform::complex<float>>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
-                          paddle::platform::complex<double>>);
diff --git a/paddle/fluid/operators/concat_op.cu.cc b/paddle/fluid/operators/concat_op.cu.cc
deleted file mode 100644
index f7b64f16e2d..00000000000
--- a/paddle/fluid/operators/concat_op.cu.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/concat_op.h"
-#include "paddle/fluid/platform/bfloat16.h"
-#include "paddle/fluid/platform/complex.h"
-#include "paddle/fluid/platform/float16.h"
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-
-REGISTER_OP_CUDA_KERNEL(
-    concat_grad,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, bool>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, uint8_t>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext, plat::bfloat16>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext,
-                          plat::complex<float>>,
-    ops::ConcatGradKernel<paddle::platform::CUDADeviceContext,
-                          plat::complex<double>>);
diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h
index ec43e2ad374..50aca54c12d 100644
--- a/paddle/fluid/operators/concat_op.h
+++ b/paddle/fluid/operators/concat_op.h
@@ -39,62 +39,6 @@ static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
   }
   return axis > 0 ? axis : 0;
 }
-template <typename DeviceContext, typename T>
-class ConcatGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const {
-    auto* out_grad =
-        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
-    auto ins = ctx.MultiInput<framework::LoDTensor>("X");
-    auto out_var_names = ctx.OutputNames(framework::GradVarName("X"));
-    auto outs =
-        ctx.MultiOutput<framework::LoDTensor>(framework::GradVarName("X"));
-
-    {
-      auto dx = outs;
-      auto x = ins;
-      for (size_t i = 0; i < dx.size(); ++i) {
-        if (dx[i] != nullptr) {
-          dx[i]->set_lod(x[i]->lod());
-        }
-      }
-    }
-    PADDLE_ENFORCE_NOT_NULL(ins[0],
-                            platform::errors::NotFound(
-                                "The first input tensor is not initalized."));
-
-    auto axis = ctx.Attr<int>("axis");
-    if (ctx.HasInput("AxisTensor")) {
-      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
-      axis = GetDataFromTensor<int>(axis_tensor)[0];
-    }
-    axis = ComputeAxis(static_cast<int64_t>(axis),
-                       static_cast<int64_t>(ins[0]->dims().size()));
-    // get output tensor that the name is not kEmptyVarName
-    std::vector<framework::Tensor*> outputs;
-    for (size_t j = 0; j < outs.size(); ++j) {
-      if (out_var_names[j] != framework::kEmptyVarName &&
-          outs[j]->numel() != 0UL) {
-        outs[j]->mutable_data<T>(ctx.GetPlace());
-        outputs.push_back(outs[j]);
-      } else {
-        outputs.push_back(nullptr);
-      }
-    }
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-
-    // Sometimes direct copies will be faster, this maybe need deeply analysis.
-    if (axis == 0 && outs.size() < 10) {
-      std::vector<const framework::Tensor*> ref_shape;
-      ref_shape.insert(ref_shape.begin(), ins.begin(), ins.end());
-      StridedMemcpyWithAxis0<T>(dev_ctx, *out_grad, ref_shape, &outputs);
-    } else {
-      math::SplitFunctor<DeviceContext, T> split_functor;
-      split_functor(dev_ctx, *out_grad, ctx.MultiInput<framework::Tensor>("X"),
-                    static_cast<int>(axis), &outputs);
-    }
-  }
-};
 
 }  // namespace operators
 }  // namespace paddle
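The deletion above is the heart of the migration: the fluid kernel pulled every argument out of an ExecutionContext by name at run time, while the phi kernel that replaces it (declared next) receives typed parameters directly. A minimal sketch of the two styles for comparison; Ctx and Tensor are illustrative stand-ins, not Paddle types:

    #include <vector>

    struct Ctx {};     // stand-in for a device context (e.g. phi::CPUContext)
    struct Tensor {};  // stand-in for phi::DenseTensor

    // fluid style: one Compute(ctx) entry point; arguments are looked up
    // by name inside the method, e.g.
    //   auto ins = ctx.MultiInput<LoDTensor>("X");

    // phi style: a free function template with explicit, typed arguments,
    // so the kernel no longer depends on the operator framework.
    template <typename T, typename Context>
    void ConcatGrad(const Context& dev_ctx,
                    const std::vector<const Tensor*>& x,
                    const Tensor& out_grad,
                    int axis,
                    std::vector<Tensor*> x_grad);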
diff --git a/paddle/phi/kernels/concat_grad_kernel.h b/paddle/phi/kernels/concat_grad_kernel.h
new file mode 100644
index 00000000000..e407d73bb49
--- /dev/null
+++ b/paddle/phi/kernels/concat_grad_kernel.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/common/scalar.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/infermeta/multiary.h"
+#include "paddle/phi/kernels/empty_kernel.h"
+namespace phi {
+
+template <typename T, typename Context>
+void ConcatGradKernel(const Context& dev_ctx,
+                      const std::vector<const DenseTensor*>& x,
+                      const DenseTensor& out_grad,
+                      const Scalar& axis_scalar,
+                      std::vector<DenseTensor*> x_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/cpu/concat_grad_kernel.cc b/paddle/phi/kernels/cpu/concat_grad_kernel.cc
new file mode 100644
index 00000000000..56ed95769fe
--- /dev/null
+++ b/paddle/phi/kernels/cpu/concat_grad_kernel.cc
@@ -0,0 +1,35 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/concat_grad_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/complex.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/concat_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(concat_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::ConcatGradKernel,
+                   double,
+                   float,
+                   bool,
+                   int64_t,
+                   int,
+                   uint8_t,
+                   phi::dtype::float16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/gpu/concat_grad_kernel.cu b/paddle/phi/kernels/gpu/concat_grad_kernel.cu
new file mode 100644
index 00000000000..2445978daca
--- /dev/null
+++ b/paddle/phi/kernels/gpu/concat_grad_kernel.cu
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/concat_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/concat_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(concat_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ConcatGradKernel,
+                   float,
+                   double,
+                   bool,
+                   int64_t,
+                   int,
+                   uint8_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/impl/concat_grad_kernel_impl.h b/paddle/phi/kernels/impl/concat_grad_kernel_impl.h
new file mode 100644
index 00000000000..e89920340ff
--- /dev/null
+++ b/paddle/phi/kernels/impl/concat_grad_kernel_impl.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "paddle/phi/kernels/concat_grad_kernel.h"
+
+#include "paddle/fluid/operators/strided_memcpy.h"
+#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
+#include "paddle/phi/kernels/funcs/concat_funcs.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ConcatGradKernel(const Context& dev_ctx,
+                      const std::vector<const DenseTensor*>& x,
+                      const DenseTensor& out_grad,
+                      const Scalar& axis_scalar,
+                      std::vector<DenseTensor*> x_grad) {
+  auto outs = x_grad;
+  {
+    auto dx = x_grad;
+    for (size_t i = 0; i < dx.size(); ++i) {
+      if (dx[i] != nullptr) {
+        dx[i]->set_lod(x[i]->lod());
+      }
+    }
+  }
+  PADDLE_ENFORCE_NOT_NULL(
+      x[0], phi::errors::NotFound("The first input tensor is not initialized."));
+
+  auto axis = axis_scalar.to<int>();
+  axis = funcs::ComputeAxis(static_cast<int64_t>(axis),
+                            static_cast<int64_t>(x[0]->dims().size()));
+  // gather the output tensors that need a gradient; empty slots stay nullptr
+  std::vector<DenseTensor*> outputs;
+  for (size_t j = 0; j < outs.size(); ++j) {
+    if (outs[j] && outs[j]->numel() != 0UL) {
+      dev_ctx.template Alloc<T>(outs[j]);
+
+      outputs.push_back(outs[j]);
+    } else {
+      outputs.push_back(nullptr);
+    }
+  }
+
+  // Sometimes a direct copy will be faster; this may need deeper analysis.
+  if (axis == 0 && outs.size() < 10) {
+    std::vector<const DenseTensor*> ref_shape;
+    ref_shape.insert(ref_shape.begin(), x.begin(), x.end());
+    paddle::operators::StridedMemcpyWithAxis0<T>(
+        dev_ctx, out_grad, ref_shape, &outputs);
+  } else {
+    phi::funcs::SplitFunctor<Context, T> split_functor;
+    split_functor(dev_ctx, out_grad, x, static_cast<int>(axis), &outputs);
+  }
+}
+
+}  // namespace phi
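The implementation above keeps the fluid kernel's two strategies. When the concat was along axis 0 (and there are fewer than 10 outputs), each input's gradient is one contiguous block of out_grad, so StridedMemcpyWithAxis0 reduces to plain copies; every other case goes through phi::funcs::SplitFunctor. A self-contained sketch of the axis-0 fast path on raw float buffers (illustrative only, not Paddle code):

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Concat grad for axis == 0: input i's gradient is the contiguous run
    // of numels[i] elements of out_grad starting at the running offset.
    void ConcatGradAxis0(const std::vector<float>& out_grad,
                         const std::vector<std::size_t>& numels,
                         std::vector<std::vector<float>>* x_grad) {
      std::size_t offset = 0;
      x_grad->clear();
      for (std::size_t n : numels) {
        x_grad->emplace_back(n);  // zero-initialized buffer of n floats
        std::memcpy(x_grad->back().data(), out_grad.data() + offset,
                    n * sizeof(float));
        offset += n;
      }
    }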
diff --git a/paddle/phi/ops/compat/concat_sig.cc b/paddle/phi/ops/compat/concat_sig.cc
index 21e653ccfe9..d443f521c61 100644
--- a/paddle/phi/ops/compat/concat_sig.cc
+++ b/paddle/phi/ops/compat/concat_sig.cc
@@ -23,6 +23,20 @@ KernelSignature ConcatOpArgumentMapping(const ArgumentMappingContext& ctx) {
   return KernelSignature("concat", {"X"}, {"axis"}, {"Out"});
 }
 
+KernelSignature ConcatGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  if (ctx.HasInput("AxisTensor")) {
+    return KernelSignature("concat_grad",
+                           {"X", {GradVarName("Out")}},
+                           {"AxisTensor"},
+                           {{GradVarName("X")}});
+  }
+  return KernelSignature("concat_grad",
+                         {"X", {GradVarName("Out")}},
+                         {"axis"},
+                         {{GradVarName("X")}});
+}
+
 }  // namespace phi
 
 PD_REGISTER_ARG_MAPPING_FN(concat, phi::ConcatOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(concat_grad, phi::ConcatGradOpArgumentMapping);
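For the general branch, what SplitFunctor computes is the inverse of concat's forward bookkeeping: flatten every shape to [outer, axis_dim, inner], then give each input its slice of every outer row. A sketch on raw float buffers with hypothetical names (the real functor works on DenseTensors and dispatches per device):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // out_grad has logical shape [outer, total_axis, inner], row-major,
    // where total_axis is the sum of axis_dims. Result i has shape
    // [outer, axis_dims[i], inner].
    std::vector<std::vector<float>> ConcatGradGeneral(
        const std::vector<float>& out_grad, std::size_t outer,
        std::size_t inner, const std::vector<std::size_t>& axis_dims) {
      std::size_t total_axis = 0;
      for (std::size_t d : axis_dims) total_axis += d;

      std::vector<std::vector<float>> grads;
      std::size_t axis_offset = 0;
      for (std::size_t d : axis_dims) {
        std::vector<float> g(outer * d * inner);
        for (std::size_t o = 0; o < outer; ++o) {
          // This input's [d, inner] block inside outer row o of out_grad.
          const float* src =
              out_grad.data() + (o * total_axis + axis_offset) * inner;
          std::copy(src, src + d * inner, g.data() + o * d * inner);
        }
        grads.push_back(std::move(g));
        axis_offset += d;
      }
      return grads;
    }

For example, concatenating shapes [2, 3] and [2, 5] along axis 1 gives outer = 2, inner = 1, axis_dims = {3, 5}: each 8-wide row of out_grad is carved back into a 3-wide and a 5-wide piece.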