From a635a8a5c4ec914f57058bdcccf854d620ce5f42 Mon Sep 17 00:00:00 2001
From: Ruibiao Chen <chenruibiao@baidu.com>
Date: Fri, 26 Aug 2022 22:34:46 +0800
Subject: [PATCH] Move conv2d_transpose_grad XPU kernel to PHI, test=kunlun
 (#45466)

---
 .../fluid/operators/conv_transpose_op_xpu.cc  | 116 ------------------
 .../kernels/xpu/conv_transpose_grad_kernel.cc | 106 ++++++++++++++++
 2 files changed, 106 insertions(+), 116 deletions(-)
 delete mode 100644 paddle/fluid/operators/conv_transpose_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc
diff --git a/paddle/fluid/operators/conv_transpose_op_xpu.cc b/paddle/fluid/operators/conv_transpose_op_xpu.cc
deleted file mode 100644
index 882bd0b091..0000000000
--- a/paddle/fluid/operators/conv_transpose_op_xpu.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/conv_transpose_op.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#include "paddle/phi/kernels/cpu/conv_util.h"
-
-#ifdef PADDLE_WITH_XPU
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class Conv2DTransposeGradXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    const Tensor* input = context.Input<Tensor>("Input");
-    const Tensor* output_grad =
-        context.Input<Tensor>(framework::GradVarName("Output"));
-    Tensor* input_grad =
-        context.Output<Tensor>(framework::GradVarName("Input"));
-    Tensor* filter_grad =
-        context.Output<Tensor>(framework::GradVarName("Filter"));
-    // The filter and filter_grad will be reshaped in the calculations,
-    // so here use an assignment operation,
-    // that avoids modifying the variable in the Scope.
-    Tensor filter = *context.Input<Tensor>("Filter");
-    if (!input_grad && !filter_grad) return;
-    int groups = context.Attr<int>("groups");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
-    const std::string data_format = context.Attr<std::string>("data_format");
-    const std::string padding_algorithm =
-        context.Attr<std::string>("padding_algorithm");
-
-    PADDLE_ENFORCE_EQ(
-        data_format == "NHWC" || data_format == "NDHWC",
-        false,
-        platform::errors::InvalidArgument(
-            ("XPU do support data_format is NCHW in conv grad op.")));
-
-    framework::DDim in_data_dims =
-        phi::slice_ddim(input->dims(), 2, input->dims().size());
-    framework::DDim filter_data_dims =
-        phi::slice_ddim(filter.dims(), 2, filter.dims().size());
-    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-    phi::UpdatePaddingAndDilation(
-        &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
-
-    const int batch_size = static_cast<int>(input->dims()[0]);
-    const int img_yc = static_cast<int>(input->dims()[1]);
-    const int img_yh = static_cast<int>(input->dims()[2]);
-    const int img_yw = static_cast<int>(input->dims()[3]);
-    const int img_xc = static_cast<int>(output_grad->dims()[1]);
-    const int img_xh = static_cast<int>(output_grad->dims()[2]);
-    const int img_xw = static_cast<int>(output_grad->dims()[3]);
-    if (input_grad) {
-      input_grad->mutable_data<T>(context.GetPlace());
-    }
-    if (filter_grad) {
-      filter_grad->mutable_data<T>(context.GetPlace());
-    }
-
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    int r = xpu::conv2d_transpose_grad<float, float, float, int16_t>(
-        dev_ctx.x_context(),
-        input->data<T>(),
-        filter.data<T>(),
-        output_grad->data<T>(),
-        input_grad ? input_grad->data<T>() : nullptr,
-        filter_grad ? filter_grad->data<T>() : nullptr,
-        batch_size,
-        img_yc,
-        img_yh,
-        img_yw,
-        img_xc,
-        img_xh,
-        img_xw,
-        ksize,
-        strides,
-        paddings,
-        dilations,
-        groups,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        true);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_transpose_grad");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    conv2d_transpose_grad,
-    ops::Conv2DTransposeGradXPUKernel<paddle::platform::XPUDeviceContext,
-                                      float>);
-#endif
diff --git a/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc b/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc
new file mode 100644
index 0000000000..49061069b8
--- /dev/null
+++ b/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc
@@ -0,0 +1,106 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/conv_transpose_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
+
+namespace phi {
+// XPU gradient kernel for 2-D transposed convolution.
+//
+// Fills dx (gradient w.r.t. x) and dfilter (gradient w.r.t. filter); either
+// may be nullptr to skip it, and the kernel returns early when both are.
+// data_format "NHWC"/"NDHWC" is rejected with InvalidArgument.
+// output_padding and output_size are accepted but not read here.
+template <typename T, typename Context>
+void Conv2dTransposeGradKernel(const Context& ctx,
+                               const DenseTensor& x,
+                               const DenseTensor& filter,
+                               const DenseTensor& dout,
+                               const std::vector<int>& strides,
+                               const std::vector<int>& paddings,
+                               const std::vector<int>& output_padding,
+                               const std::vector<int>& output_size,
+                               const std::string& padding_algorithm,
+                               int groups,
+                               const std::vector<int>& dilations,
+                               const std::string& data_format,
+                               DenseTensor* dx,
+                               DenseTensor* dfilter) {
+  if (!dx && !dfilter) return;
+
+  PADDLE_ENFORCE_EQ(
+      data_format == "NHWC" || data_format == "NDHWC",
+      false,
+      errors::InvalidArgument(
+          "XPU only supports NCHW data_format in conv2d_transpose grad op."));
+
+  // UpdatePaddingAndDilation takes paddings/dilations by pointer, so it is
+  // given local copies instead of the const attribute vectors.
+  std::vector<int> paddings_ = paddings;
+  std::vector<int> dilations_ = dilations;
+  DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size());
+  DDim filter_data_dims = slice_ddim(filter.dims(), 2, filter.dims().size());
+  std::vector<int> ksize = vectorize<int>(filter_data_dims);
+  UpdatePaddingAndDilation(
+      &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
+
+  // x supplies the img_y* extents and dout the img_x* extents ([N, C, H, W]).
+  const int batch_size = static_cast<int>(x.dims()[0]);
+  const int img_yc = static_cast<int>(x.dims()[1]);
+  const int img_yh = static_cast<int>(x.dims()[2]);
+  const int img_yw = static_cast<int>(x.dims()[3]);
+  const int img_xc = static_cast<int>(dout.dims()[1]);
+  const int img_xh = static_cast<int>(dout.dims()[2]);
+  const int img_xw = static_cast<int>(dout.dims()[3]);
+  if (dx) ctx.template Alloc<T>(dx);
+  if (dfilter) ctx.template Alloc<T>(dfilter);
+  // XDNN is instantiated for float; float is the only dtype registered below.
+  int r = xpu::conv2d_transpose_grad<float, float, float, int16_t>(
+      ctx.x_context(),
+      x.data<T>(),
+      filter.data<T>(),
+      dout.data<T>(),
+      dx ? dx->data<T>() : nullptr,
+      dfilter ? dfilter->data<T>() : nullptr,
+      batch_size,
+      img_yc,
+      img_yh,
+      img_yw,
+      img_xc,
+      img_xh,
+      img_xw,
+      ksize,
+      strides,
+      paddings_,
+      dilations_,
+      groups,
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      true);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_transpose_grad");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(conv2d_transpose_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::Conv2dTransposeGradKernel,
+                   float) {}
-- 
GitLab