From 1f1a7835b9cc5499d19e860a0aba250f5e3cd2c2 Mon Sep 17 00:00:00 2001
From: Ruibiao Chen <chenruibiao@baidu.com>
Date: Fri, 26 Aug 2022 17:39:03 +0800
Subject: [PATCH] Move conv2d_transpose XPU kernel to PHI, test=kunlun (#45419)

---
 .../fluid/operators/conv_transpose_op_xpu.cc  | 103 --------------
 .../phi/kernels/xpu/conv_transpose_kernel.cc  | 126 ++++++++++++++++++
 2 files changed, 126 insertions(+), 103 deletions(-)
 create mode 100644 paddle/phi/kernels/xpu/conv_transpose_kernel.cc
diff --git a/paddle/fluid/operators/conv_transpose_op_xpu.cc b/paddle/fluid/operators/conv_transpose_op_xpu.cc
index 08a58678a2..882bd0b091 100644
--- a/paddle/fluid/operators/conv_transpose_op_xpu.cc
+++ b/paddle/fluid/operators/conv_transpose_op_xpu.cc
@@ -24,106 +24,6 @@ namespace operators {
 
 using Tensor = framework::Tensor;
 
-// target_len == 2 || target_len == 4
-inline std::vector<int> vector_extend(const std::vector<int>& src,
-                                      int target_len) {
-  if (target_len == 2 && src.size() == 1) {
-    return {src[0], src[0]};
-  }
-  if (target_len == 4 && src.size() == 1) {
-    return {src[0], src[0], src[0], src[0]};
-  }
-  if (target_len == 4 && src.size() == 2) {
-    return {src[0], src[0], src[1], src[1]};
-  }
-  return src;
-}
-
-template <typename DeviceContext, typename T>
-class Conv2DTransposeXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    const Tensor* input = context.Input<Tensor>("Input");
-    // The filter will be reshaped in the calculations,
-    // so here use an assignment operation,
-    // that avoids modifying the variable in the Scope.
-    Tensor filter = *context.Input<Tensor>("Filter");
-    Tensor* output = context.Output<Tensor>("Output");
-    output->mutable_data<T>(context.GetPlace());
-    int groups = context.Attr<int>("groups");
-    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
-    const std::string data_format = context.Attr<std::string>("data_format");
-    const std::string padding_algorithm =
-        context.Attr<std::string>("padding_algorithm");
-
-    PADDLE_ENFORCE_EQ(
-        data_format == "NHWC" || data_format == "NDHWC",
-        false,
-        platform::errors::InvalidArgument(
-            ("XPU do support data_format is NCHW in conv_transpose op.")));
-
-    framework::DDim in_data_dims =
-        phi::slice_ddim(input->dims(), 2, input->dims().size());
-    framework::DDim filter_data_dims =
-        phi::slice_ddim(filter.dims(), 2, filter.dims().size());
-    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-    phi::UpdatePaddingAndDilation(
-        &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
-
-    const int batch_size = static_cast<int>(input->dims()[0]);
-    const int img_yc = static_cast<int>(input->dims()[1]);
-    const int img_yh = static_cast<int>(input->dims()[2]);
-    const int img_yw = static_cast<int>(input->dims()[3]);
-    const int img_xc = static_cast<int>(output->dims()[1]);
-    const int img_xh = static_cast<int>(output->dims()[2]);
-    const int img_xw = static_cast<int>(output->dims()[3]);
-
-    {
-      std::vector<int> ksize_check = vector_extend(ksize, 2);
-      std::vector<int> stride_check = vector_extend(strides, 2);
-      std::vector<int> pad_check = vector_extend(paddings, 4);
-      std::vector<int> dilation_check = vector_extend(dilations, 2);
-
-      int xh_check = (img_yh - 1) * stride_check[0] - pad_check[0] -
-                     pad_check[1] +
-                     (dilation_check[0] * (ksize_check[0] - 1) + 1);
-      int xw_check = (img_yw - 1) * stride_check[1] - pad_check[2] -
-                     pad_check[3] +
-                     (dilation_check[1] * (ksize_check[1] - 1) + 1);
-
-      PADDLE_ENFORCE_EQ(
-          xh_check == img_xh && xw_check == img_xw,
-          true,
-          platform::errors::InvalidArgument(
-              ("XPU output size check error in conv_transpose op.")));
-    }
-
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    int r = xpu::conv2d_transpose<float, float, float, int16_t>(
-        dev_ctx.x_context(),
-        input->data<float>(),
-        filter.data<float>(),
-        output->data<float>(),
-        batch_size,
-        img_yc,
-        img_yh,
-        img_yw,
-        img_xc,
-        ksize,
-        strides,
-        paddings,
-        dilations,
-        groups,
-        nullptr,
-        nullptr,
-        nullptr,
-        true);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_transpose");
-  }
-};
-
 template <typename DeviceContext, typename T>
 class Conv2DTransposeGradXPUKernel : public framework::OpKernel<T> {
  public:
@@ -209,9 +109,6 @@ class Conv2DTransposeGradXPUKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    conv2d_transpose,
-    ops::Conv2DTransposeXPUKernel<paddle::platform::XPUDeviceContext, float>);
 REGISTER_OP_XPU_KERNEL(
     conv2d_transpose_grad,
     ops::Conv2DTransposeGradXPUKernel<paddle::platform::XPUDeviceContext,
diff --git a/paddle/phi/kernels/xpu/conv_transpose_kernel.cc b/paddle/phi/kernels/xpu/conv_transpose_kernel.cc
new file mode 100644
index 0000000000..3fcd4b4a32
--- /dev/null
+++ b/paddle/phi/kernels/xpu/conv_transpose_kernel.cc
@@ -0,0 +1,126 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/conv_transpose_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
+
+namespace phi {
+
+// Expands src to target_len (2 or 4) entries; unmatched sizes pass through.
+inline std::vector<int> vector_extend(const std::vector<int>& src,
+                                      int target_len) {
+  if (target_len == 2 && src.size() == 1) {
+    return {src[0], src[0]};  // single value repeated twice
+  }
+  if (target_len == 4 && src.size() == 1) {
+    return {src[0], src[0], src[0], src[0]};  // single value repeated 4x
+  }
+  if (target_len == 4 && src.size() == 2) {
+    return {src[0], src[0], src[1], src[1]};  // each value doubled in place
+  }
+  return src;  // no rule matched: a copy of src is returned as-is
+}
+
+template <typename T, typename Context>
+void Conv2dTransposeKernel(const Context& ctx,
+                           const DenseTensor& x,
+                           const DenseTensor& filter,
+                           const std::vector<int>& strides,
+                           const std::vector<int>& paddings,
+                           const std::vector<int>& output_padding,
+                           const std::vector<int>& output_size,
+                           const std::string& padding_algorithm,
+                           int groups,
+                           const std::vector<int>& dilations,
+                           const std::string& data_format,
+                           DenseTensor* out) {  // output_* args are never read
+  // The filter may be reshaped during the computation, so work on a local
+  // copy (made by assignment) and leave the caller's tensor untouched.
+  // NOTE(review): no reshape of filter_ is visible below -- confirm need.
+  DenseTensor filter_ = filter;
+
+  ctx.template Alloc<T>(out);
+
+  PADDLE_ENFORCE_EQ(
+      data_format == "NHWC" || data_format == "NDHWC",
+      false,  // the layout test above must evaluate to false
+      errors::InvalidArgument(
+          ("XPU do support data_format is NCHW in conv_transpose op.")));
+
+  DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size());
+  DDim filter_data_dims = slice_ddim(filter_.dims(), 2, filter_.dims().size());
+  std::vector<int> ksize = vectorize<int>(filter_data_dims);
+
+  std::vector<int> paddings_ = paddings;  // mutable copies for the call below
+  std::vector<int> dilations_ = dilations;
+  UpdatePaddingAndDilation(
+      &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
+
+  const int batch_size = static_cast<int>(x.dims()[0]);
+  const int img_yc = static_cast<int>(x.dims()[1]);  // img_y*: from x
+  const int img_yh = static_cast<int>(x.dims()[2]);
+  const int img_yw = static_cast<int>(x.dims()[3]);
+  const int img_xc = static_cast<int>(out->dims()[1]);  // img_x*: from out
+  const int img_xh = static_cast<int>(out->dims()[2]);
+  const int img_xw = static_cast<int>(out->dims()[3]);
+
+  {  // verify out dims 2 and 3 match sizes recomputed from the attributes
+    std::vector<int> ksize_check = vector_extend(ksize, 2);
+    std::vector<int> stride_check = vector_extend(strides, 2);
+    std::vector<int> pad_check = vector_extend(paddings_, 4);
+    std::vector<int> dilation_check = vector_extend(dilations_, 2);
+
+    int xh_check = (img_yh - 1) * stride_check[0] - pad_check[0] -
+                   pad_check[1] +
+                   (dilation_check[0] * (ksize_check[0] - 1) + 1);
+    int xw_check = (img_yw - 1) * stride_check[1] - pad_check[2] -
+                   pad_check[3] +
+                   (dilation_check[1] * (ksize_check[1] - 1) + 1);
+
+    PADDLE_ENFORCE_EQ(
+        xh_check == img_xh && xw_check == img_xw,
+        true,
+        errors::InvalidArgument(
+            ("XPU output size check error in conv_transpose op.")));
+  }
+
+  int r =  // NOTE(review): the call below is fixed to float, not T
+      xpu::conv2d_transpose<float, float, float, int16_t>(ctx.x_context(),
+                                                          x.data<float>(),
+                                                          filter_.data<float>(),
+                                                          out->data<float>(),
+                                                          batch_size,
+                                                          img_yc,
+                                                          img_yh,
+                                                          img_yw,
+                                                          img_xc,
+                                                          ksize,
+                                                          strides,
+                                                          paddings_,
+                                                          dilations_,
+                                                          groups,
+                                                          nullptr,
+                                                          nullptr,
+                                                          nullptr,
+                                                          true);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_transpose");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(  // conv2d_transpose for XPU; float is the only dtype
+    conv2d_transpose, XPU, ALL_LAYOUT, phi::Conv2dTransposeKernel, float) {}
-- 
GitLab