Unverified commit 8de336f9, authored by houj04, committed by GitHub

[XPU] add tile_grad op (#48720)

Parent 8fb829ba
@@ -663,6 +663,7 @@ XPUOpMap& get_kl2_ops() {
                     pOpKernelType(vartype::INT64, XPUPlace()),
                     pOpKernelType(vartype::BOOL, XPUPlace()),
                     pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"tile_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"transpose2_grad",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                      pOpKernelType(vartype::FP16, XPUPlace())})},
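(Registering tile_grad in get_kl2_ops() declares FP32 support for the op on KL2 XPU devices; without an entry here the framework would not dispatch tile's backward pass to the XPU kernel added below.)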
@@ -97,8 +97,8 @@ void TileGradKernel(const Context& dev_ctx,
   PADDLE_ENFORCE_GE(dims,
                     1,
                     errors::InvalidArgument(
-                        "Th rank of the input 'Out@GRAD' for tile_grad op "
-                        " must be greater than or equal to 1, but "
+                        "The rank of the input 'Out@GRAD' for tile_grad op "
+                        "must be greater than or equal to 1, but "
                         "the value received is %d.",
                         dims));
   PADDLE_ENFORCE_LE(dims,
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/tile_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void TileGradKernel(const Context& dev_ctx,
                    const DenseTensor& x,
                    const DenseTensor& out_grad,
                    const IntArray& repeat_times,
                    DenseTensor* x_grad) {
  auto x_dims = x.dims();
  auto vec_x_dims = phi::vectorize<int>(x_dims);
  auto repeat_times_data = repeat_times.GetData();
  if (repeat_times_data.size() < vec_x_dims.size()) {
    int diff = vec_x_dims.size() - repeat_times_data.size();
    repeat_times_data.insert(repeat_times_data.begin(), diff, 1);
  } else {
    int diff = repeat_times_data.size() - vec_x_dims.size();
    vec_x_dims.insert(vec_x_dims.begin(), diff, 1);
  }
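  // After this padding, vec_x_dims and repeat_times_data have the same rank;
  // e.g. (illustrative values) x dims [3] with repeat_times [2, 2] gives
  // vec_x_dims [1, 3].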

  // 1. reshape_dims_vec is the broadcast parameter.
  // 2. reduce_dims_vec is the dimension parameter to compute gradients. For
  //    each dimension expanded, the gradients should be summed to original
  //    size.
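  // For example (illustrative shapes): x dims [2, 3] and repeat_times [2, 2]
  // give out_grad dims [4, 6]; the loop below builds
  // reshape_dims_vec = [2, 2, 2, 3] and reduce_dims_vec = [0, 2], so viewing
  // out_grad as [2, 2, 2, 3] and summing over dims {0, 2} collapses the two
  // repeats back onto the original [2, 3] shape.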
  std::vector<int> reshape_dims_vec;
  std::vector<int> reduce_dims_vec;
  for (size_t i = 0; i < repeat_times_data.size(); ++i) {
    reduce_dims_vec.push_back(reshape_dims_vec.size());
    reshape_dims_vec.push_back(repeat_times_data[i]);
    reshape_dims_vec.push_back(vec_x_dims[i]);
  }

  dev_ctx.template Alloc<T>(x_grad);

  int dims = reduce_dims_vec.size();

  bool just_copy = true;
  for (size_t i = 0; i < repeat_times_data.size(); i++) {
    if (repeat_times_data[i] != 1) {
      just_copy = false;
      break;
    }
  }
  // No reduction is needed when every repeat factor is 1; just copy.
  if (just_copy) {
    phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, x_grad);
    // TensorCopy may change the dims of dx.
    x_grad->Resize(x_dims);
  } else {
    PADDLE_ENFORCE_GE(dims,
                      1,
                      errors::InvalidArgument(
                          "The rank of the input 'Out@GRAD' for tile_grad op "
                          "must be greater than or equal to 1, but "
                          "the value received is %d.",
                          dims));
    PADDLE_ENFORCE_LE(dims,
                      MAX_RANK_SUPPORTED,
                      errors::InvalidArgument(
                          "The rank of the input 'Out@GRAD' for tile_grad op "
                          "must be less than or equal "
                          "to %d, but the value received is %d.",
                          MAX_RANK_SUPPORTED,
                          dims));
    using XPUType = typename XPUTypeTrait<T>::Type;
    // int reduce_sum(Context* ctx, const T* x, T* y,
    //                const std::vector<int>& xshape,
    //                const std::vector<int>& rdims)
    const auto* out_data = out_grad.data<XPUType>();
    auto* x_grad_data = x_grad->data<XPUType>();
    int r = xpu::reduce_sum<XPUType>(dev_ctx.x_context(),
                                     out_data,
                                     x_grad_data,
                                     reshape_dims_vec,
                                     reduce_dims_vec);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(tile_grad, XPU, ALL_LAYOUT, phi::TileGradKernel, float) {}
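
For intuition, here is a minimal NumPy sketch of the reshape-then-reduce rule this kernel implements (illustrative shapes and names; not part of the commit):

import numpy as np

x_shape = (2, 3)
repeat_times = [2, 2]
# Gradient flowing back from tile(x, repeat_times), whose shape is (4, 6).
out_grad = np.ones((4, 6), dtype=np.float32)

# Build the same reshape/reduce parameters as the loop in the kernel above.
reshape_dims, reduce_dims = [], []
for r, d in zip(repeat_times, x_shape):
    reduce_dims.append(len(reshape_dims))  # index of the repeat axis
    reshape_dims.append(r)
    reshape_dims.append(d)

# reshape_dims == [2, 2, 2, 3], reduce_dims == [0, 2]
x_grad = out_grad.reshape(reshape_dims).sum(axis=tuple(reduce_dims))
assert x_grad.shape == x_shape  # each x element accumulates 2 * 2 = 4 terms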
@@ -59,6 +59,9 @@ class XPUTestTileOpRank1(XPUOpTestWrapper):
         def test_check_output(self):
             self.check_output_with_place(self.place)
 
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
     # with dimension expanding
     class TestTileOpRank2Expanding(TestTileOpRank1):
         def init_data(self):
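
(This and the two hunks that follow add the same test_check_grad method to each test class. In Paddle's OpTest framework, check_grad_with_place approximates the gradient of 'Out' with respect to 'X' by finite differences and compares it against the gradient produced by the tile_grad XPU kernel registered above.)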
@@ -126,6 +129,9 @@ class XPUTestTileOpRank1_tensor_attr(XPUOpTestWrapper):
         def test_check_output(self):
             self.check_output_with_place(self.place)
 
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
     class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr):
         def init_data(self):
             self.ori_shape = [12, 14]
@@ -168,6 +174,9 @@ class XPUTestTileOpRank1_tensor(XPUOpTestWrapper):
         def test_check_output(self):
             self.check_output_with_place(self.place)
 
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
     class TestTileOpRank2_tensor(TestTileOpRank1_tensor):
         def init_data(self):
             self.ori_shape = [12, 14]