diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index 835d52bdf4eea074003460cc4a70f8a0b5b46342..59fd27dced7797caeb48dd87c4fdf66ea1055b59 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -663,6 +663,7 @@ XPUOpMap& get_kl2_ops() {
                     pOpKernelType(vartype::INT64, XPUPlace()),
                     pOpKernelType(vartype::BOOL, XPUPlace()),
                     pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"tile_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"transpose2_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
diff --git a/paddle/phi/kernels/impl/tile_grad_kernel_impl.h b/paddle/phi/kernels/impl/tile_grad_kernel_impl.h
index 05f9139b1485b637f4ebc1d3368411e8120bf92e..d9b97956ce9d1e46e4e5f3ab11fdac5b9ac52c89 100644
--- a/paddle/phi/kernels/impl/tile_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/tile_grad_kernel_impl.h
@@ -97,8 +97,8 @@ void TileGradKernel(const Context& dev_ctx,
   PADDLE_ENFORCE_GE(dims,
                     1,
                     errors::InvalidArgument(
-                        "Th rank of the input 'Out@GRAD' for tile_grad op "
-                        " must be greater than or equal to 1, but "
+                        "The rank of the input 'Out@GRAD' for tile_grad op "
+                        "must be greater than or equal to 1, but "
                         "the value received is %d.",
                         dims));
   PADDLE_ENFORCE_LE(dims,
diff --git a/paddle/phi/kernels/xpu/tile_grad_kernel.cc b/paddle/phi/kernels/xpu/tile_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c9dce98d192343894c283e593f89eaeafd18ad3a
--- /dev/null
+++ b/paddle/phi/kernels/xpu/tile_grad_kernel.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/tile_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void TileGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out_grad,
+                    const IntArray& repeat_times,
+                    DenseTensor* x_grad) {
+  auto x_dims = x.dims();
+  auto vec_x_dims = phi::vectorize<int>(x_dims);
+  auto repeat_times_data = repeat_times.GetData();
+  if (repeat_times_data.size() < vec_x_dims.size()) {
+    int diff = vec_x_dims.size() - repeat_times_data.size();
+    repeat_times_data.insert(repeat_times_data.begin(), diff, 1);
+  } else {
+    int diff = repeat_times_data.size() - vec_x_dims.size();
+    vec_x_dims.insert(vec_x_dims.begin(), diff, 1);
+  }
+  // 1. reshape_dims_vec is the broadcast parameter.
+  // 2. reduce_dims_vec is the dimension parameter to compute gradients. For
+  //    each dimension expanded, the gradients should be summed to original
+  //    size.
+  std::vector<int> reshape_dims_vec;
+  std::vector<int> reduce_dims_vec;
+  for (size_t i = 0; i < repeat_times_data.size(); ++i) {
+    reduce_dims_vec.push_back(reshape_dims_vec.size());
+    reshape_dims_vec.push_back(repeat_times_data[i]);
+    reshape_dims_vec.push_back(vec_x_dims[i]);
+  }
+
+  dev_ctx.template Alloc<T>(x_grad);
+
+  int dims = reduce_dims_vec.size();
+
+  bool just_copy = true;
+  for (size_t i = 0; i < repeat_times_data.size(); i++) {
+    if (repeat_times_data[i] != 1) {
+      just_copy = false;
+      break;
+    }
+  }
+  // no need reduce, just copy
+  if (just_copy) {
+    phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, x_grad);
+    // TensorCopy may change the dims of dx
+    x_grad->Resize(x_dims);
+  } else {
+    PADDLE_ENFORCE_GE(dims,
+                      1,
+                      errors::InvalidArgument(
+                          "The rank of the input 'Out@GRAD' for tile_grad op "
+                          "must be greater than or equal to 1, but "
+                          "the value received is %d.",
+                          dims));
+    PADDLE_ENFORCE_LE(dims,
+                      MAX_RANK_SUPPORTED,
+                      errors::InvalidArgument(
+                          "The rank of the input 'Out@GRAD' for tile_grad op "
+                          "must be less than or equal "
+                          "to %d, but the value received is %d.",
+                          MAX_RANK_SUPPORTED,
+                          dims));
+
+    using XPUType = typename XPUTypeTrait<T>::Type;
+    // int reduce_sum(Context* ctx, const T* x, T* y, const std::vector<int>&
+    // xshape, const std::vector<int>& rdims)
+    const auto* out_data = out_grad.data<T>();
+    auto* x_grad_data = x_grad->data<T>();
+    int r = xpu::reduce_sum<XPUType>(dev_ctx.x_context(),
+                                     out_data,
+                                     x_grad_data,
+                                     reshape_dims_vec,
+                                     reduce_dims_vec);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
+  }
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(tile_grad, XPU, ALL_LAYOUT, phi::TileGradKernel, float) {}
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py
index 41fc20daffcdfe8b506704098a07205b0d1cd373..106b915dd70aa3da21b764d628bb2ebaae54e159 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py
@@ -59,6 +59,9 @@ class XPUTestTileOpRank1(XPUOpTestWrapper):
         def test_check_output(self):
             self.check_output_with_place(self.place)
 
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
     # with dimension expanding
     class TestTileOpRank2Expanding(TestTileOpRank1):
         def init_data(self):
@@ -126,6 +129,9 @@ class XPUTestTileOpRank1_tensor_attr(XPUOpTestWrapper):
         def test_check_output(self):
             self.check_output_with_place(self.place)
 
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
     class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr):
         def init_data(self):
             self.ori_shape = [12, 14]
@@ -168,6 +174,9 @@ class XPUTestTileOpRank1_tensor(XPUOpTestWrapper):
         def test_check_output(self):
             self.check_output_with_place(self.place)
 
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
     class TestTileOpRank2_tensor(TestTileOpRank1_tensor):
         def init_data(self):
             self.ori_shape = [12, 14]
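Note on the gradient trick in paddle/phi/kernels/xpu/tile_grad_kernel.cc: tile repeats x along each axis, so in the backward pass every input element receives contributions from all of its copies. The kernel encodes this by viewing out_grad with the interleaved shape [repeat_0, x_0, repeat_1, x_1, ...] (reshape_dims_vec) and summing over the even positions (reduce_dims_vec), which xpu::reduce_sum performs in a single call; when every repeat factor is 1, the just_copy branch skips the reduction entirely. The following is a standalone CPU sketch, not part of the patch, that replays the same index bookkeeping on made-up shapes (x of shape [2, 3], repeat_times [2, 1]) to show why the summed gradient comes out right.

// Standalone CPU sketch (not Paddle code) of the reshape/reduce bookkeeping
// used by the XPU tile_grad kernel. Shapes and values are made up.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Forward: x of shape [2, 3] tiled with repeat_times [2, 1] -> out [4, 3].
  std::vector<int> vec_x_dims = {2, 3};
  std::vector<int> repeat_times = {2, 1};

  // Same interleaving as the kernel: view out_grad as
  // [repeat_0, x_0, repeat_1, x_1] = [2, 2, 1, 3] and sum over the
  // repeat axes {0, 2}.
  std::vector<int> reshape_dims_vec;
  std::vector<int> reduce_dims_vec;
  for (std::size_t i = 0; i < repeat_times.size(); ++i) {
    reduce_dims_vec.push_back(static_cast<int>(reshape_dims_vec.size()));
    reshape_dims_vec.push_back(repeat_times[i]);
    reshape_dims_vec.push_back(vec_x_dims[i]);
  }
  // reshape_dims_vec == {2, 2, 1, 3}, reduce_dims_vec == {0, 2}

  // out_grad filled with ones: every x element appears
  // repeat_0 * repeat_1 = 2 times in out, so each x_grad entry must be 2.
  std::vector<float> out_grad(4 * 3, 1.0f);
  std::vector<float> x_grad(2 * 3, 0.0f);

  // Reference reduction: walk out_grad in the reshaped [2, 2, 1, 3] view
  // and accumulate into the kept axes (1 and 3), i.e. the x layout.
  for (int r0 = 0; r0 < 2; ++r0)          // repeat axis of dim 0 (reduced)
    for (int i0 = 0; i0 < 2; ++i0)        // x axis of dim 0 (kept)
      for (int r1 = 0; r1 < 1; ++r1)      // repeat axis of dim 1 (reduced)
        for (int i1 = 0; i1 < 3; ++i1) {  // x axis of dim 1 (kept)
          int out_idx = ((r0 * 2 + i0) * 1 + r1) * 3 + i1;
          x_grad[i0 * 3 + i1] += out_grad[out_idx];
        }

  for (float g : x_grad) assert(g == 2.0f);
  std::cout << "x_grad is uniformly 2, as expected\n";
  return 0;
}

With out_grad filled with ones, each x_grad entry accumulates exactly repeat_0 * repeat_1 = 2 contributions; the new test_check_grad cases in test_tile_op_xpu.py verify the same property end to end on the XPU place via numeric gradient checking.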