diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc deleted file mode 100644 index 1d36bdb28412129b899be201123ee11c49ae98db..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef PADDLE_WITH_XPU -#include -#include - -#include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/device/xpu/xpu_header.h" - -namespace paddle { -namespace operators { - -template -class ReduceSumXPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - XPUReduce(context, xpu::reduce_sum); - } -}; - -template -class ReduceSumGradXPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto dims = context.Attr>("dim"); - bool reduce_all = context.Attr("reduce_all"); - auto* x = context.Input("X"); - auto* out = context.Input(framework::GradVarName("Out")); - auto* x_grad = context.Output(framework::GradVarName("X")); - - int in_dtype = context.Attr("in_dtype"); - PADDLE_ENFORCE_EQ( - in_dtype == -1, - true, - platform::errors::InvalidArgument( - "XPU only support in_dtype == -1 in reduce_sum_grad op.")); - - auto& dev_ctx = context.template device_context(); - x_grad->mutable_data(context.GetPlace()); - const auto* out_data = out->data(); - auto* x_grad_data = x_grad->data(); - - const auto& input_dim_size = x->dims().size(); - std::vector true_dims; - for (size_t i = 0; i < dims.size(); ++i) { - if (dims[i] < 0) { - true_dims.push_back(dims[i] + input_dim_size); - } else { - true_dims.push_back(dims[i]); - } - } - - std::vector ydims(input_dim_size); - std::vector xdims((input_dim_size)); - std::set dims_set(true_dims.begin(), true_dims.end()); - for (auto i = 0; i < input_dim_size; i++) { - xdims[i] = x->dims()[i]; - if (dims_set.find(i) != dims_set.end() || reduce_all) { - ydims[i] = 1; - } else { - ydims[i] = x->dims()[i]; - } - } - - int r = xpu::broadcast( - dev_ctx.x_context(), out_data, x_grad_data, ydims, xdims); - PADDLE_ENFORCE_EQ( - r == xpu::Error_t::SUCCESS, - true, - platform::errors::External("XPU broadcast in reduce_sum_grad op return" - " wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_XPU_KERNEL( - reduce_sum, - ops::ReduceSumXPUKernel); -REGISTER_OP_XPU_KERNEL( - reduce_sum_grad, - ops::ReduceSumGradXPUKernel); - -#endif diff --git a/paddle/phi/kernels/reduce_sum_kernel.cc b/paddle/phi/kernels/reduce_sum_kernel.cc index 83db2d854b991e10ac4d50a17d4434f2b4af34a6..075e4a6022d7f56a04be29aeaf4b947caf20b39b 100644 --- a/paddle/phi/kernels/reduce_sum_kernel.cc +++ b/paddle/phi/kernels/reduce_sum_kernel.cc @@ -73,7 +73,7 @@ PD_REGISTER_KERNEL(sum, } #endif -#if defined(PADDLE_WITH_XPU_KP) +#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU) PD_REGISTER_KERNEL(sum, KPS, ALL_LAYOUT, phi::SumKernel, float) { kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); } @@ -83,3 +83,7 @@ PD_REGISTER_KERNEL(sum, KPS, ALL_LAYOUT, phi::SumKernel, float) { PD_REGISTER_KERNEL( sum, OneDNN, ALL_LAYOUT, phi::SumKernel, float, phi::dtype::bfloat16) {} #endif + +#if defined(PADDLE_WITH_XPU) +PD_REGISTER_KERNEL(sum, XPU, ALL_LAYOUT, phi::SumKernel, float) {} +#endif diff --git a/paddle/phi/kernels/xpu/reduce_sum_grad_kernel.cc b/paddle/phi/kernels/xpu/reduce_sum_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..9dc1fe92faccdedba0b1e33f23736018d0acc392 --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_sum_grad_kernel.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "paddle/phi/kernels/reduce_sum_grad_kernel.h" + +#include +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void ReduceSumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const IntArray& dims_arr, + bool keep_dim, + bool reduce_all, + DenseTensor* x_grad) { + using XPUType = typename XPUTypeTrait::Type; + auto dims = dims_arr.GetData(); + dev_ctx.template Alloc(x_grad); + const auto* out_data = out_grad.data(); + auto* x_grad_data = x_grad->data(); + const auto& input_dim_size = x.dims().size(); + std::vector true_dims; + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) { + true_dims.push_back(dims[i] + input_dim_size); + } else { + true_dims.push_back(dims[i]); + } + } + + std::vector ydims(input_dim_size); + std::vector xdims((input_dim_size)); + std::set dims_set(true_dims.begin(), true_dims.end()); + for (auto i = 0; i < input_dim_size; i++) { + xdims[i] = x.dims()[i]; + if (dims_set.find(i) != dims_set.end() || reduce_all) { + ydims[i] = 1; + } else { + ydims[i] = x.dims()[i]; + } + } + + int r = xpu::broadcast( + dev_ctx.x_context(), out_data, x_grad_data, ydims, xdims); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(sum_grad, XPU, ALL_LAYOUT, phi::ReduceSumGradKernel, float) { +} diff --git a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..74c50304b1407b22eeb25b828bd0af03303148e3 --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/reduce_sum_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/xpu/reduce.h" + +namespace phi { + +template +void SumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out) { + int r = XPUReduce(dev_ctx, + x, + dims.GetData(), + keep_dim, + reduce_all, + out, + xpu::reduce_sum); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float) {} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py index ac827b6738f8fb048c7eb7c8375526282acfc182..597b7ee0fe98772765ccdd90701f139e1dcbd585 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py @@ -67,7 +67,7 @@ class XPUTestReduceMaxOp(XPUOpTestWrapper): self.check_output_with_place(self.place) def test_check_grad(self): - pass + self.check_grad_with_place(self.place, ['X'], 'Out') class XPUTestReduceMaxCase1(XPUTestReduceMaxBase): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py index d80fd187dfdf7dfde4b01036582397f3ba078c03..15db9e5a375cc37ea1321a9bb83d36f19a9f5718 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py @@ -67,7 +67,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper): self.check_output_with_place(self.place) def test_check_grad(self): - pass + self.check_grad_with_place(self.place, ['X'], 'Out') class XPUTestReduceSumCase1(XPUTestReduceSumBase):