From 5829069d6860ac02a878067b654833c4d5ca62c4 Mon Sep 17 00:00:00 2001 From: ykkk2333 <77383312+ykkk2333@users.noreply.github.com> Date: Wed, 14 Sep 2022 13:47:40 +0800 Subject: [PATCH] [XPU] migrate reduce kernels to phi, test=kunlun (#45973) --- .../operators/reduce_ops/reduce_max_op_xpu.cc | 165 ------------------ .../reduce_ops/reduce_mean_op_xpu.cc | 161 ----------------- .../reduce_ops/reduce_prod_op_xpu.cc | 83 --------- paddle/phi/kernels/reduce_max_kernel.cc | 6 +- paddle/phi/kernels/reduce_mean_kernel.cc | 6 +- paddle/phi/kernels/reduce_prod_kernel.cc | 6 +- paddle/phi/kernels/xpu/reduce.h | 81 +++++++++ .../phi/kernels/xpu/reduce_max_grad_kernel.cc | 113 ++++++++++++ paddle/phi/kernels/xpu/reduce_max_kernel.cc | 43 +++++ .../kernels/xpu/reduce_mean_grad_kernel.cc | 85 +++++++++ paddle/phi/kernels/xpu/reduce_mean_kernel.cc | 43 +++++ paddle/phi/kernels/xpu/reduce_prod_kernel.cc | 43 +++++ 12 files changed, 423 insertions(+), 412 deletions(-) delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc create mode 100644 paddle/phi/kernels/xpu/reduce.h create mode 100644 paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc create mode 100644 paddle/phi/kernels/xpu/reduce_max_kernel.cc create mode 100644 paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc create mode 100644 paddle/phi/kernels/xpu/reduce_mean_kernel.cc create mode 100644 paddle/phi/kernels/xpu/reduce_prod_kernel.cc diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc deleted file mode 100644 index ef9332e4a81..00000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef PADDLE_WITH_XPU -#include -#include - -#include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/device/xpu/xpu_header.h" - -namespace paddle { -namespace operators { - -template -class ReduceMaxXPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - XPUReduce(context, xpu::reduce_max); - } -}; - -template -class ReduceMaxGradXPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto dims = context.Attr>("dim"); - bool reduce_all = context.Attr("reduce_all"); - auto* x = context.Input("X"); - auto* out = context.Input("Out"); - auto* out_grad = context.Input(framework::GradVarName("Out")); - auto* x_grad = context.Output(framework::GradVarName("X")); - - int in_dtype = context.Attr("in_dtype"); - PADDLE_ENFORCE_EQ( - in_dtype == -1, - true, - platform::errors::InvalidArgument( - "XPU only support in_dtype == -1 in reduce_sum_grad op.")); - - auto& dev_ctx = context.template device_context(); - x_grad->mutable_data(context.GetPlace()); - const T* x_data = x->data(); - const T* out_data = out->data(); - const T* out_grad_data = out_grad->data(); - auto* x_grad_data = x_grad->data(); - const auto& input_dim_size = x->dims().size(); - std::vector true_dims; - for (size_t i = 0; i < dims.size(); ++i) { - if (dims[i] < 0) { - true_dims.push_back(dims[i] + input_dim_size); - } else { - true_dims.push_back(dims[i]); - } - } - std::vector ydims(input_dim_size); - std::vector xdims((input_dim_size)); - std::set dims_set(true_dims.begin(), true_dims.end()); - for (auto i = 0; i < input_dim_size; i++) { - xdims[i] = x->dims()[i]; - if (dims_set.find(i) != dims_set.end() || reduce_all) { - ydims[i] = 1; - } else { - ydims[i] = x->dims()[i]; - } - } - - T* brocast1 = nullptr; - T* brocast2 = nullptr; - bool* equal = nullptr; - PADDLE_ENFORCE_EQ( - xpu_malloc(reinterpret_cast(&brocast1), x->numel() * sizeof(T)), - XPU_SUCCESS, - platform::errors::ResourceExhausted("XPU has no enough memory")); - PADDLE_ENFORCE_EQ( - xpu_malloc(reinterpret_cast(&equal), x->numel() * sizeof(bool)), - XPU_SUCCESS, - platform::errors::ResourceExhausted("XPU has no enough memory")); - PADDLE_ENFORCE_EQ( - xpu_malloc(reinterpret_cast(&brocast2), x->numel() * sizeof(T)), - XPU_SUCCESS, - platform::errors::ResourceExhausted("XPU has no enough memory")); - - // step 1. brocast out and out_grad - int r = xpu::broadcast( - dev_ctx.x_context(), out_data, brocast1, ydims, xdims); - PADDLE_ENFORCE_EQ( - r == xpu::Error_t::SUCCESS, - true, - platform::errors::External("XPU broadcast in reduce_max_grad op return" - " wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - r = xpu::broadcast( - dev_ctx.x_context(), out_grad_data, brocast2, ydims, xdims); - PADDLE_ENFORCE_EQ( - r == xpu::Error_t::SUCCESS, - true, - platform::errors::External("XPU broadcast in reduce_max_grad op return" - " wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - // step 2. comparse out_brocast and x - r = xpu::equal(dev_ctx.x_context(), x_data, brocast1, equal, x->numel()); - PADDLE_ENFORCE_EQ( - r == xpu::Error_t::SUCCESS, - true, - platform::errors::External("XPU equal in reduce_max_grad " - "op return wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - // step 3. get x_grad - r = xpu::constant(dev_ctx.x_context(), brocast1, x->numel(), 0); - PADDLE_ENFORCE_EQ( - r == xpu::Error_t::SUCCESS, - true, - platform::errors::External("XPU constant in reduce_max_grad op return" - " wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - r = xpu::select(dev_ctx.x_context(), - equal, - brocast2, - brocast1, - x_grad_data, - xdims, - xdims); - PADDLE_ENFORCE_EQ( - r == xpu::Error_t::SUCCESS, - true, - platform::errors::External("XPU select in reduce_max_grad op return" - " wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - - if (dev_ctx.x_context()->xpu_stream) { - dev_ctx.Wait(); - } - xpu_free(brocast1); - xpu_free(brocast2); - xpu_free(equal); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_XPU_KERNEL( - reduce_max, - ops::ReduceMaxXPUKernel); -REGISTER_OP_XPU_KERNEL( - reduce_max_grad, - ops::ReduceMaxGradXPUKernel); - -#endif diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc deleted file mode 100644 index c86ebbc20c3..00000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef PADDLE_WITH_XPU - -#include -#include -#include - -#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h" - -namespace paddle { -namespace operators { -template -class ReduceMeanXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(context.GetPlace()), - true, - platform::errors::Unavailable("This kernel only runs on XPU.")); - bool reduce_all = context.Attr("reduce_all"); - auto* input = context.Input("X"); - auto* output = context.Output("Out"); - output->mutable_data(context.GetPlace()); - auto& dev_ctx = context.template device_context(); - - std::vector xdims; - for (int i = 0; i < input->dims().size(); i++) { - xdims.push_back(input->dims()[i]); - } - auto rdims = context.Attr>("dim"); - const auto& input_dim_size = input->dims().size(); - std::vector reduce_dims; - if (reduce_all) { - for (size_t i = 0; i < xdims.size(); i++) { - reduce_dims.push_back(static_cast(i)); - } - } else { - for (size_t i = 0; i < rdims.size(); ++i) { - if (rdims[i] < 0) { - reduce_dims.push_back(rdims[i] + input_dim_size); - } else { - reduce_dims.push_back(rdims[i]); - } - } - } - int r = xpu::reduce_mean(dev_ctx.x_context(), - reinterpret_cast(input->data()), - reinterpret_cast(output->data()), - xdims, - reduce_dims); - - PADDLE_ENFORCE_EQ(r, - XPU_SUCCESS, - platform::errors::External( - "XPU reduce_mean kernel return wrong value[%d %s]", - r, - XPUAPIErrorMsg[r])); - } -}; - -template -class ReduceMeanGradXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - - XPUType* x_data = - reinterpret_cast(input_grad->mutable_data(ctx.GetPlace())); - const XPUType* dy_data = - reinterpret_cast(output_grad->data()); - - bool reduce_all = ctx.Attr("reduce_all"); - auto reduce_dims = ctx.Attr>("dim"); - bool keep_dim = ctx.Attr("keep_dim"); - - std::vector xdims; - for (int i = 0; i < input->dims().size(); i++) { - xdims.push_back(input->dims()[i]); - } - std::vector ydims; - for (int i = 0; i < output_grad->dims().size(); i++) { - ydims.push_back(output_grad->dims()[i]); - } - - int reduce_numel = 1; - if (reduce_all) { - reduce_dims.clear(); - for (size_t d = 0; d < xdims.size(); ++d) { - reduce_dims.push_back(static_cast(d)); - } - } - for (auto& d : reduce_dims) { - if (d < 0) { - d = d + xdims.size(); - } - reduce_numel *= xdims[d]; - } - - if (keep_dim != true) { - sort(reduce_dims.begin(), reduce_dims.end()); - for (auto& d : reduce_dims) { - ydims.insert(ydims.begin() + d, 1); - } - } - - float val = 1.0f / static_cast(reduce_numel); - - auto& dev_ctx = ctx.template device_context(); - - int r = xpu::constant( - dev_ctx.x_context(), x_data, input->numel(), static_cast(val)); - - PADDLE_ENFORCE_EQ(r, - XPU_SUCCESS, - platform::errors::External( - "XPU constant kernel return wrong value[%d %s]", - r, - XPUAPIErrorMsg[r])); - r = xpu::broadcast_mul( - dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims); - - PADDLE_ENFORCE_EQ(r, - XPU_SUCCESS, - platform::errors::External( - "XPU broadcast_mul kernel return wrong value[%d %s]", - r, - XPUAPIErrorMsg[r])); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_XPU_KERNEL( - reduce_mean, - ops::ReduceMeanXPUKernel); - -REGISTER_OP_XPU_KERNEL( - reduce_mean_grad, - ops::ReduceMeanGradXPUKernel); - -#endif diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc deleted file mode 100644 index 8b26a73fc95..00000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef PADDLE_WITH_XPU - -#include -#include - -#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h" - -namespace paddle { -namespace operators { -template -class ReduceProdXPUKernel : public framework::OpKernel { - using XPUType = typename XPUTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(context.GetPlace()), - true, - platform::errors::Unavailable("This kernel only runs on XPU.")); - bool reduce_all = context.Attr("reduce_all"); - auto* input = context.Input("X"); - auto* output = context.Output("Out"); - output->mutable_data(context.GetPlace()); - auto& dev_ctx = context.template device_context(); - - std::vector xdims; - for (int i = 0; i < input->dims().size(); i++) { - xdims.push_back(input->dims()[i]); - } - auto rdims = context.Attr>("dim"); - const auto& input_dim_size = input->dims().size(); - - std::vector reduce_dims; - if (reduce_all) { - for (size_t i = 0; i < xdims.size(); i++) { - reduce_dims.push_back(static_cast(i)); - } - } else { - for (size_t i = 0; i < rdims.size(); ++i) { - if (rdims[i] < 0) { - reduce_dims.push_back(rdims[i] + input_dim_size); - } else { - reduce_dims.push_back(rdims[i]); - } - } - } - int r = xpu::reduce_prod(dev_ctx.x_context(), - reinterpret_cast(input->data()), - reinterpret_cast(output->data()), - xdims, - reduce_dims); - - PADDLE_ENFORCE_EQ(r, - XPU_SUCCESS, - platform::errors::External( - "XPU reduce_prod kernel return wrong value[%d %s]", - r, - XPUAPIErrorMsg[r])); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_XPU_KERNEL( - reduce_prod, - ops::ReduceProdXPUKernel); - -#endif diff --git a/paddle/phi/kernels/reduce_max_kernel.cc b/paddle/phi/kernels/reduce_max_kernel.cc index 7a6e53d7651..cf5862ef997 100644 --- a/paddle/phi/kernels/reduce_max_kernel.cc +++ b/paddle/phi/kernels/reduce_max_kernel.cc @@ -42,7 +42,7 @@ PD_REGISTER_KERNEL( max, GPU, ALL_LAYOUT, phi::MaxKernel, float, double, int, int64_t) {} #endif -#if defined(PADDLE_WITH_XPU_KP) +#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU) PD_REGISTER_KERNEL(max, KPS, ALL_LAYOUT, phi::MaxKernel, float) {} #endif @@ -50,3 +50,7 @@ PD_REGISTER_KERNEL(max, KPS, ALL_LAYOUT, phi::MaxKernel, float) {} PD_REGISTER_KERNEL( max, OneDNN, ALL_LAYOUT, phi::MaxKernel, float, phi::dtype::bfloat16) {} #endif + +#if defined(PADDLE_WITH_XPU) +PD_REGISTER_KERNEL(max, XPU, ALL_LAYOUT, phi::MaxKernel, float) {} +#endif diff --git a/paddle/phi/kernels/reduce_mean_kernel.cc b/paddle/phi/kernels/reduce_mean_kernel.cc index df3ec97592f..e7df2596f52 100644 --- a/paddle/phi/kernels/reduce_mean_kernel.cc +++ b/paddle/phi/kernels/reduce_mean_kernel.cc @@ -47,7 +47,7 @@ PD_REGISTER_KERNEL(mean, phi::dtype::float16) {} #endif -#if defined(PADDLE_WITH_XPU_KP) +#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU) PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {} #endif @@ -55,3 +55,7 @@ PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {} PD_REGISTER_KERNEL( mean, OneDNN, ALL_LAYOUT, phi::MeanKernel, float, phi::dtype::bfloat16) {} #endif + +#if defined(PADDLE_WITH_XPU) +PD_REGISTER_KERNEL(mean, XPU, ALL_LAYOUT, phi::MeanKernel, float) {} +#endif diff --git a/paddle/phi/kernels/reduce_prod_kernel.cc b/paddle/phi/kernels/reduce_prod_kernel.cc index 37f1f7bb817..538c5a5175a 100644 --- a/paddle/phi/kernels/reduce_prod_kernel.cc +++ b/paddle/phi/kernels/reduce_prod_kernel.cc @@ -39,6 +39,10 @@ PD_REGISTER_KERNEL( prod, GPU, ALL_LAYOUT, phi::ProdKernel, float, double, int, int64_t) {} #endif -#if defined(PADDLE_WITH_XPU_KP) +#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU) PD_REGISTER_KERNEL(prod, KPS, ALL_LAYOUT, phi::ProdKernel, float) {} #endif + +#if defined(PADDLE_WITH_XPU) +PD_REGISTER_KERNEL(prod, XPU, ALL_LAYOUT, phi::ProdKernel, float) {} +#endif diff --git a/paddle/phi/kernels/xpu/reduce.h b/paddle/phi/kernels/xpu/reduce.h new file mode 100644 index 00000000000..81fe362a61a --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce.h @@ -0,0 +1,81 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include + +namespace phi { + +template +int XPUReduce(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out, + std::function&, + const std::vector&)> func) { + dev_ctx.template Alloc(out); + + const auto* x_data = x.data(); + auto* y_data = out->data(); + const auto& input_dim_size = x.dims().size(); + std::vector true_dims; + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) { + true_dims.push_back(dims[i] + input_dim_size); + } else { + true_dims.push_back(dims[i]); + } + } + + std::vector reduce_dims; + std::vector xdims((input_dim_size)); + for (int i = 0; i < input_dim_size; ++i) { + xdims[i] = x.dims()[i]; + } + if (reduce_all) { + for (int i = 0; i < input_dim_size; ++i) { + reduce_dims.push_back(i); + } + } else { + std::set dims_set(true_dims.begin(), true_dims.end()); + for (auto i = 0; i < input_dim_size; i++) { + if (dims_set.find(i) != dims_set.end()) { + if (x.dims()[i] != 1) { + reduce_dims.push_back(i); + } + } + } + } + + int r = xpu::SUCCESS; + if (reduce_dims.size() == 0) { + r = xpu::copy( + dev_ctx.x_context(), x_data, y_data, x.numel() * sizeof(T)); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy"); + } else { + r = func(dev_ctx.x_context(), x_data, y_data, xdims, reduce_dims); + } + return r; +} + +} // namespace phi diff --git a/paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc b/paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc new file mode 100644 index 00000000000..df4dc678392 --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/reduce_max_grad_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +#include "paddle/phi/kernels/xpu/reduce.h" + +namespace phi { + +template +void ReduceMaxGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out, + const DenseTensor& out_grad, + const IntArray& dims_arr, + bool keep_dim, + bool reduce_all, + DenseTensor* x_grad) { + auto dims = dims_arr.GetData(); + + dev_ctx.template Alloc(x_grad); + const T* x_data = x.data(); + const T* out_data = out.data(); + const T* out_grad_data = out_grad.data(); + auto* x_grad_data = x_grad->data(); + const auto& input_dim_size = x.dims().size(); + std::vector true_dims; + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) { + true_dims.push_back(dims[i] + input_dim_size); + } else { + true_dims.push_back(dims[i]); + } + } + std::vector ydims(input_dim_size); + std::vector xdims((input_dim_size)); + std::set dims_set(true_dims.begin(), true_dims.end()); + for (auto i = 0; i < input_dim_size; i++) { + xdims[i] = x.dims()[i]; + if (dims_set.find(i) != dims_set.end() || reduce_all) { + ydims[i] = 1; + } else { + ydims[i] = x.dims()[i]; + } + } + + T* brocast1 = nullptr; + T* brocast2 = nullptr; + bool* equal = nullptr; + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&brocast1), x.numel() * sizeof(T)), + XPU_SUCCESS, + errors::ResourceExhausted("XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&equal), x.numel() * sizeof(bool)), + XPU_SUCCESS, + errors::ResourceExhausted("XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&brocast2), x.numel() * sizeof(T)), + XPU_SUCCESS, + errors::ResourceExhausted("XPU has no enough memory")); + + // step 1. brocast out and out_grad + int r = + xpu::broadcast(dev_ctx.x_context(), out_data, brocast1, ydims, xdims); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast"); + + r = xpu::broadcast( + dev_ctx.x_context(), out_grad_data, brocast2, ydims, xdims); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast"); + + // step 2. comparse out_brocast and x + r = xpu::equal(dev_ctx.x_context(), x_data, brocast1, equal, x.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "equal"); + // step 3. get x_grad + r = xpu::constant(dev_ctx.x_context(), brocast1, x.numel(), 0); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); + r = xpu::select(dev_ctx.x_context(), + equal, + brocast2, + brocast1, + x_grad_data, + xdims, + xdims); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "select"); + + if (dev_ctx.x_context()->xpu_stream) { + dev_ctx.Wait(); + } + xpu_free(brocast1); + xpu_free(brocast2); + xpu_free(equal); +} + +} // namespace phi + +PD_REGISTER_KERNEL(max_grad, XPU, ALL_LAYOUT, phi::ReduceMaxGradKernel, float) { +} diff --git a/paddle/phi/kernels/xpu/reduce_max_kernel.cc b/paddle/phi/kernels/xpu/reduce_max_kernel.cc new file mode 100644 index 00000000000..d0994f580cf --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_max_kernel.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/reduce_max_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/xpu/reduce.h" + +namespace phi { + +template +void MaxRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + int r = XPUReduce(dev_ctx, + x, + dims.GetData(), + keep_dim, + reduce_all, + out, + xpu::reduce_max); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(max_raw, XPU, ALL_LAYOUT, phi::MaxRawKernel, float) {} diff --git a/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc b/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc new file mode 100644 index 00000000000..2d82a77a24d --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/reduce_mean_grad_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +#include "paddle/phi/kernels/xpu/reduce.h" + +namespace phi { + +template +void ReduceMeanGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const IntArray& dims_arr, + bool keep_dim, + bool reduce_all, + DenseTensor* x_grad) { + using XPUType = typename XPUTypeTrait::Type; + dev_ctx.template Alloc(x_grad); + const XPUType* dy_data = reinterpret_cast(out_grad.data()); + + XPUType* x_data = reinterpret_cast(x_grad->data()); + + auto reduce_dims = dims_arr.GetData(); + + std::vector xdims; + for (int i = 0; i < x.dims().size(); i++) { + xdims.push_back(x.dims()[i]); + } + std::vector ydims; + for (int i = 0; i < out_grad.dims().size(); i++) { + ydims.push_back(out_grad.dims()[i]); + } + + int reduce_numel = 1; + if (reduce_all) { + reduce_dims.clear(); + for (size_t d = 0; d < xdims.size(); ++d) { + reduce_dims.push_back(static_cast(d)); + } + } + for (auto& d : reduce_dims) { + if (d < 0) { + d = d + xdims.size(); + } + reduce_numel *= xdims[d]; + } + + if (keep_dim != true) { + sort(reduce_dims.begin(), reduce_dims.end()); + for (auto& d : reduce_dims) { + ydims.insert(ydims.begin() + d, 1); + } + } + + float val = 1.0f / static_cast(reduce_numel); + + int r = xpu::constant( + dev_ctx.x_context(), x_data, x.numel(), static_cast(val)); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); + + r = xpu::broadcast_mul( + dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul"); +} + +} // namespace phi + +PD_REGISTER_KERNEL( + mean_grad, XPU, ALL_LAYOUT, phi::ReduceMeanGradKernel, float) {} diff --git a/paddle/phi/kernels/xpu/reduce_mean_kernel.cc b/paddle/phi/kernels/xpu/reduce_mean_kernel.cc new file mode 100644 index 00000000000..4af1ba2da27 --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_mean_kernel.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/reduce_mean_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/xpu/reduce.h" + +namespace phi { + +template +void MeanRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + int r = XPUReduce(dev_ctx, + x, + dims.GetData(), + keep_dim, + reduce_all, + out, + xpu::reduce_mean); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_mean"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(mean_raw, XPU, ALL_LAYOUT, phi::MeanRawKernel, float) {} diff --git a/paddle/phi/kernels/xpu/reduce_prod_kernel.cc b/paddle/phi/kernels/xpu/reduce_prod_kernel.cc new file mode 100644 index 00000000000..c82dd1b5f66 --- /dev/null +++ b/paddle/phi/kernels/xpu/reduce_prod_kernel.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/reduce_prod_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/xpu/reduce.h" + +namespace phi { + +template +void ProdRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + int r = XPUReduce(dev_ctx, + x, + dims.GetData(), + keep_dim, + reduce_all, + out, + xpu::reduce_prod); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_prod"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(prod_raw, XPU, ALL_LAYOUT, phi::ProdRawKernel, float) {} -- GitLab