diff --git a/paddle/fluid/operators/one_hot_v2_op_xpu.cc b/paddle/fluid/operators/one_hot_v2_op_xpu.cc
deleted file mode 100644
index 1d750dfbc131885805c2c2b9d683a7956e1755c7..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/one_hot_v2_op_xpu.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef PADDLE_WITH_XPU
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/operators/one_hot_op.h"
-
-namespace paddle {
-namespace operators {
-
-using LoDTensor = framework::LoDTensor;
-using Tensor = framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class OneHotV2XPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
-    int depth = context.Attr<int>("depth");
-    if (context.HasInput("depth_tensor")) {
-      auto* depth_tensor = context.Input<Tensor>("depth_tensor");
-      auto* depth_data = depth_tensor->data<int32_t>();
-      if (platform::is_xpu_place(depth_tensor->place())) {
-        xpu_memcpy(static_cast<void*>(&depth),
-                   static_cast<const void*>(depth_data),
-                   sizeof(int32_t),
-                   XPU_DEVICE_TO_HOST);
-      } else {
-        depth = depth_data[0];
-      }
-      auto out_dims = out->dims();
-      out_dims[out_dims.size() - 1] = depth;
-      out->Resize(out_dims);
-    }
-
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-    int len = in->numel();
-    int ret = xpu::one_hot<T>(dev_ctx.x_context(),
-                              in->data<T>(),
-                              out->mutable_data<float>(context.GetPlace()),
-                              len,
-                              depth,
-                              1.0,
-                              0.0);
-
-    PADDLE_ENFORCE_EQ(ret,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU one_hot kernel return wrong value[%d %s]",
-                          ret,
-                          XPUAPIErrorMsg[ret]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    one_hot_v2,
-    ops::OneHotV2XPUKernel<paddle::platform::XPUDeviceContext, int>,
-    ops::OneHotV2XPUKernel<paddle::platform::XPUDeviceContext, int64_t>);
-#endif
diff --git a/paddle/fluid/operators/p_norm_op_xpu.cc b/paddle/fluid/operators/p_norm_op_xpu.cc
deleted file mode 100644
index 0d2bb42790381a5f6e7bd376b47b16cfd1f313db..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/p_norm_op_xpu.cc
+++ /dev/null
@@ -1,355 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#include "paddle/fluid/platform/device/xpu/xpu_header.h"
-
-namespace paddle {
-namespace operators {
-
-inline void GetDims(
-    const phi::DDim& dim, int axis, int* m, int* t, int* n, bool asvector) {
-  *m = 1;
-  *n = 1;
-  *t = dim[axis];
-  if (asvector) {
-    *t = product(dim);
-  } else {
-    for (int i = 0; i < axis; ++i) {
-      (*m) *= dim[i];
-    }
-    for (int i = axis + 1; i < dim.size(); ++i) {
-      (*n) *= dim[i];
-    }
-  }
-}
-
-using Tensor = framework::Tensor;
-template <typename DeviceContext, typename T>
-class P_NormXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<Tensor>("X");
-    auto* out = ctx.Output<Tensor>("Out");
-    out->mutable_data<T>(ctx.GetPlace());
-
-    float porder = ctx.Attr<float>("porder");
-    int axis = ctx.Attr<int>("axis");
-    bool asvector = ctx.Attr<bool>("asvector");
-
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    auto xdim = in->dims();
-    if (axis < 0) axis = xdim.size() + axis;
-    std::vector<int> r_dim;
-    std::vector<int> x_dim;
-    std::vector<int> y_dim;
-    int m = 1;
-    int n = 1;
-    int t = 1;
-    GetDims(xdim, axis, &m, &t, &n, asvector);
-    x_dim.push_back(m);
-    x_dim.push_back(t);
-    x_dim.push_back(n);
-
-    r_dim.push_back(1);
-
-    y_dim.push_back(m);
-    y_dim.push_back(n);
-
-    int r = 0;
-
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-    XPUType* tmp_x = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-    PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_x);
-    r = xpu::abs(dev_ctx.x_context(),
-                 reinterpret_cast<const XPUType*>(in->data<T>()),
-                 tmp_x,
-                 m * t * n);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
-    if (porder == INFINITY) {
-      r = xpu::reduce_max(dev_ctx.x_context(),
-                          tmp_x,
-                          reinterpret_cast<XPUType*>(out->data<T>()),
-                          x_dim,
-                          r_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max");
-    } else if (porder == -INFINITY) {
-      r = xpu::reduce_min(dev_ctx.x_context(),
-                          tmp_x,
-                          reinterpret_cast<XPUType*>(out->data<T>()),
-                          x_dim,
-                          r_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_min");
-    } else if (porder == 0) {
-      XPUType* zeros = RAII_GUARD.alloc_l3_or_gm<XPUType>(1);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(zeros);
-      r = xpu::constant(dev_ctx.x_context(), zeros, 1, 0.0f);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-      std::vector<int> zeros_dim(1, 1);
-
-      bool* tmp2_x = RAII_GUARD.alloc_l3_or_gm<bool>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(tmp2_x);
-
-      r = xpu::broadcast_not_equal(
-          dev_ctx.x_context(), tmp_x, zeros, tmp2_x, x_dim, zeros_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_not_equal");
-
-      XPUType* x_mid = tmp_x;
-
-      r = xpu::cast<bool, XPUType>(
-          dev_ctx.x_context(), tmp2_x, x_mid, m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-
-      r = xpu::reduce_sum(dev_ctx.x_context(),
-                          x_mid,
-                          reinterpret_cast<XPUType*>(out->data<T>()),
-                          x_dim,
-                          r_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
-
-    } else {
-      Tensor porder_tensor;
-      framework::DDim pdim = phi::make_ddim({1});
-      porder_tensor.mutable_data<float>(pdim, in->place());
-      r = xpu::constant(
-          dev_ctx.x_context(), porder_tensor.data<float>(), 1, porder);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-      std::vector<int> p_dim(1, 1);
-
-      XPUType* tmp2_x = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(tmp2_x);
-      r = xpu::broadcast_pow(
-          dev_ctx.x_context(),
-          reinterpret_cast<const float*>(tmp_x),
-          reinterpret_cast<const float*>(porder_tensor.data<float>()),
-          tmp2_x,
-          x_dim,
-          p_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
-
-      XPUType* tmp_y = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_y);
-
-      r = xpu::reduce_sum(dev_ctx.x_context(),
-                          reinterpret_cast<const float*>(tmp2_x),
-                          tmp_y,
-                          x_dim,
-                          r_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
-
-      r = xpu::constant(
-          dev_ctx.x_context(), porder_tensor.data<float>(), 1, 1.0f / porder);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-
-      r = xpu::broadcast_pow(
-          dev_ctx.x_context(),
-          reinterpret_cast<const float*>(tmp_y),
-          reinterpret_cast<const float*>(porder_tensor.data<float>()),
-          reinterpret_cast<float*>(out->data<T>()),
-          y_dim,
-          p_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
-      dev_ctx.Wait();
-    }
-  }
-};
-
-template <typename DeviceContext, typename T>
-class P_NormGradXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Out");
-    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    dx->mutable_data<T>(ctx.GetPlace());
-    auto xdim = x->dims();
-    float porder = ctx.Attr<float>("porder");
-    bool asvector = ctx.Attr<bool>("asvector");
-    int axis = ctx.Attr<int>("axis");
-    axis = axis < 0 ? xdim.size() + axis : axis;
-
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-
-    int m, t, n;
-    GetDims(xdim, axis, &m, &t, &n, asvector);
-
-    std::vector<int> r_dim;
-    std::vector<int> x_dim;
-    std::vector<int> y_dim;
-
-    x_dim.push_back(m);
-    x_dim.push_back(t);
-    x_dim.push_back(n);
-
-    y_dim.push_back(m);
-    y_dim.push_back(1);
-    y_dim.push_back(n);
-
-    int r = 0;
-    if (porder == 0) {
-      r = xpu::constant(dev_ctx.x_context(),
-                        reinterpret_cast<XPUType*>(dx->data<T>()),
-                        m * t * n,
-                        static_cast<XPUType>(0));
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-    } else if (porder == INFINITY || porder == -INFINITY) {
-      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-      XPUType* x_abs = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(x_abs);
-      r = xpu::abs(dev_ctx.x_context(),
-                   reinterpret_cast<const XPUType*>(x->data<T>()),
-                   x_abs,
-                   m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
-
-      bool* dx_t = RAII_GUARD.alloc_l3_or_gm<bool>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(dx_t);
-
-      XPUType* dx_mid = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(dx_mid);
-
-      r = xpu::broadcast_equal<XPUType>(
-          dev_ctx.x_context(),
-          reinterpret_cast<const XPUType*>(x_abs),
-          reinterpret_cast<const XPUType*>(y->data<T>()),
-          dx_t,
-          x_dim,
-          y_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_equal");
-
-      r = xpu::cast<bool, XPUType>(
-          dev_ctx.x_context(), dx_t, dx_mid, m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-
-      XPUType* x_sign = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(x_sign);
-      r = xpu::sign(dev_ctx.x_context(),
-                    reinterpret_cast<const XPUType*>(x->data<T>()),
-                    x_sign,
-                    m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "sign");
-
-      XPUType* dx_pre_dy = x_abs;
-      r = xpu::mul(dev_ctx.x_context(),
-                   reinterpret_cast<const XPUType*>(dx_mid),
-                   reinterpret_cast<const XPUType*>(x_sign),
-                   dx_pre_dy,
-                   m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul");
-
-      r = xpu::broadcast_mul(dev_ctx.x_context(),
-                             dx_pre_dy,
-                             reinterpret_cast<const XPUType*>(dy->data<T>()),
-                             reinterpret_cast<XPUType*>(dx->data<T>()),
-                             x_dim,
-                             y_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
-
-    } else {
-      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-      XPUType* x_abs = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(x_abs);
-      r = xpu::abs(dev_ctx.x_context(),
-                   reinterpret_cast<const XPUType*>(x->data<T>()),
-                   x_abs,
-                   m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
-
-      Tensor porder_tensor;
-      framework::DDim pdim = phi::make_ddim({1});
-      porder_tensor.mutable_data<float>(pdim, x->place());
-      r = xpu::constant(
-          dev_ctx.x_context(), porder_tensor.data<float>(), 1, porder - 1.0f);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-      std::vector<int> p_dim(1, 1);
-
-      XPUType* x_pow = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(x_pow);
-      r = xpu::broadcast_pow(
-          dev_ctx.x_context(),
-          reinterpret_cast<const float*>(x_abs),
-          reinterpret_cast<const float*>(porder_tensor.data<float>()),
-          x_pow,
-          x_dim,
-          p_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
-
-      XPUType* y_pow = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(y_pow);
-      r = xpu::broadcast_pow(
-          dev_ctx.x_context(),
-          reinterpret_cast<const float*>(y->data<T>()),
-          reinterpret_cast<const float*>(porder_tensor.data<float>()),
-          y_pow,
-          y_dim,
-          p_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
-      dev_ctx.Wait();
-
-      XPUType* dx_t = x_abs;
-
-      r = xpu::broadcast_div(
-          dev_ctx.x_context(), x_pow, y_pow, dx_t, x_dim, y_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_div");
-
-      XPUType* x_sign = x_pow;
-      r = xpu::sign(dev_ctx.x_context(),
-                    reinterpret_cast<const XPUType*>(x->data<T>()),
-                    x_sign,
-                    m * t * n);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "sign");
-
-      XPUType* dx_mid = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
-      PADDLE_ENFORCE_XDNN_NOT_NULL(dx_mid);
-
-      r = xpu::broadcast_mul(dev_ctx.x_context(),
-                             reinterpret_cast<const XPUType*>(x_sign),
-                             reinterpret_cast<const XPUType*>(dy->data<T>()),
-                             dx_mid,
-                             x_dim,
-                             y_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
-
-      r = xpu::broadcast_mul(dev_ctx.x_context(),
-                             reinterpret_cast<const XPUType*>(dx_t),
-                             reinterpret_cast<const XPUType*>(dx_mid),
-                             reinterpret_cast<XPUType*>(dx->data<T>()),
-                             x_dim,
-                             x_dim);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    p_norm, ops::P_NormXPUKernel<paddle::platform::XPUDeviceContext, float>);
-REGISTER_OP_XPU_KERNEL(
-    p_norm_grad,
-    ops::P_NormGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
-
-#endif
diff --git a/paddle/phi/kernels/one_hot_kernel.cc b/paddle/phi/kernels/one_hot_kernel.cc
index 755e06752509a4d091ad95b9c0eaefe0998fa6d9..fb5e121676c1ff087f251ec4d1f27bb737d87c07 100644
--- a/paddle/phi/kernels/one_hot_kernel.cc
+++ b/paddle/phi/kernels/one_hot_kernel.cc
@@ -35,3 +35,7 @@ PD_REGISTER_KERNEL(one_hot, CPU, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) {}
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PD_REGISTER_KERNEL(one_hot, GPU, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) {}
 #endif
+
+#ifdef PADDLE_WITH_XPU
+PD_REGISTER_KERNEL(one_hot, XPU, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) {}
+#endif
diff --git a/paddle/phi/kernels/xpu/one_hot_kernel.cc b/paddle/phi/kernels/xpu/one_hot_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a19643cac51001b952a7b0ed9d61a0fa269d49d9
--- /dev/null
+++ b/paddle/phi/kernels/xpu/one_hot_kernel.cc
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/one_hot_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/utils/data_type.h"
+
+namespace phi {
+template <typename Context, typename InT>
+struct OneHotV2OpFunctor {
+  const DenseTensor* in_;
+  DenseTensor* out_;
+  int depth_;
+  const Context& ctx_;
+
+  OneHotV2OpFunctor(const DenseTensor* in,
+                    DenseTensor* out,
+                    int depth,
+                    const Context& ctx)
+      : in_(in), out_(out), depth_(depth), ctx_(ctx) {}
+
+  template <typename OutT>
+  void apply() const {
+    auto* p_in_data = in_->data<InT>();
+    auto numel = in_->numel();
+    auto* p_out_data = ctx_.template Alloc<float>(out_);
+    int r = xpu::one_hot<InT>(
+        ctx_.x_context(), p_in_data, p_out_data, numel, depth_, 1.0, 0.0);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "one_hot");
+  }
+};
+
+template <typename T, typename Context>
+void OneHotRawKernel(const Context& dev_ctx,
+                     const DenseTensor& x,
+                     const Scalar& depth,
+                     DataType dtype,
+                     bool allow_out_of_range,
+                     DenseTensor* out) {
+  auto depth_v = depth.to<int>();
+  auto out_dims = out->dims();
+  if (out_dims[out_dims.size() - 1] == -1) {
+    out_dims[out_dims.size() - 1] = depth_v;
+    out->Resize(out_dims);
+  }
+  phi::VisitDataType(dtype,
+                     OneHotV2OpFunctor<Context, T>(&x, out, depth_v, dev_ctx));
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    one_hot_raw, XPU, ALL_LAYOUT, phi::OneHotRawKernel, int, int64_t) {}
diff --git a/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc b/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..883e3262a64876525546494e45b5441df98aec4a
--- /dev/null
+++ b/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc
@@ -0,0 +1,202 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/p_norm_grad_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+inline void GetDims(
+    const phi::DDim& dim, int axis, int* m, int* t, int* n, bool asvector) {
+  *m = 1;
+  *n = 1;
+  *t = dim[axis];
+  if (asvector) {
+    *t = product(dim);
+  } else {
+    for (int i = 0; i < axis; ++i) {
+      (*m) *= dim[i];
+    }
+    for (int i = axis + 1; i < dim.size(); ++i) {
+      (*n) *= dim[i];
+    }
+  }
+}
+template <typename T, typename Context>
+void PNormGradKernel(const Context& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& out,
+                     const DenseTensor& out_grad,
+                     float porder,
+                     int axis,
+                     float epsilon,
+                     bool keepdim,
+                     bool asvector,
+                     DenseTensor* x_grad) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  dev_ctx.template Alloc<T>(x_grad);
+  auto xdim = x.dims();
+  axis = axis < 0 ? xdim.size() + axis : axis;
+  int m, t, n;
+  GetDims(xdim, axis, &m, &t, &n, asvector);
+
+  std::vector<int> r_dim;
+  std::vector<int> x_dim;
+  std::vector<int> y_dim;
+
+  x_dim.push_back(m);
+  x_dim.push_back(t);
+  x_dim.push_back(n);
+
+  y_dim.push_back(m);
+  y_dim.push_back(1);
+  y_dim.push_back(n);
+
+  int r = 0;
+  if (porder == 0) {
+    r = xpu::constant(dev_ctx.x_context(),
+                      reinterpret_cast<XPUType*>(x_grad->data<T>()),
+                      m * t * n,
+                      static_cast<XPUType>(0));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+  } else if (porder == INFINITY || porder == -INFINITY) {
+    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+    XPUType* x_abs = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_abs);
+    r = xpu::abs(dev_ctx.x_context(),
+                 reinterpret_cast<const XPUType*>(x.data<T>()),
+                 x_abs,
+                 m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
+
+    bool* dx_t = RAII_GUARD.alloc_l3_or_gm<bool>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(dx_t);
+
+    XPUType* dx_mid = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(dx_mid);
+
+    r = xpu::broadcast_equal<XPUType>(
+        dev_ctx.x_context(),
+        reinterpret_cast<const XPUType*>(x_abs),
+        reinterpret_cast<const XPUType*>(out.data<T>()),
+        dx_t,
+        x_dim,
+        y_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_equal");
+
+    r = xpu::cast<bool, XPUType>(dev_ctx.x_context(), dx_t, dx_mid, m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+
+    XPUType* x_sign = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_sign);
+    r = xpu::sign(dev_ctx.x_context(),
+                  reinterpret_cast<const XPUType*>(x.data<T>()),
+                  x_sign,
+                  m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sign");
+
+    XPUType* dx_pre_dy = x_abs;
+    r = xpu::mul(dev_ctx.x_context(),
+                 reinterpret_cast<const XPUType*>(dx_mid),
+                 reinterpret_cast<const XPUType*>(x_sign),
+                 dx_pre_dy,
+                 m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "mul");
+
+    r = xpu::broadcast_mul(dev_ctx.x_context(),
+                           dx_pre_dy,
+                           reinterpret_cast<const XPUType*>(out_grad.data<T>()),
+                           reinterpret_cast<XPUType*>(x_grad->data<T>()),
+                           x_dim,
+                           y_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
+
+  } else {
+    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+    XPUType* x_abs = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_abs);
+    r = xpu::abs(dev_ctx.x_context(),
+                 reinterpret_cast<const XPUType*>(x.data<T>()),
+                 x_abs,
+                 m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
+
+    DenseTensor porder_tensor;
+    phi::DDim pdim = phi::make_ddim({1});
+    porder_tensor.Resize(pdim);
+    dev_ctx.template Alloc<float>(&porder_tensor);
+    r = xpu::constant(
+        dev_ctx.x_context(), porder_tensor.data<float>(), 1, porder - 1.0f);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+    std::vector<int> p_dim(1, 1);
+
+    XPUType* x_pow = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_pow);
+    r = xpu::broadcast_pow(
+        dev_ctx.x_context(),
+        reinterpret_cast<const float*>(x_abs),
+        reinterpret_cast<const float*>(porder_tensor.data<float>()),
+        x_pow,
+        x_dim,
+        p_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
+
+    XPUType* y_pow = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(y_pow);
+    r = xpu::broadcast_pow(
+        dev_ctx.x_context(),
+        reinterpret_cast<const float*>(out.data<T>()),
+        reinterpret_cast<const float*>(porder_tensor.data<float>()),
+        y_pow,
+        y_dim,
+        p_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
+    dev_ctx.Wait();
+
+    XPUType* dx_t = x_abs;
+
+    r = xpu::broadcast_div(
+        dev_ctx.x_context(), x_pow, y_pow, dx_t, x_dim, y_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_div");
+
+    XPUType* x_sign = x_pow;
+    r = xpu::sign(dev_ctx.x_context(),
+                  reinterpret_cast<const XPUType*>(x.data<T>()),
+                  x_sign,
+                  m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sign");
+
+    XPUType* dx_mid = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(dx_mid);
+
+    r = xpu::broadcast_mul(dev_ctx.x_context(),
+                           reinterpret_cast<const XPUType*>(x_sign),
+                           reinterpret_cast<const XPUType*>(out_grad.data<T>()),
+                           dx_mid,
+                           x_dim,
+                           y_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
+
+    r = xpu::broadcast_mul(dev_ctx.x_context(),
+                           reinterpret_cast<const XPUType*>(dx_t),
+                           reinterpret_cast<const XPUType*>(dx_mid),
+                           reinterpret_cast<XPUType*>(x_grad->data<T>()),
+                           x_dim,
+                           x_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
+  }
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(p_norm_grad, XPU, ALL_LAYOUT, phi::PNormGradKernel, float) {}
diff --git a/paddle/phi/kernels/xpu/p_norm_kernel.cc b/paddle/phi/kernels/xpu/p_norm_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7ef72c61ad3aa11ee279e2bc7fcd1839068d5b09
--- /dev/null
+++ b/paddle/phi/kernels/xpu/p_norm_kernel.cc
@@ -0,0 +1,165 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/p_norm_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+inline void GetDims(
+    const phi::DDim& dim, int axis, int* m, int* t, int* n, bool asvector) {
+  *m = 1;
+  *n = 1;
+  *t = dim[axis];
+  if (asvector) {
+    *t = product(dim);
+  } else {
+    for (int i = 0; i < axis; ++i) {
+      (*m) *= dim[i];
+    }
+    for (int i = axis + 1; i < dim.size(); ++i) {
+      (*n) *= dim[i];
+    }
+  }
+}
+
+template <typename T, typename Context>
+void PNormKernel(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 float porder,
+                 int axis,
+                 float epsilon,
+                 bool keepdim,
+                 bool asvector,
+                 DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  dev_ctx.template Alloc<T>(out);
+  auto xdim = x.dims();
+  if (axis < 0) axis = xdim.size() + axis;
+  std::vector<int> r_dim;
+  std::vector<int> x_dim;
+  std::vector<int> y_dim;
+  int m = 1;
+  int n = 1;
+  int t = 1;
+  GetDims(xdim, axis, &m, &t, &n, asvector);
+  x_dim.push_back(m);
+  x_dim.push_back(t);
+  x_dim.push_back(n);
+
+  r_dim.push_back(1);
+
+  y_dim.push_back(m);
+  y_dim.push_back(n);
+
+  int r = 0;
+
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+  XPUType* tmp_x = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+  PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_x);
+
+  r = xpu::abs(dev_ctx.x_context(),
+               reinterpret_cast<const XPUType*>(x.data<T>()),
+               tmp_x,
+               m * t * n);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs");
+  if (porder == INFINITY) {
+    r = xpu::reduce_max(dev_ctx.x_context(),
+                        tmp_x,
+                        reinterpret_cast<XPUType*>(out->data<T>()),
+                        x_dim,
+                        r_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max");
+  } else if (porder == -INFINITY) {
+    r = xpu::reduce_min(dev_ctx.x_context(),
+                        tmp_x,
+                        reinterpret_cast<XPUType*>(out->data<T>()),
+                        x_dim,
+                        r_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_min");
+  } else if (porder == 0) {
+    XPUType* zeros = RAII_GUARD.alloc_l3_or_gm<XPUType>(1);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(zeros);
+    r = xpu::constant(dev_ctx.x_context(), zeros, 1, 0.0f);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+    std::vector<int> zeros_dim(1, 1);
+
+    bool* tmp2_x = RAII_GUARD.alloc_l3_or_gm<bool>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(tmp2_x);
+
+    r = xpu::broadcast_not_equal(
+        dev_ctx.x_context(), tmp_x, zeros, tmp2_x, x_dim, zeros_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_not_equal");
+
+    XPUType* x_mid = tmp_x;
+
+    r = xpu::cast<bool, XPUType>(dev_ctx.x_context(), tmp2_x, x_mid, m * t * n);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+
+    r = xpu::reduce_sum(dev_ctx.x_context(),
+                        x_mid,
+                        reinterpret_cast<XPUType*>(out->data<T>()),
+                        x_dim,
+                        r_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
+
+  } else {
+    DenseTensor porder_tensor;
+    phi::DDim pdim = phi::make_ddim({1});
+    porder_tensor.Resize(pdim);
+    dev_ctx.template Alloc<float>(&porder_tensor);
+    r = xpu::constant(
+        dev_ctx.x_context(), porder_tensor.data<float>(), 1, porder);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+    std::vector<int> p_dim(1, 1);
+
+    XPUType* tmp2_x = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * t * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(tmp2_x);
+    r = xpu::broadcast_pow(
+        dev_ctx.x_context(),
+        reinterpret_cast<const float*>(tmp_x),
+        reinterpret_cast<const float*>(porder_tensor.data<float>()),
+        reinterpret_cast<float*>(tmp2_x),
+        x_dim,
+        p_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
+
+    XPUType* tmp_y = RAII_GUARD.alloc_l3_or_gm<XPUType>(m * n);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_y);
+
+    r = xpu::reduce_sum(dev_ctx.x_context(),
+                        reinterpret_cast<const float*>(tmp2_x),
+                        reinterpret_cast<float*>(tmp_y),
+                        x_dim,
+                        r_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
+
+    r = xpu::constant(
+        dev_ctx.x_context(), porder_tensor.data<float>(), 1, 1.0f / porder);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
+
+    r = xpu::broadcast_pow(
+        dev_ctx.x_context(),
+        reinterpret_cast<const float*>(tmp_y),
+        reinterpret_cast<const float*>(porder_tensor.data<float>()),
+        reinterpret_cast<float*>(out->data<T>()),
+        y_dim,
+        p_dim);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
+    dev_ctx.Wait();
+  }
+}
+}  // namespace phi
+PD_REGISTER_KERNEL(p_norm, XPU, ALL_LAYOUT, phi::PNormKernel, float) {}
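
For reference, and not part of the patch: a minimal host-side C++ sketch of the computation the XPU p_norm kernels above express through the device API. GetDimsRef and PNormRef are hypothetical names introduced here; they mirror the GetDims() factorization of the input shape into (m, t, n), where t is the length of the reduced axis, and the abs -> pow -> reduce_sum -> pow pipeline that PNormKernel uses for finite, nonzero porder.

#include <cmath>
#include <cstdio>
#include <vector>

// Same (m, t, n) factorization as GetDims() in the kernels above: t is the
// size of the reduced axis, m and n are the products of the outer and inner
// dimensions (or t becomes the full element count when asvector is true).
void GetDimsRef(const std::vector<int>& dims, int axis, bool asvector,
                int* m, int* t, int* n) {
  *m = 1;
  *t = dims[axis];
  *n = 1;
  if (asvector) {
    *t = 1;
    for (int d : dims) *t *= d;
    return;
  }
  for (int i = 0; i < axis; ++i) *m *= dims[i];
  for (int i = axis + 1; i < static_cast<int>(dims.size()); ++i) *n *= dims[i];
}

// out[i][k] = (sum_j |x[i][j][k]|^p)^(1/p), i.e. the abs -> broadcast_pow ->
// reduce_sum -> broadcast_pow sequence in PNormKernel for finite, nonzero p.
void PNormRef(const float* x, float* out, int m, int t, int n, float p) {
  for (int i = 0; i < m; ++i) {
    for (int k = 0; k < n; ++k) {
      float acc = 0.f;
      for (int j = 0; j < t; ++j) {
        acc += std::pow(std::fabs(x[(i * t + j) * n + k]), p);
      }
      out[i * n + k] = std::pow(acc, 1.f / p);
    }
  }
}

int main() {
  // Shape (1, 3, 2) reduced along axis 1; the two columns are {3, 4, 0}
  // and {5, 0, 12}, so their L2 norms are 5 and 13.
  std::vector<float> x = {3.f, 5.f, 4.f, 0.f, 0.f, 12.f};
  int m, t, n;
  GetDimsRef({1, 3, 2}, /*axis=*/1, /*asvector=*/false, &m, &t, &n);
  std::vector<float> out(m * n);
  PNormRef(x.data(), out.data(), m, t, n, /*p=*/2.f);
  std::printf("%g %g\n", out[0], out[1]);  // prints: 5 13
  return 0;
}

The reduction layout here is the same one the kernels encode with x_dim = {m, t, n} and r_dim = {1}: the middle axis is summed away and the result has shape (m, n).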