diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
deleted file mode 100644
index 7b2c72d081262210c8b1cb5222623efdd7a4e695..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-class ElementwiseAddXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    XPUElementwise<T, XPUType>(ctx, xpu::broadcast_add<XPUType>);
-  }
-};
-
-template <typename T>
-class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    ElemwiseGradKernel<T>::Compute(ctx);
-    auto* x = ctx.Input<framework::Tensor>("X");
-    auto* dz = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
-    auto* dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
-    auto* dy = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
-    const framework::DDim& dz_dims = dz->dims();
-    int axis = ctx.Attr<int>("axis");
-
-    const T* dz_data = dz->data<T>();
-    auto& dev_ctx =
-        ctx.template device_context<paddle::platform::XPUDeviceContext>();
-
-    if (dx != nullptr) {
-      T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
-      if (dx->dims() == dz_dims) {
-        if (dx_data != dz_data) {
-          framework::TensorCopy(
-              *dz,
-              ctx.GetPlace(),
-              ctx.template device_context<platform::DeviceContext>(),
-              dx);
-        }
-      } else {
-        // For the inplace strategy, dx will be stored at the address of dz,
-        // which makes the result of dy wrong.
-        if (dx->IsSharedBufferWith(*dz)) {
-          dx->clear();
-          dx->mutable_data<T>(x->dims(), ctx.GetPlace());
-        }
-        std::vector<int> reduce_dims = GetReduceDim(dx->dims(), dz_dims, axis);
-        std::vector<int> dz_vector = phi::vectorize<int>(dz_dims);
-
-        int ret =
-            xpu::reduce_sum<XPUType>(dev_ctx.x_context(),
-                                     reinterpret_cast<const XPUType*>(dz_data),
-                                     reinterpret_cast<XPUType*>(dx->data<T>()),
-                                     dz_vector,
-                                     reduce_dims);
-        PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reduce_sum");
-      }
-    }
-
-    if (dy != nullptr) {
-      T* dy_data = dy->mutable_data<T>(ctx.GetPlace());
-      if (dy->dims() == dz_dims) {
-        if (dy_data != dz_data) {
-          framework::TensorCopy(
-              *dz,
-              ctx.GetPlace(),
-              ctx.template device_context<platform::DeviceContext>(),
-              dy);
-        }
-      } else {
-        std::vector<int> reduce_dims = GetReduceDim(dy->dims(), dz_dims, axis);
-        std::vector<int> dz_vector = phi::vectorize<int>(dz_dims);
-        int ret =
-            xpu::reduce_sum<XPUType>(dev_ctx.x_context(),
-                                     reinterpret_cast<const XPUType*>(dz_data),
-                                     reinterpret_cast<XPUType*>(dy_data),
-                                     dz_vector,
-                                     reduce_dims);
-        PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reduce_sum");
-      }
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(elementwise_add,
-                       ops::ElementwiseAddXPUKernel<float>,
-                       ops::ElementwiseAddXPUKernel<paddle::platform::float16>);
-REGISTER_OP_XPU_KERNEL(
-    elementwise_add_grad,
-    ops::ElementwiseAddGradXPUKernel<float>,
-    ops::ElementwiseAddGradXPUKernel<paddle::platform::float16>);
-#endif
diff --git a/paddle/fluid/operators/elementwise/elementwise_xpu.h b/paddle/fluid/operators/elementwise/elementwise_xpu.h
index bc5f03bb904a58e9b17a81b3abc30216919d4b7a..403ba5a592fd0de1efcf730693e98e5dcfd2eb2c 100644
--- a/paddle/fluid/operators/elementwise/elementwise_xpu.h
+++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h
@@ -21,6 +21,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/phi/kernels/xpu/elementwise.h"
 #include "xpu/refactor/math.h"
 
 namespace paddle {
@@ -48,67 +49,11 @@ void XPUElementwise(const framework::ExecutionContext& ctx,
   auto x = x_var->Get<framework::LoDTensor>();
   auto* y = ctx.Input<framework::LoDTensor>("Y");
   auto* z = ctx.Output<framework::LoDTensor>("Out");
-  z->mutable_data<T>(ctx.GetPlace());
-  auto x_dims = x.dims();
-  auto y_dims = y->dims();
-  int max_dim = std::max(x_dims.size(), y_dims.size());
   int axis = ctx.Attr<int>("axis");
-  axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
-
-  PADDLE_ENFORCE_GE(
-      axis,
-      0,
-      platform::errors::InvalidArgument(
-          "Axis should be greater than or equal to 0, but received axis is "
-          "%d.",
-          axis));
-  PADDLE_ENFORCE_LT(axis,
-                    max_dim,
-                    platform::errors::InvalidArgument(
-                        "Axis should be less than %d, but received axis is %d.",
-                        max_dim,
-                        axis));
-  std::vector<int> x_dims_vec(max_dim, 1);
-  std::vector<int> y_dims_vec(max_dim, 1);
-  if (x_dims.size() == max_dim) {
-    for (int i = 0; i < max_dim; i++) {
-      x_dims_vec[i] = x_dims[i];
-    }
-  } else {
-    for (int i = 0; i < x_dims.size(); i++) {
-      x_dims_vec[i + axis] = x_dims[i];
-    }
-  }
-  if (y_dims.size() == max_dim) {
-    for (int i = 0; i < max_dim; i++) {
-      y_dims_vec[i] = y_dims[i];
-    }
-  } else {
-    for (int i = 0; i < y_dims.size(); i++) {
-      y_dims_vec[i + axis] = y_dims[i];
-    }
-  }
-  const T* x_data = x.data<T>();
-  const T* y_data = y->data<T>();
-  T* z_data = z->data<T>();
   auto& dev_ctx =
       ctx.template device_context<paddle::platform::XPUDeviceContext>();
-
-  int ret = xpu::SUCCESS;
-
-  ret = func(dev_ctx.x_context(),
-             reinterpret_cast<const XPUType*>(x_data),
-             reinterpret_cast<const XPUType*>(y_data),
-             reinterpret_cast<XPUType*>(z_data),
-             x_dims_vec,
-             y_dims_vec);
-  PADDLE_ENFORCE_EQ(
-      ret,
-      xpu::SUCCESS,
-      platform::errors::External(
-          "XPU kernel Elementwise occurred an error in XPUElementwise, "
-          "error code ",
-          ret,
-          XPUAPIErrorMsg[ret]));
+  phi::XPUElementwise<T, XPUType>(dev_ctx, x, *y, axis, z, func);
 }
 
 template <typename T, typename XPUType>
@@ -128,78 +73,12 @@ void XPUElementwiseGrad(const framework::ExecutionContext& ctx,
   auto* dz = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
   auto* dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
   auto* dy = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
-  auto* z = dz;
   int axis = ctx.Attr<int>("axis");
-  const framework::DDim& x_dims = x->dims();
-  const framework::DDim& y_dims = y->dims();
-  int max_dim = std::max(x_dims.size(), y_dims.size());
-  axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
-  PADDLE_ENFORCE_GE(
-      axis,
-      0,
-      platform::errors::InvalidArgument(
-          "Axis should be greater than or equal to 0, but received axis is "
-          "%d.",
-          axis));
-  PADDLE_ENFORCE_LT(axis,
-                    max_dim,
-                    platform::errors::InvalidArgument(
-                        "Axis should be less than %d, but received axis is %d.",
-                        max_dim,
-                        axis));
-  std::vector<int> x_dims_vec(max_dim, 1);
-  std::vector<int> y_dims_vec(max_dim, 1);
-  if (x_dims.size() == max_dim) {
-    for (int i = 0; i < max_dim; i++) {
-      x_dims_vec[i] = x_dims[i];
-    }
-  } else {
-    for (int i = 0; i < x_dims.size(); i++) {
-      x_dims_vec[i + axis] = x_dims[i];
-    }
-  }
-  if (y_dims.size() == max_dim) {
-    for (int i = 0; i < max_dim; i++) {
-      y_dims_vec[i] = y_dims[i];
-    }
-  } else {
-    for (int i = 0; i < y_dims.size(); i++) {
-      y_dims_vec[i + axis] = y_dims[i];
-    }
-  }
-
-  const T* x_data = use_x_y_data ? x->data<T>() : z->data<T>();
-  const T* y_data = use_x_y_data ? y->data<T>() : z->data<T>();
-  const T* z_data = z->data<T>();
-  const T* dz_data = dz->data<T>();
-  T* dx_data = nullptr;
-  T* dy_data = nullptr;
   auto& dev_ctx =
       ctx.template device_context<paddle::platform::XPUDeviceContext>();
-
-  if (dx) {
-    dx_data = dx->mutable_data<T>(ctx.GetPlace());
-  }
-  if (dy) {
-    dy_data = dy->mutable_data<T>(ctx.GetPlace());
-  }
-
-  int ret = func(dev_ctx.x_context(),
-                 reinterpret_cast<const XPUType*>(x_data),
-                 reinterpret_cast<const XPUType*>(y_data),
-                 reinterpret_cast<const XPUType*>(z_data),
-                 reinterpret_cast<const XPUType*>(dz_data),
-                 reinterpret_cast<XPUType*>(dy_data),
-                 reinterpret_cast<XPUType*>(dx_data),
-                 x_dims_vec,
-                 y_dims_vec);
-  PADDLE_ENFORCE_EQ(
-      ret,
-      xpu::SUCCESS,
-      platform::errors::External(
-          "XPU kernel Elementwise occurred an error in XPUElementwiseGrad, "
-          "error code ",
-          ret,
-          XPUAPIErrorMsg[ret]));
+  phi::XPUElementwiseGrad<T, XPUType>(
+      dev_ctx, *x, *y, *dz, axis, dx, dy, func, use_x_y_data);
 }
 
 }  // namespace operators
diff --git a/paddle/phi/kernels/xpu/elementwise.h b/paddle/phi/kernels/xpu/elementwise.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0aeff070ae2de4e1aa48d044937450c71df912c
--- /dev/null
+++ b/paddle/phi/kernels/xpu/elementwise.h
@@ -0,0 +1,190 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#ifdef PADDLE_WITH_XPU
+#include <algorithm>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "xpu/refactor/math.h"
+
+namespace phi {
+
+template <typename T, typename XPUType>
+void XPUElementwise(const XPUContext& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& y,
+                    int axis,
+                    DenseTensor* z,
+                    std::function<int(xpu::Context*,
+                                      const XPUType*,
+                                      const XPUType*,
+                                      XPUType*,
+                                      const std::vector<int>&,
+                                      const std::vector<int>&)> func) {
+  dev_ctx.template Alloc<T>(z);
+  auto x_dims = x.dims();
+  auto y_dims = y.dims();
+  int max_dim = std::max(x_dims.size(), y_dims.size());
+  axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
+
+  PADDLE_ENFORCE_GE(
+      axis,
+      0,
+      errors::InvalidArgument(
+          "Axis should be greater than or equal to 0, but received axis is "
+          "%d.",
+          axis));
+  PADDLE_ENFORCE_LT(axis,
+                    max_dim,
+                    errors::InvalidArgument(
+                        "Axis should be less than %d, but received axis is %d.",
+                        max_dim,
+                        axis));
+  std::vector<int> x_dims_vec(max_dim, 1);
+  std::vector<int> y_dims_vec(max_dim, 1);
+  if (x_dims.size() == max_dim) {
+    for (int i = 0; i < max_dim; i++) {
+      x_dims_vec[i] = x_dims[i];
+    }
+  } else {
+    for (int i = 0; i < x_dims.size(); i++) {
+      x_dims_vec[i + axis] = x_dims[i];
+    }
+  }
+  if (y_dims.size() == max_dim) {
+    for (int i = 0; i < max_dim; i++) {
+      y_dims_vec[i] = y_dims[i];
+    }
+  } else {
+    for (int i = 0; i < y_dims.size(); i++) {
+      y_dims_vec[i + axis] = y_dims[i];
+    }
+  }
+  const T* x_data = x.data<T>();
+  const T* y_data = y.data<T>();
+  T* z_data = z->data<T>();
+
+  int ret = xpu::SUCCESS;
+
+  ret = func(dev_ctx.x_context(),
+             reinterpret_cast<const XPUType*>(x_data),
+             reinterpret_cast<const XPUType*>(y_data),
+             reinterpret_cast<XPUType*>(z_data),
+             x_dims_vec,
+             y_dims_vec);
+  PADDLE_ENFORCE_EQ(
+      ret,
+      xpu::SUCCESS,
+      errors::External(
+          "XPU kernel Elementwise occurred an error in XPUElementwise, "
+          "error code ",
+          ret,
+          XPUAPIErrorMsg[ret]));
+}
+
+template <typename T, typename XPUType>
+void XPUElementwiseGrad(const XPUContext& dev_ctx,
+                        const DenseTensor& x,
+                        const DenseTensor& y,
+                        const DenseTensor& dz,
+                        int axis,
+                        DenseTensor* dx,
+                        DenseTensor* dy,
+                        std::function<int(xpu::Context*,
+                                          const XPUType*,
+                                          const XPUType*,
+                                          const XPUType*,
+                                          const XPUType*,
+                                          XPUType*,
+                                          XPUType*,
+                                          const std::vector<int>&,
+                                          const std::vector<int>&)> func,
+                        bool use_x_y_data) {
+  auto* z = &dz;
+  const DDim& x_dims = x.dims();
+  const DDim& y_dims = y.dims();
+  int max_dim = std::max(x_dims.size(), y_dims.size());
+  axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
+  PADDLE_ENFORCE_GE(
+      axis,
+      0,
+      errors::InvalidArgument(
+          "Axis should be greater than or equal to 0, but received axis is "
+          "%d.",
+          axis));
+  PADDLE_ENFORCE_LT(axis,
+                    max_dim,
+                    errors::InvalidArgument(
+                        "Axis should be less than %d, but received axis is %d.",
+                        max_dim,
+                        axis));
+  std::vector<int> x_dims_vec(max_dim, 1);
+  std::vector<int> y_dims_vec(max_dim, 1);
+  if (x_dims.size() == max_dim) {
+    for (int i = 0; i < max_dim; i++) {
+      x_dims_vec[i] = x_dims[i];
+    }
+  } else {
+    for (int i = 0; i < x_dims.size(); i++) {
+      x_dims_vec[i + axis] = x_dims[i];
+    }
+  }
+  if (y_dims.size() == max_dim) {
+    for (int i = 0; i < max_dim; i++) {
+      y_dims_vec[i] = y_dims[i];
+    }
+  } else {
+    for (int i = 0; i < y_dims.size(); i++) {
+      y_dims_vec[i + axis] = y_dims[i];
+    }
+  }
+
+  const T* x_data = use_x_y_data ? x.data<T>() : z->data<T>();
+  const T* y_data = use_x_y_data ? y.data<T>() : z->data<T>();
+  const T* z_data = z->data<T>();
+
+  const T* dz_data = dz.data<T>();
+  T* dx_data = nullptr;
+  T* dy_data = nullptr;
+
+  if (dx) {
+    dx_data = dev_ctx.template Alloc<T>(dx);
+  }
+  if (dy) {
+    dy_data = dev_ctx.template Alloc<T>(dy);
+  }
+
+  int ret = func(dev_ctx.x_context(),
+                 reinterpret_cast<const XPUType*>(x_data),
+                 reinterpret_cast<const XPUType*>(y_data),
+                 reinterpret_cast<const XPUType*>(z_data),
+                 reinterpret_cast<const XPUType*>(dz_data),
+                 reinterpret_cast<XPUType*>(dy_data),
+                 reinterpret_cast<XPUType*>(dx_data),
+                 x_dims_vec,
+                 y_dims_vec);
+  PADDLE_ENFORCE_EQ(
+      ret,
+      xpu::SUCCESS,
+      errors::External(
+          "XPU kernel Elementwise occurred an error in XPUElementwiseGrad, "
+          "error code ",
+          ret,
+          XPUAPIErrorMsg[ret]));
+}
+
+}  // namespace phi
+#endif
diff --git a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..27fd9ed39f2c1fde1f911af14a055ffa6a78613c
--- /dev/null
+++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+
+#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/backends/xpu/xpu_header.h"
+#include "paddle/phi/backends/xpu/xpu_info.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+
+namespace phi {
+template <typename T, typename Context>
+void AddGradKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& y,
+                   const DenseTensor& dout,
+                   int axis,
+                   DenseTensor* dx,
+                   DenseTensor* dy) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  funcs::ElementwiseGradPreProcess(dout, dx);
+  auto* dz = &dout;
+  const DDim& dz_dims = dz->dims();
+
+  const T* dz_data = dz->data<T>();
+
+  if (dx != nullptr) {
+    T* dx_data = dev_ctx.template Alloc<T>(dx);
+    if (dx->dims() == dz_dims) {
+      if (dx_data != dz_data) {
+        Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dx);
+      }
+    } else {
+      // For the inplace strategy, dx will be stored at the address of dz,
+      // which makes the result of dy wrong.
+      if (dx->IsSharedBufferWith(*dz)) {
+        dx->clear();
+        dx->Resize(x.dims());
+        dev_ctx.template Alloc<T>(dx);
+      }
+      std::vector<int> reduce_dims =
+          funcs::GetReduceDim(dx->dims(), dz_dims, axis);
+      std::vector<int> dz_vector = phi::vectorize<int>(dz_dims);
+
+      int ret =
+          xpu::reduce_sum<XPUType>(dev_ctx.x_context(),
+                                   reinterpret_cast<const XPUType*>(dz_data),
+                                   reinterpret_cast<XPUType*>(dx->data<T>()),
+                                   dz_vector,
+                                   reduce_dims);
+      PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reduce_sum");
+    }
+  }
+
+  if (dy != nullptr) {
+    T* dy_data = dy->mutable_data<T>(dev_ctx.GetPlace());
+    if (dy->dims() == dz_dims) {
+      if (dy_data != dz_data) {
+        Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dy);
+      }
+    } else {
+      std::vector<int> reduce_dims =
+          funcs::GetReduceDim(dy->dims(), dz_dims, axis);
+      std::vector<int> dz_vector = phi::vectorize<int>(dz_dims);
+      int ret =
+          xpu::reduce_sum<XPUType>(dev_ctx.x_context(),
+                                   reinterpret_cast<const XPUType*>(dz_data),
+                                   reinterpret_cast<XPUType*>(dy_data),
+                                   dz_vector,
+                                   reduce_dims);
+      PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reduce_sum");
+    }
+  }
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    add_grad, XPU, ALL_LAYOUT, phi::AddGradKernel, phi::dtype::float16, float) {
+}
diff --git a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
index 9c5b521849cb619c740e336c2b119f7ffde07c01..b91b99bc2064f7a7bf51e1df6b851d6013ccd6e1 100644
--- a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
@@ -12,10 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <memory>
+#include <string>
+
+#include "paddle/phi/kernels/elementwise_add_kernel.h"
+
 #include "paddle/phi/api/ext/dispatch.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/backends/xpu/xpu_header.h"
+#include "paddle/phi/backends/xpu/xpu_info.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
+#include "paddle/phi/kernels/xpu/elementwise.h"
 
 namespace phi {
 
@@ -38,6 +47,25 @@ void GradAddXPUKernel(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_add");
 }
 
+template <typename T, typename Context>
+void AddRawKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& y,
+                  int axis,
+                  DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  XPUElementwise<T, XPUType>(
+      dev_ctx, x, y, axis, out, xpu::broadcast_add<XPUType>);
+}
+
+template <typename T, typename Context>
+void AddKernel(const Context& dev_ctx,
+               const DenseTensor& x,
+               const DenseTensor& y,
+               DenseTensor* out) {
+  AddRawKernel<T>(dev_ctx, x, y, -1, out);
+}
+
 }  // namespace phi
 
 PD_REGISTER_KERNEL(grad_add,
@@ -46,3 +74,8 @@ PD_REGISTER_KERNEL(grad_add,
                    phi::GradAddXPUKernel,
                    phi::dtype::float16,
                    float) {}
+PD_REGISTER_KERNEL(
+    add_raw, XPU, ALL_LAYOUT, phi::AddRawKernel, phi::dtype::float16, float) {}
+
+PD_REGISTER_KERNEL(
+    add, XPU, ALL_LAYOUT, phi::AddKernel, phi::dtype::float16, float) {}
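Note on the broadcast rule shared by these helpers: both XPUElementwise and XPUElementwiseGrad pad the lower-rank operand's shape into a max_dim-length vector of 1s, offset by axis (with axis == -1 defaulting to the rank difference). For example, for x with dims [2, 3, 4, 5], y with dims [3, 4], and axis = 1, y is aligned as [1, 3, 4, 1] before the XDNN call. AddGradKernel applies the same rule in reverse: when a gradient's dims already equal dout's it is a plain copy, while the gradient of a broadcast input is obtained by xpu::reduce_sum of dout over the broadcast axes; in the example above, dy is reduce_sum(dout) over dims {0, 3}, yielding shape [3, 4].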
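To illustrate the reuse the new header enables, below is a minimal sketch of what another elementwise kernel could look like on top of phi::XPUElementwise, mirroring AddRawKernel above. It is hypothetical and not part of this diff: the real subtract kernel lives elsewhere, and the include set is assumed from elementwise_add_kernel.cc; xpu::broadcast_sub is the XDNN counterpart of xpu::broadcast_add.

// Illustrative sketch only -- not part of this diff. A hypothetical XPU
// subtract kernel built on the shared phi::XPUElementwise helper.
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/backends/xpu/xpu_header.h"  // XPUTypeTrait
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/xpu/elementwise.h"

namespace phi {

template <typename T, typename Context>
void SubtractRawKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& y,
                       int axis,
                       DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  // The helper owns output allocation, axis defaulting, shape padding, and
  // error checking; the kernel only selects the XDNN primitive to run.
  XPUElementwise<T, XPUType>(
      dev_ctx, x, y, axis, out, xpu::broadcast_sub<XPUType>);
}

}  // namespace phi

The same pattern applies to the grad side: a kernel supplies the matching XDNN grad primitive to phi::XPUElementwiseGrad and inherits the broadcast and reduction handling for free.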