diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h
index ac9c440076257a11e265acc4e18047c06c797609..b1bf5e2778167ba9a8e39803c4369be12823259b 100644
--- a/paddle/fluid/operators/dgc_op.h
+++ b/paddle/fluid/operators/dgc_op.h
@@ -15,9 +15,11 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "dgc/dgc.h"
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/memory/malloc.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
 
 namespace paddle {
 namespace operators {
@@ -153,18 +155,18 @@ class DGCOpKernel : public framework::OpKernel<T> {
       u_out_e.device(eigen_ctx) = m * (u_e + grad_out_e);
 
       // v = u + v + g
-      ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
-          ctx, u, v, 0, AddFunctor<T>(), v_out);
+      ElementwiseComputeEx<phi::funcs::AddFunctor<T>, DeviceContext, T>(
+          ctx, u, v, 0, phi::funcs::AddFunctor<T>(), v_out);
 
-      ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
-          ctx, g, v, 0, AddFunctor<T>(), v_out);
+      ElementwiseComputeEx<phi::funcs::AddFunctor<T>, DeviceContext, T>(
+          ctx, g, v, 0, phi::funcs::AddFunctor<T>(), v_out);
     } else {
       // u = m * u + g
       u_out_e.device(eigen_ctx) = m * u_e + grad_out_e;
 
       // v = u + v
-      ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
-          ctx, u, v, 0, AddFunctor<T>(), v_out);
+      ElementwiseComputeEx<phi::funcs::AddFunctor<T>, DeviceContext, T>(
+          ctx, u, v, 0, phi::funcs::AddFunctor<T>(), v_out);
     }
 
     T* v_out_data = v_out->mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
index 53037c1fa653648044e2dc0981ec5c63351e7c15..ed9b98a128a2166907c70e46fafa31d3b9c1cb35 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-
 #include <string>
 
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
@@ -125,17 +123,6 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 
-REGISTER_OP_CPU_KERNEL(
-    grad_add,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext,
-                              paddle::platform::complex<float>>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext,
-                              paddle::platform::complex<double>>);
-
 REGISTER_OP_VERSION(elementwise_add)
     .AddCheckpoint(
         R"ROC(Register elementwise_add for adding the attribute of
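Note on the functor swap in dgc_op.h: `phi::funcs::AddFunctor<T>` (from paddle/phi/kernels/funcs/elementwise_functor.h) is a stateless binary functor with the same call shape as the removed fluid-side `AddFunctor<T>`, so the `ElementwiseComputeEx` call sites only change the functor's namespace. A minimal sketch of the contract those call sites rely on (illustrative, not the verbatim phi definition):

```cpp
#include "paddle/phi/core/hostdevice.h"  // for the HOSTDEVICE macro

// Illustrative sketch: the functor interface ElementwiseComputeEx expects.
// phi::funcs::AddFunctor<T> provides essentially this operator().
template <typename T>
struct AddFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return a + b; }
};
```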
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
deleted file mode 100644
index d77d4ed036394e50f1cf6affcfa7045636df6ac0..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#ifdef __xpu__
-#include <memory>
-#include <string>
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
-#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#else
-#include <algorithm>
-#include <utility>
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-
-// only can include the headers in paddle/phi/include dirs
-#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
-#include "paddle/phi/kernels/elementwise_add_kernel.h"
-#endif
-
-namespace paddle {
-namespace operators {
-
-template <typename DeviceContext, typename T>
-class ElementwiseAddKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-#ifdef __xpu__
-    std::vector<const framework::Tensor *> ins;
-    std::vector<framework::Tensor *> outs;
-    int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
-    const auto &xpu_ctx =
-        ctx.template device_context<paddle::platform::XPUDeviceContext>();
-    paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T,
-                                                   T, kps::AddFunctor<T>, 1>(
-        xpu_ctx, ins, &outs, axis, kps::AddFunctor<T>());
-#else
-    auto *x = ctx.Input<framework::LoDTensor>("X");
-    auto *y = ctx.Input<framework::LoDTensor>("Y");
-    auto *z = ctx.Output<framework::LoDTensor>("Out");
-    z->mutable_data<T>(ctx.GetPlace());
-
-    auto &dev_ctx = ctx.device_context<DeviceContext>();
-    int axis = ctx.Attr<int>("axis");
-    phi::AddRawKernel<T>(
-        static_cast<const typename framework::ConvertToPhiContext<
-            DeviceContext>::TYPE &>(dev_ctx),
-        *x, *y, axis, z);
-#endif
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.kps b/paddle/fluid/operators/elementwise/elementwise_add_op.kps
deleted file mode 100644
index ecd52a310acdbebd648da5ed0f1abddb298224ef..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.kps
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU_KP
-
-// Please do not modify the following code
-#if defined(__CUDA_ARCH__)
-#undef __CUDA_ARCH__
-#endif
-
-#if defined(__CUDACC__)
-#undef __CUDACC__
-#endif
-
-#if defined(__CUDA__)
-#undef __CUDA__
-#endif
-
-#if defined(__NVCC__)
-#undef __NVCC__
-#endif
-
-#include <xpu/runtime.h>                // NOLINT
-#include "xpu/kernel/cluster_header.h"  // NOLINT
-#include "xpu/kernel/debug.h"           // NOLINT
-#include "xpu/kernel/math.h"            // NOLINT
-
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#else
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#include "paddle/phi/kernels/gpu/elementwise_grad.h"
-#endif
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-
-#ifdef PADDLE_WITH_XPU_KP
-REGISTER_OP_KERNEL(elementwise_add, KP, plat::XPUPlace,
-                   ops::ElementwiseAddKernel<plat::XPUDeviceContext, float>);
-#else
-REGISTER_OP_CUDA_KERNEL(
-    grad_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int64_t>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::bfloat16>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<float>>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<double>>);
-#endif
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc
index d35e3f6641b4548e8176c1f2ee14a01a6f3824e8..178aa329577b7104bf91bd329e8a4e71ef5437a3 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <memory>
 
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include "paddle/fluid/operators/elementwise/elementwise_npu.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
index feb73abf3ff08ced63ba84badb7920a0dcdc07a2..22a5de4c6094167ee4e87d5d2ab67d341f897bbe 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include <memory>
 #include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
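With elementwise_add_op.h and elementwise_add_op.kps deleted, the fluid `ElementwiseAddKernel` wrapper is gone; its non-XPU body was already a one-line forward into phi. Code that still needs an axis-aware add can call the phi kernel directly, roughly as below (a hedged sketch: `AddViaPhi` is a hypothetical helper, and `AddRawKernel` is assumed to be exposed via paddle/phi/kernels/elementwise_add_kernel.h, as the deleted header's include list suggests):

```cpp
#include "paddle/phi/kernels/elementwise_add_kernel.h"

// Hypothetical helper mirroring what the deleted fluid kernel did on
// CPU/GPU: forward straight to phi. axis = -1 requests the usual
// trailing-dimension broadcast; other values pin where y aligns with x.
template <typename T, typename Context>
void AddViaPhi(const Context& dev_ctx,
               const phi::DenseTensor& x,
               const phi::DenseTensor& y,
               int axis,
               phi::DenseTensor* out) {
  phi::AddRawKernel<T>(dev_ctx, x, y, axis, out);
}
```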
diff --git a/paddle/fluid/operators/fused/attn_gemm.h b/paddle/fluid/operators/fused/attn_gemm.h
index c4e73c6bf97fdb13177a122c1b8cb433a21e30fb..9542f0742ea349bcb61cc8dbc2cd8942d16d16e0 100644
--- a/paddle/fluid/operators/fused/attn_gemm.h
+++ b/paddle/fluid/operators/fused/attn_gemm.h
@@ -14,9 +14,10 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
 #include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
+#include "paddle/phi/kernels/funcs/broadcast_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
 
 namespace paddle {
 namespace operators {
@@ -67,9 +68,8 @@ class AttnMatMul {
       ins.emplace_back(bias);
       outs.emplace_back(bias_out);
       int elewise_add_axis = -1;
-      paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
-                                                     T, T>(
-          dev_ctx_, ins, &outs, elewise_add_axis, AddFunctor<T>());
+      phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
+          dev_ctx_, ins, &outs, elewise_add_axis, phi::funcs::AddFunctor<T>());
     }
   }
 
diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h
index 3d75d127ab60aec17d7d089498752bf2e6d075e0..0e9fba73933b7fd7c976d090f9297ccba6c5444b 100644
--- a/paddle/fluid/operators/fused/fmha_ref.h
+++ b/paddle/fluid/operators/fused/fmha_ref.h
@@ -12,12 +12,12 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/fluid/operators/dropout_impl.cu.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
 #include "paddle/fluid/operators/fused/fused_softmax_mask.cu.h"
 #include "paddle/fluid/operators/transpose_op.cu.h"
+#include "paddle/phi/kernels/funcs/broadcast_function.h"
 #include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
 #include "paddle/phi/kernels/funcs/functors.h"
 #include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
 
@@ -160,9 +160,9 @@ class FMHARef {
       ins.emplace_back(src_mask_tensor);
       outs.emplace_back(src_mask_out_tensor);
       int elewise_add_axis = -1;
-      paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
-                                                     T, T>(
-          dev_ctx_, ins, &outs, elewise_add_axis, AddFunctor<T>());
+      phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
+          dev_ctx_, ins, &outs, elewise_add_axis,
+          phi::funcs::AddFunctor<T>());
 
       phi::SoftmaxForwardCUDAKernelDriver<T>(
           dev_ctx_, *src_mask_out_tensor, softmax_axis, softmax_out_tensor);
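The fused-op hunks above (and the three below) all follow one pattern: `paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>` becomes `phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>` with identical ins/outs/axis arguments. A condensed sketch of the call shape (`BroadcastAdd` is a hypothetical wrapper; the template parameters follow the call sites in this patch):

```cpp
#include <vector>

#include "paddle/phi/kernels/funcs/broadcast_function.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"

// Hypothetical wrapper showing the BroadcastKernel call shape used at the
// fused-attention call sites: out = x + y with trailing-dim broadcast.
template <typename T>
void BroadcastAdd(const phi::GPUContext& dev_ctx,
                  const phi::DenseTensor& x,
                  const phi::DenseTensor& y,
                  phi::DenseTensor* out) {
  std::vector<const phi::DenseTensor*> ins = {&x, &y};
  std::vector<phi::DenseTensor*> outs = {out};
  phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
      dev_ctx, ins, &outs, /*axis=*/-1, phi::funcs::AddFunctor<T>());
}
```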
diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu
index d26577f06fe683fb1528c61b4401b9e578c90c9f..ec8a4d962e8084f8c8dfb74f9b789ef5d94bb00c 100644
--- a/paddle/fluid/operators/fused/fused_attention_op.cu
+++ b/paddle/fluid/operators/fused/fused_attention_op.cu
@@ -19,7 +19,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
+#include "paddle/phi/kernels/funcs/broadcast_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #include "paddle/fluid/operators/fused/attention_layer_norm.h"
@@ -543,10 +544,9 @@ class FusedAttentionGradKernel : public framework::OpKernel<T> {
     ins.emplace_back(d_x);
     outs.emplace_back(d_x);
     int elewise_add_axis = -1;
-    paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
-                                                   T, T>(
+    phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
         ctx.cuda_device_context(), ins, &outs, elewise_add_axis,
-        AddFunctor<T>());
+        phi::funcs::AddFunctor<T>());
   }
 };
 
diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu
index c38d9f7d4bcbd25b3111b35a918de0f4ebdabefb..2eb9885286dab8702cd96519a76b4bb69ed6aff6 100644
--- a/paddle/fluid/operators/fused/fused_feedforward_op.cu
+++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu
@@ -17,9 +17,10 @@ limitations under the License. */
 #include "paddle/fluid/operators/matmul_v2_op.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include "paddle/fluid/operators/fused/fused_dropout_helper.h"
 #include "paddle/fluid/operators/layer_norm_kernel.cu.h"
+#include "paddle/phi/kernels/funcs/broadcast_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
 #include "paddle/fluid/platform/collective_helper.h"
@@ -345,9 +346,8 @@ class FusedFeedForwardGradKernel : public framework::OpKernel<T> {
     ins[1] = d_x;
     outs[0] = d_x;
     int elewise_add_axis = -1;
-    paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
-                                                   T, T>(
-        ctx, ins, &outs, elewise_add_axis, AddFunctor<T>());
+    phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
+        ctx, ins, &outs, elewise_add_axis, phi::funcs::AddFunctor<T>());
   }
 
   void Compute(const framework::ExecutionContext& context) const override {
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu
index fdd0208c3d316e9386eba2a60dde85fbab6005ae..fe93d323c59bcd83b567b60fa00333fb0275b115 100644
--- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu
+++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu
@@ -24,7 +24,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #include "paddle/fluid/operators/fused/attention_layer_norm.h"
diff --git a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc
index 607026454724937db841150bdf96d6ea8b976a4c..5019b9f57062874b5f78a002dbf1cdd411bc4e9c 100644
--- a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc
+++ b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc
@@ -34,6 +34,14 @@ void AddKernel(const Context& dev_ctx,
   AddRawKernel<T>(dev_ctx, x, y, axis, out);
 }
 
+template <typename T, typename Context>
+void GradAddKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& y,
+                   DenseTensor* out) {
+  AddKernel<T>(dev_ctx, x, y, out);
+}
+
 }  // namespace phi
 
 using complex64 = ::phi::dtype::complex<float>;
@@ -65,3 +73,15 @@ PD_REGISTER_KERNEL(add,
                    int64_t,
                    complex64,
                    complex128) {}
+
+PD_REGISTER_KERNEL(grad_add,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::GradAddKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   complex64,
+                   complex128) {}
diff --git a/paddle/phi/kernels/kps/elementwise_add_kernel.cu b/paddle/phi/kernels/kps/elementwise_add_kernel.cu
index 8f7d45771d9d0694b4ed116c6a4c36955fea60d9..98e39ada32b8be0fa0efd3f5f711f13881383167 100644
--- a/paddle/phi/kernels/kps/elementwise_add_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_add_kernel.cu
@@ -33,6 +33,14 @@ void AddKernel(const Context& dev_ctx,
   AddRawKernel<T>(dev_ctx, x, y, axis, out);
 }
 
+template <typename T, typename Context>
+void GradAddKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& y,
+                   DenseTensor* out) {
+  AddKernel<T>(dev_ctx, x, y, out);
+}
+
 }  // namespace phi
 
 #ifdef PADDLE_WITH_XPU_KP
@@ -71,4 +79,18 @@ PD_REGISTER_KERNEL(add,
                    phi::dtype::bfloat16,
                    complex64,
                    complex128) {}
+
+PD_REGISTER_KERNEL(grad_add,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::GradAddKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   complex64,
+                   complex128) {}
 #endif
diff --git a/paddle/phi/ops/compat/elementwise_sig.cc b/paddle/phi/ops/compat/elementwise_sig.cc
index c760c966b0647d0ab68834f20bba01e5eb791f82..17fb1858373d9581e62ed89cd3e05abe744362fe 100644
--- a/paddle/phi/ops/compat/elementwise_sig.cc
+++ b/paddle/phi/ops/compat/elementwise_sig.cc
@@ -25,6 +25,11 @@ KernelSignature ElementwiseAddOpArgumentMapping(
   return KernelSignature("add_raw", {"X", "Y"}, {"axis"}, {"Out"});
 }
 
+KernelSignature ElementwiseGradAddOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature("grad_add", {"X", "Y"}, {}, {"Out"});
+}
+
 KernelSignature ElementwiseSubOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   int axis = paddle::any_cast<int>(ctx.Attr("axis"));
@@ -317,3 +322,4 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_heaviside_grad,
                            phi::ElementwiseHeavisideGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(elementwise_pow_grad,
                            phi::ElementwisePowGradOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(grad_add, phi::ElementwiseGradAddOpArgumentMapping);
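End to end, the legacy `grad_add` operator now resolves entirely inside phi. A summary of the dispatch chain wired up by this patch:

```cpp
// Dispatch chain after this patch (summary in comment form):
//
//   fluid op grad_add(X, Y) -> Out
//     -> ElementwiseGradAddOpArgumentMapping         (elementwise_sig.cc)
//          = KernelSignature("grad_add", {"X", "Y"}, {}, {"Out"})
//     -> PD_REGISTER_KERNEL(grad_add, CPU|KPS, ...)  (phi kernels above)
//          phi::GradAddKernel<T>                     // no axis attribute
//            -> phi::AddKernel<T>                    // axis fixed to -1
//              -> phi::AddRawKernel<T>
//
// i.e. grad_add is plain elementwise add with default broadcasting. Both new
// registrations also pick up int16_t, which the removed fluid kernel lists
// (REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL) did not cover.
```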