From 4d7a9eef4237c2780ca5799805c74dcf90b3ceb8 Mon Sep 17 00:00:00 2001
From: YuanRisheng
Date: Tue, 24 May 2022 19:21:44 +0800
Subject: [PATCH] [Phi]Move grad_add op kernel into phi and delete
 elementwise_add_op file (#42903)

* move grad_add

* fix unittest bugs

* fix compile bugs
---
 paddle/fluid/operators/dgc_op.h               | 16 +++--
 .../elementwise/elementwise_add_op.cc         | 13 ----
 .../elementwise/elementwise_add_op.h          | 66 -------------------
 .../elementwise/elementwise_add_op.kps        | 61 -----------------
 .../elementwise/elementwise_add_op_npu.cc     |  1 -
 .../elementwise/elementwise_add_op_xpu.cc     |  1 -
 paddle/fluid/operators/fused/attn_gemm.h      |  8 +--
 paddle/fluid/operators/fused/fmha_ref.h       | 10 +--
 .../operators/fused/fused_attention_op.cu     |  8 +--
 .../operators/fused/fused_feedforward_op.cu   |  8 +--
 .../fused/fused_multi_transformer_op.cu       |  1 -
 .../phi/kernels/cpu/elementwise_add_kernel.cc | 20 ++++++
 .../phi/kernels/kps/elementwise_add_kernel.cu | 22 +++++++
 paddle/phi/ops/compat/elementwise_sig.cc      |  6 ++
 14 files changed, 74 insertions(+), 167 deletions(-)
 delete mode 100644 paddle/fluid/operators/elementwise/elementwise_add_op.h
 delete mode 100644 paddle/fluid/operators/elementwise/elementwise_add_op.kps

diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h
index ac9c440076..b1bf5e2778 100644
--- a/paddle/fluid/operators/dgc_op.h
+++ b/paddle/fluid/operators/dgc_op.h
@@ -15,9 +15,11 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "dgc/dgc.h"
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/memory/malloc.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
 
 namespace paddle {
 namespace operators {
@@ -153,18 +155,18 @@ class DGCOpKernel : public framework::OpKernel<T> {
       u_out_e.device(eigen_ctx) = m * (u_e + grad_out_e);
 
       // v = u + v + g
-      ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
-          ctx, u, v, 0, AddFunctor<T>(), v_out);
+      ElementwiseComputeEx<phi::funcs::AddFunctor<T>, DeviceContext, T>(
+          ctx, u, v, 0, phi::funcs::AddFunctor<T>(), v_out);
 
-      ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
-          ctx, g, v, 0, AddFunctor<T>(), v_out);
+      ElementwiseComputeEx<phi::funcs::AddFunctor<T>, DeviceContext, T>(
+          ctx, g, v, 0, phi::funcs::AddFunctor<T>(), v_out);
     } else {
       // u = m * u + g
       u_out_e.device(eigen_ctx) = m * u_e + grad_out_e;
 
       // v = u + v
-      ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(
-          ctx, u, v, 0, AddFunctor<T>(), v_out);
+      ElementwiseComputeEx<phi::funcs::AddFunctor<T>, DeviceContext, T>(
+          ctx, u, v, 0, phi::funcs::AddFunctor<T>(), v_out);
     }
 
     T* v_out_data = v_out->mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
index 53037c1fa6..ed9b98a128 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-
 #include <string>
 
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
@@ -125,17 +123,6 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 
-REGISTER_OP_CPU_KERNEL(
-    grad_add,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext,
-                              paddle::platform::complex<float>>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext,
-                              paddle::platform::complex<double>>);
-
 REGISTER_OP_VERSION(elementwise_add)
     .AddCheckpoint(
         R"ROC(Register elementwise_add for adding the attribute of
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
deleted file mode 100644
index d77d4ed036..0000000000
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#ifdef __xpu__
-#include <memory>
-#include <string>
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
-#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#else
-#include <algorithm>
-#include <utility>
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-
-// only can include the headers in paddle/phi/include dirs
-#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
-#include "paddle/phi/kernels/elementwise_add_kernel.h"
-#endif
-
-namespace paddle {
-namespace operators {
-
-template <typename DeviceContext, typename T>
-class ElementwiseAddKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-#ifdef __xpu__
-    std::vector<const framework::Tensor*> ins;
-    std::vector<framework::Tensor*> outs;
-    int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
-    const auto& xpu_ctx =
-        ctx.template device_context<paddle::platform::XPUDeviceContext>();
-    paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
-                                                   T, T, kps::AddFunctor<T>, 1>(
-        xpu_ctx, ins, &outs, axis, kps::AddFunctor<T>());
-#else
-    auto *x = ctx.Input<framework::LoDTensor>("X");
-    auto *y = ctx.Input<framework::LoDTensor>("Y");
-    auto *z = ctx.Output<framework::LoDTensor>("Out");
-    z->mutable_data<T>(ctx.GetPlace());
-
-    auto &dev_ctx = ctx.device_context<DeviceContext>();
-    int axis = ctx.Attr<int>("axis");
-    phi::AddRawKernel<T>(
-        static_cast<const typename framework::ConvertToPhiContext<
-            DeviceContext>::TYPE &>(dev_ctx),
-        *x, *y, axis, z);
-#endif
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.kps b/paddle/fluid/operators/elementwise/elementwise_add_op.kps
deleted file mode 100644
index ecd52a310a..0000000000
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.kps
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU_KP
-
-// Please do not modify the following code
-#if defined(__CUDA_ARCH__)
-#undef __CUDA_ARCH__
-#endif
-
-#if defined(__CUDACC__)
-#undef __CUDACC__
-#endif
-
-#if defined(__CUDA__)
-#undef __CUDA__
-#endif
-
-#if defined(__NVCC__)
-#undef __NVCC__
-#endif
-
-#include <xpu/runtime.h>                // NOLINT
-#include "xpu/kernel/cluster_header.h"  // NOLINT
-#include "xpu/kernel/debug.h"           // NOLINT
-#include "xpu/kernel/math.h"            // NOLINT
-
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#else
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#include "paddle/phi/kernels/gpu/elementwise_grad.h"
-#endif
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-
-#ifdef PADDLE_WITH_XPU_KP
-REGISTER_OP_KERNEL(elementwise_add, KP, plat::XPUPlace,
-                   ops::ElementwiseAddKernel<plat::XPUDeviceContext, float>);
-#else
-REGISTER_OP_CUDA_KERNEL(
-    grad_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int64_t>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::bfloat16>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<float>>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<double>>);
-#endif
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc
index d35e3f6641..178aa32957 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <memory>
 
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include "paddle/fluid/operators/elementwise/elementwise_npu.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
index feb73abf3f..22a5de4c60 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include <memory>
 #include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
diff --git a/paddle/fluid/operators/fused/attn_gemm.h b/paddle/fluid/operators/fused/attn_gemm.h
index c4e73c6bf9..9542f0742e 100644
--- a/paddle/fluid/operators/fused/attn_gemm.h
+++ b/paddle/fluid/operators/fused/attn_gemm.h
@@ -14,9 +14,10 @@ limitations under the License.
*/ #include "paddle/fluid/platform/float16.h" #include "paddle/phi/kernels/funcs/blas/blas.h" -#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h" #include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" +#include "paddle/phi/kernels/funcs/broadcast_function.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" namespace paddle { namespace operators { @@ -67,9 +68,8 @@ class AttnMatMul { ins.emplace_back(bias); outs.emplace_back(bias_out); int elewise_add_axis = -1; - paddle::operators::LaunchElementwiseCudaKernel( - dev_ctx_, ins, &outs, elewise_add_axis, AddFunctor()); + phi::funcs::BroadcastKernel( + dev_ctx_, ins, &outs, elewise_add_axis, phi::funcs::AddFunctor()); } } diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 3d75d127ab..0e9fba7393 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -12,12 +12,12 @@ limitations under the License. */ #pragma once #include "paddle/fluid/operators/dropout_impl.cu.h" -#include "paddle/fluid/operators/elementwise/elementwise_add_op.h" -#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h" #include "paddle/fluid/operators/fused/fused_softmax_mask.cu.h" #include "paddle/fluid/operators/transpose_op.cu.h" +#include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/funcs/functors.h" #include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h" @@ -160,9 +160,9 @@ class FMHARef { ins.emplace_back(src_mask_tensor); outs.emplace_back(src_mask_out_tensor); int elewise_add_axis = -1; - paddle::operators::LaunchElementwiseCudaKernel( - dev_ctx_, ins, &outs, elewise_add_axis, AddFunctor()); + phi::funcs::BroadcastKernel( + dev_ctx_, ins, &outs, elewise_add_axis, + phi::funcs::AddFunctor()); phi::SoftmaxForwardCUDAKernelDriver( dev_ctx_, *src_mask_out_tensor, softmax_axis, softmax_out_tensor); diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index d26577f06f..ec8a4d962e 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -19,7 +19,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#include "paddle/fluid/operators/elementwise/elementwise_add_op.h" +#include "paddle/phi/kernels/funcs/broadcast_function.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/fluid/operators/fused/attention_layer_norm.h" @@ -543,10 +544,9 @@ class FusedAttentionGradKernel : public framework::OpKernel { ins.emplace_back(d_x); outs.emplace_back(d_x); int elewise_add_axis = -1; - paddle::operators::LaunchElementwiseCudaKernel( + phi::funcs::BroadcastKernel( ctx.cuda_device_context(), ins, &outs, elewise_add_axis, - AddFunctor()); + phi::funcs::AddFunctor()); } }; diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu index c38d9f7d4b..2eb9885286 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cu +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu @@ -17,9 +17,10 @@ limitations under the License. */ #include "paddle/fluid/operators/matmul_v2_op.h" #include "paddle/phi/kernels/funcs/blas/blas.h" -#include "paddle/fluid/operators/elementwise/elementwise_add_op.h" #include "paddle/fluid/operators/fused/fused_dropout_helper.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" +#include "paddle/phi/kernels/funcs/broadcast_function.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/collective_helper.h" @@ -345,9 +346,8 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { ins[1] = d_x; outs[0] = d_x; int elewise_add_axis = -1; - paddle::operators::LaunchElementwiseCudaKernel( - ctx, ins, &outs, elewise_add_axis, AddFunctor()); + phi::funcs::BroadcastKernel( + ctx, ins, &outs, elewise_add_axis, phi::funcs::AddFunctor()); } void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu index fdd0208c3d..fe93d323c5 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu @@ -24,7 +24,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#include "paddle/fluid/operators/elementwise/elementwise_add_op.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/fluid/operators/fused/attention_layer_norm.h" diff --git a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc index 6070264547..5019b9f570 100644 --- a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc @@ -34,6 +34,14 @@ void AddKernel(const Context& dev_ctx, AddRawKernel(dev_ctx, x, y, axis, out); } +template +void GradAddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + AddKernel(dev_ctx, x, y, out); +} + } // namespace phi using complex64 = ::phi::dtype::complex; @@ -65,3 +73,15 @@ PD_REGISTER_KERNEL(add, int64_t, complex64, complex128) {} + +PD_REGISTER_KERNEL(grad_add, + CPU, + ALL_LAYOUT, + phi::GradAddKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128) {} diff --git a/paddle/phi/kernels/kps/elementwise_add_kernel.cu b/paddle/phi/kernels/kps/elementwise_add_kernel.cu index 8f7d45771d..98e39ada32 100644 --- a/paddle/phi/kernels/kps/elementwise_add_kernel.cu +++ b/paddle/phi/kernels/kps/elementwise_add_kernel.cu @@ -33,6 +33,14 @@ void AddKernel(const Context& dev_ctx, AddRawKernel(dev_ctx, x, y, axis, out); } +template +void GradAddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + AddKernel(dev_ctx, x, y, out); +} + } // namespace phi #ifdef PADDLE_WITH_XPU_KP @@ -71,4 +79,18 @@ PD_REGISTER_KERNEL(add, phi::dtype::bfloat16, complex64, complex128) {} + +PD_REGISTER_KERNEL(grad_add, + KPS, + ALL_LAYOUT, + phi::GradAddKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + complex64, + complex128) {} #endif diff --git a/paddle/phi/ops/compat/elementwise_sig.cc b/paddle/phi/ops/compat/elementwise_sig.cc index c760c966b0..17fb185837 100644 --- a/paddle/phi/ops/compat/elementwise_sig.cc +++ b/paddle/phi/ops/compat/elementwise_sig.cc @@ -25,6 +25,11 @@ KernelSignature ElementwiseAddOpArgumentMapping( return KernelSignature("add_raw", {"X", "Y"}, {"axis"}, {"Out"}); } +KernelSignature ElementwiseGradAddOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("grad_add", {"X", "Y"}, {}, {"Out"}); +} + KernelSignature ElementwiseSubOpArgumentMapping( const ArgumentMappingContext& ctx) { int axis = paddle::any_cast(ctx.Attr("axis")); @@ -317,3 +322,4 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_heaviside_grad, phi::ElementwiseHeavisideGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(elementwise_pow_grad, phi::ElementwisePowGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(grad_add, phi::ElementwiseGradAddOpArgumentMapping); -- GitLab