From 6c7a03bd83fbb7b3646eeed83222d105569cb038 Mon Sep 17 00:00:00 2001
From: phlrain
Date: Thu, 17 Mar 2022 02:55:45 +0000
Subject: [PATCH] update; test=develop

---
 paddle/fluid/operators/activation_op.h        | 93 -------------------
 paddle/phi/kernels/activation_kernel.h        |  1 +
 .../phi/kernels/cpu/activation_grad_kernel.cc |  2 +-
 paddle/phi/kernels/cpu/activation_kernel.cc   | 41 ++++----
 .../phi/kernels/gpu/activation_grad_kernel.cu |  5 +-
 paddle/phi/kernels/gpu/activation_kernel.cu   | 33 ++-----
 6 files changed, 26 insertions(+), 149 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 8a19a79fd43..526d35af6e0 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -466,99 +466,6 @@ using ReluGradGradFunctor = phi::funcs::ReluGradGradFunctor<T>;
 template <typename T>
 using ReluCUDAFunctor = phi::funcs::ReluCUDAFunctor<T>;
 
-// tanhshrink(x) = x - tanh(x)
-// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
-template <typename T>
-struct TanhShrinkFunctor : public BaseActivationFunctor<T> {
-  template <typename Device, typename X, typename Out>
-  void operator()(Device d, X x, Out out) const {
-    out.device(d) = x - x.tanh();
-  }
-};
-
-template <typename T>
-struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
-  template <typename Device, typename X, typename Out, typename dOut,
-            typename dX>
-  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = dout * (x.tanh() * x.tanh());
-  }
-
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
-};
-
-// tanhshrink(x) = x - tanh(x)
-// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
-template <typename T>
-struct HardShrinkFunctor : public BaseActivationFunctor<T> {
-  float threshold;
-
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"threshold", &threshold}};
-  }
-  template <typename Device, typename X, typename Out>
-  void operator()(Device d, X x, Out out) const {
-    auto temp1 = x < static_cast<T>(threshold * -1.f);
-    auto temp2 = x > static_cast<T>(threshold);
-    out.device(d) = x * (temp1 || temp2).template cast<T>();
-  }
-};
-
-template <typename T>
-struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
-  float threshold;
-
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"threshold", &threshold}};
-  }
-
-  template <typename Device, typename X, typename Out, typename dOut,
-            typename dX>
-  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    auto temp1 = x < static_cast<T>(threshold * -1.f);
-    auto temp2 = x > static_cast<T>(threshold);
-    dx.device(d) = dout * (temp1 || temp2).template cast<T>();
-  }
-
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
-};
-
-// softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < -lambda; 0
-// otherwise
-template <typename T>
-struct SoftShrinkFunctor : public BaseActivationFunctor<T> {
-  float lambda;
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"lambda", &lambda}};
-  }
-
-  template <typename Device, typename X, typename Out>
-  void operator()(Device d, X x, Out out) const {
-    auto lambdaT = static_cast<T>(lambda);
-    auto temp1 = (x > lambdaT).template cast<T>();
-    auto temp2 = (x < -lambdaT).template cast<T>();
-    out.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT);
-  }
-};
-
-template <typename T>
-struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
-  float lambda;
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"lambda", &lambda}};
-  }
-  template <typename Device, typename X, typename Out, typename dOut,
-            typename dX>
-  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    auto lambdaT = static_cast<T>(lambda);
-    auto temp1 = (x > lambdaT).template cast<T>();
-    auto temp2 = (x < -lambdaT).template cast<T>();
-    dx.device(d) = dout * (temp1 + temp2).template cast<T>();
-  }
-
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
-};
-
 template <typename T>
 struct SqrtGradFunctor : public BaseActivationFunctor<T> {
diff --git a/paddle/phi/kernels/activation_kernel.h b/paddle/phi/kernels/activation_kernel.h
 template <typename T, typename Context>
 void LogitKernel(const Context& dev_ctx,
diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
index f878aea94f4..5b397e7774a 100644
--- a/paddle/phi/kernels/cpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -103,7 +103,7 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
-DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepOut(Exp, ExpGradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpGradFunctor);
 
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhGradFunctor);
diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc
index 9816cf118fb..edad9734cce 100644
--- a/paddle/phi/kernels/cpu/activation_kernel.cc
+++ b/paddle/phi/kernels/cpu/activation_kernel.cc
@@ -73,36 +73,27 @@ DEFINE_CPU_ACTIVATION_KERNEL(Relu, ReluCPUFunctor)
 DEFINE_CPU_ACTIVATION_KERNEL(Tanh, TanhFunctor)
 DEFINE_CPU_ACTIVATION_KERNEL(TanhShrink, TanhShrinkFunctor)
 DEFINE_CPU_ACTIVATION_KERNEL(Silu, SiluFunctor)
-DEFINE_CPU_ACTIVATION_KERNEL(Exp, funcs::ExpFunctor)
-DEFINE_CPU_ACTIVATION_KERNEL(Expm1, funcs::Expm1Functor)
-DEFINE_CPU_ACTIVATION_KERNEL(Reciprocal, funcs::ReciprocalFunctor)
-DEFINE_CPU_ACTIVATION_KERNEL(Square, funcs::SquareFunctor)
-DEFINE_CPU_ACTIVATION_KERNEL(Sqrt, funcs::SqrtFunctor)
-DEFINE_CPU_ACTIVATION_KERNEL(Rsqrt, funcs::RsqrtFunctor)
-
-DEFINE_CPU_ACTIVATION_KERNEL(Softsign, funcs::SoftsignFunctor)
+DEFINE_CPU_ACTIVATION_KERNEL(Exp, ExpFunctor)
+DEFINE_CPU_ACTIVATION_KERNEL(Expm1, Expm1Functor)
+DEFINE_CPU_ACTIVATION_KERNEL(Reciprocal, ReciprocalFunctor)
+DEFINE_CPU_ACTIVATION_KERNEL(Square, SquareFunctor)
+DEFINE_CPU_ACTIVATION_KERNEL(Sqrt, SqrtFunctor)
+DEFINE_CPU_ACTIVATION_KERNEL(Softsign, SoftsignFunctor)
+DEFINE_CPU_ACTIVATION_KERNEL(Rsqrt, RsqrtFunctor)
 
 DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
 DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
                                      ThresholdedReluFunctor,
                                      threshold)
-DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, funcs::MishFunctor, threshold)
-DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, funcs::BReluFunctor, t_min, t_max)
-DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(STanh,
-                                     funcs::STanhFunctor,
-                                     scale_a,
-                                     scale_b)
-DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus,
-                                     funcs::SoftplusFunctor,
-                                     beta,
-                                     threshold)
+DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
+DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max)
+DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(STanh, STanhFunctor, scale_a, scale_b)
+DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(Softplus, SoftplusFunctor, beta, threshold)
 DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
 DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
 DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha)
-DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max)
-
 }  // namespace phi
 
 PD_REGISTER_KERNEL(relu, CPU, ALL_LAYOUT, phi::ReluKernel, float, double) {}
@@ -130,12 +121,12 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
 PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
 // PD_REGISTER_ACTIVATION_KERNEL(mish, Mish)
-PD_REGISTER_ACTIVATION_KERNEL(stanh, STanh)
-PD_REGISTER_ACTIVATION_KERNEL(reciprocal, Reciprocal)
-PD_REGISTER_ACTIVATION_KERNEL(sqrt, Sqrt)
-PD_REGISTER_ACTIVATION_KERNEL(rsqrt, Rsqrt)
+PD_REGISTER_ACTIVATION_KERNEL(stanh, STanhKernel)
+PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
+PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
+PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel)
 // PD_REGISTER_ACTIVATION_KERNEL(softplus, Softplus)
-PD_REGISTER_ACTIVATION_KERNEL(softsign, Softsign)
+PD_REGISTER_ACTIVATION_KERNEL(softsign, SoftsignKernel)
 
 PD_REGISTER_KERNEL(
     exp, CPU, ALL_LAYOUT, phi::ExpKernel, float, double, int, int64_t) {}
diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
index 8d488d6d9cb..a7f89ff85d6 100644
--- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -157,7 +157,7 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor);
-DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepOut(Exp, CudaExpGradFunctor);
+DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, CudaExpGradFunctor);
 
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                                CudaLeakyReluGradFunctor,
@@ -265,7 +265,6 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_double_grad,
                                    LeakyReluDoubleGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
                                    ThresholdedReluGradKernel)
-<<<<<<< HEAD
 
 PD_REGISTER_KERNEL(exp_grad,
                    GPU,
@@ -275,11 +274,9 @@ PD_REGISTER_KERNEL(exp_grad,
                    double,
                    int,
                    int64_t) {}
-=======
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel)
->>>>>>> 6849d33b62cacccb27797375a212e37a47ca9484
diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu
index 106d3b9cabd..ad2f61f392e 100644
--- a/paddle/phi/kernels/gpu/activation_kernel.cu
+++ b/paddle/phi/kernels/gpu/activation_kernel.cu
@@ -77,28 +77,6 @@ void ActivationGPUImpl(const Context& dev_ctx,
                                 dev_ctx, x, out, functor);                  \
   }
 
-<<<<<<< HEAD
-DEFINE_GPU_ACTIVATION_KERNEL(Cos, funcs::CudaCosFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Tan, funcs::CudaTanFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Acos, funcs::CudaAcosFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Sin, funcs::CudaSinFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Asin, funcs::CudaAsinFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Atan, funcs::CudaAtanFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Sinh, funcs::CudaSinhFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Cosh, funcs::CudaCoshFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Asinh, funcs::CudaAsinhFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Acosh, funcs::CudaAcoshFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Atanh, funcs::CudaAtanhFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Relu, funcs::CudaReluFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Tanh, funcs::CudaTanhFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Exp, funcs::CudaExpFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Expm1, funcs::CudaExpm1Functor)
-DEFINE_GPU_ACTIVATION_KERNEL(Reciprocal, funcs::CudaReciprocalFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Square, funcs::CudaSquareFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Sqrt, funcs::CudaSqrtFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Rsqrt, funcs::CudaRsqrtFunctor)
-DEFINE_GPU_ACTIVATION_KERNEL(Softsign, funcs::CudaSoftsignFunctor)
-=======
 DEFINE_GPU_ACTIVATION_KERNEL(Cos, CudaCosFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(Tan, CudaTanFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(Acos, CudaAcosFunctor)
@@ -114,7 +92,13 @@ DEFINE_GPU_ACTIVATION_KERNEL(Relu, CudaReluFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(Tanh, CudaTanhFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(TanhShrink, CudaTanhShrinkFunctor)
 DEFINE_GPU_ACTIVATION_KERNEL(Silu, CudaSiluFunctor)
->>>>>>> 6849d33b62cacccb27797375a212e37a47ca9484
+DEFINE_GPU_ACTIVATION_KERNEL(Exp, CudaExpFunctor)
+DEFINE_GPU_ACTIVATION_KERNEL(Expm1, CudaExpm1Functor)
+DEFINE_GPU_ACTIVATION_KERNEL(Reciprocal, CudaReciprocalFunctor)
+DEFINE_GPU_ACTIVATION_KERNEL(Square, CudaSquareFunctor)
+DEFINE_GPU_ACTIVATION_KERNEL(Sqrt, CudaSqrtFunctor)
+DEFINE_GPU_ACTIVATION_KERNEL(Rsqrt, CudaRsqrtFunctor)
+DEFINE_GPU_ACTIVATION_KERNEL(Softsign, CudaSoftsignFunctor)
 
 DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
 DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
@@ -181,7 +165,6 @@ PD_REGISTER_ACTIVATION_KERNEL(tanh, TanhKernel)
 PD_REGISTER_ACTIVATION_KERNEL(brelu, BReluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
-<<<<<<< HEAD
 PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(stanh, StanhKernel)
 PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
@@ -202,10 +185,8 @@ PD_REGISTER_KERNEL(expm1,
 PD_REGISTER_KERNEL(logit, GPU, ALL_LAYOUT, phi::LogitKernel, float, double) {}
 PD_REGISTER_KERNEL(
     square, GPU, ALL_LAYOUT, phi::SquareKernel, float, double, int, int64_t) {}
-=======
 PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
 PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
 PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
 PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
->>>>>>> 6849d33b62cacccb27797375a212e37a47ca9484
-- 
GitLab
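
Note on the shrink-style activations being migrated: the functors removed from
activation_op.h above implement tanhshrink, hardshrink, and softshrink. Their
math is compact, and the tanhshrink gradient in particular looks odd until you
expand it: d/dx (x - tanh(x)) = 1 - (1 - tanh^2(x)) = tanh^2(x), which is why
TanhShrinkGradFunctor computes dout * tanh(x) * tanh(x). The sketch below is a
plain scalar C++ restatement of those rules, for illustration only; the
function names (tanhshrink, hardshrink_grad, and so on) are ours, not
Paddle's, and it deliberately omits the Eigen expression templates and kernel
macros used in the patch.

// Standalone restatement of the shrink activations and their gradients.
#include <cassert>
#include <cmath>

// tanhshrink(x) = x - tanh(x); gradient: tanh^2(x) (see derivation above).
double tanhshrink(double x) { return x - std::tanh(x); }
double tanhshrink_grad(double x, double dout) {
  double t = std::tanh(x);
  return dout * t * t;
}

// hardshrink(x) = x where |x| > threshold, 0 elsewhere; the gradient is the
// same pass-through mask applied to dout.
double hardshrink(double x, double threshold) {
  return (x < -threshold || x > threshold) ? x : 0.0;
}
double hardshrink_grad(double x, double dout, double threshold) {
  return (x < -threshold || x > threshold) ? dout : 0.0;
}

// softshrink(x) = x - lambda if x > lambda; x + lambda if x < -lambda; else 0.
double softshrink(double x, double lambda) {
  if (x > lambda) return x - lambda;
  if (x < -lambda) return x + lambda;
  return 0.0;
}
double softshrink_grad(double x, double dout, double lambda) {
  return (x > lambda || x < -lambda) ? dout : 0.0;
}

int main() {
  // Spot-check the forward and backward rules at a few points.
  assert(std::abs(tanhshrink(0.0)) < 1e-12);             // 0 - tanh(0) = 0
  assert(hardshrink(0.3, 0.5) == 0.0);                   // inside the band
  assert(hardshrink(0.7, 0.5) == 0.7);                   // outside: identity
  assert(std::abs(softshrink(0.7, 0.5) - 0.2) < 1e-12);  // shifted by lambda
  assert(softshrink_grad(0.3, 1.0, 0.5) == 0.0);         // flat region
  return 0;
}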
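
A second detail worth calling out: both grad-kernel files fix the misspelled
macro name DEFINE_*_ACTIVATION_GRAD_KERNEL_DepOut to the DEPOUT spelling for
Exp. The DEPOUT family exists because exp's derivative equals its own output:
with out = exp(x), dx = dout * exp(x) = dout * out, so the backward kernel
needs only the saved forward result, never x (unlike the DEPX kernels such as
tanh_shrink_grad, whose gradients are written in terms of the input). A
minimal sketch of that idea in plain C++; the name exp_grad_from_out is ours,
for illustration:

#include <cmath>
#include <cstdio>

// Backward rule for out = exp(x): dx = dout * out. Only the forward output
// `out` and the upstream gradient `dout` are needed, which is what
// "depends on Out" means in the kernel registration above.
double exp_grad_from_out(double out, double dout) { return dout * out; }

int main() {
  double x = 1.5;
  double out = std::exp(x);  // saved by the forward pass
  double dout = 1.0;         // upstream gradient
  // Prints exp(1.5) ~= 4.481689, the same value the forward pass produced.
  std::printf("%f\n", exp_grad_from_out(out, dout));
  return 0;
}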