diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
index a9d91b6e854898a5e037078aa6165beaea37f541..cd88139af23a75f1864e3515dd996cf85915dc87 100644
--- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
+++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
@@ -60,9 +60,12 @@ void leaky_relu_grad(const Tensor& out,
 }
 
 template <typename T>
-void silu_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) {
+void silu_grad(const Tensor& x,
+               const Tensor& out,
+               const Tensor& out_grad,
+               Tensor* x_grad) {
   if (x_grad) {
-    auto sigmoid = 1.0 / (1.0 + exp<T>(-x));
+    auto sigmoid = out / x;
     auto res = out_grad * sigmoid * (1.0 + x * (1.0 - sigmoid));
     set_output<T>(res, x_grad);
   }
diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml
index 6ae8c19006410e37a508a547ded5a5e8eb360135..27bdf904eee6e1df7ca0654232750592d370568e 100644
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -1599,14 +1599,14 @@
 
 - backward_op : silu_grad
   forward : silu (Tensor x) -> Tensor(out)
-  args : (Tensor x, Tensor out_grad)
+  args : (Tensor x, Tensor out, Tensor out_grad)
   output : Tensor(x_grad)
   infer_meta :
     func : UnchangedInferMeta
     param : [x]
   kernel :
     func : silu_grad
-  composite : silu_grad(x, out_grad, x_grad)
+  composite : silu_grad(x, out, out_grad, x_grad)
   inplace : (out_grad -> x_grad)
 
 - backward_op : sin_double_grad
diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h
index ca75a6e0b24a48d99fcc057735dfee333b7eeddd..302e0f6ba9080afe6e196e060af7f3d306ffc0a7 100644
--- a/paddle/phi/kernels/activation_grad_kernel.h
+++ b/paddle/phi/kernels/activation_grad_kernel.h
@@ -75,6 +75,12 @@ namespace phi {
                     DenseTensor* dx);
 
 template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx);
+template <typename T, typename Context>
 void ReluDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
                           const DenseTensor& ddx,
@@ -277,7 +283,6 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Asinh);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Acosh);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Atanh);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink);
-DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Silu);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Square);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Softsign);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid);
diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
index 9273f8393b5b35923001d6fe1f239e8f95ed85eb..7b9074ffa92f341d49b2aac0a44849f84e958a6b 100644
--- a/paddle/phi/kernels/cpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -128,7 +128,6 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Asinh, AsinhGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, AcoshGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
-DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, SquareGradFunctor);
 
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpGradFunctor);
@@ -190,6 +189,17 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
                                                  slope,
                                                  offset);
 
+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  funcs::SiluGradFunctor<T> functor;
+  ActivationGradImpl<T, Context, funcs::SiluGradFunctor<T>>(
+      dev_ctx, &x, &out, &dout, dx, functor);
+}
+
 template <typename T, typename Context>
 void EluGradKernel(const Context& dev_ctx,
                    const DenseTensor& x,
diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
index c0fb7342a80b65bfae189fe91135362d661f552a..aa703ede3bad6600cc0a54354eed6f225e6e65c6 100644
--- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -190,7 +190,6 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Asinh, CudaAsinhGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acosh, CudaAcoshGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor);
-DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, CudaSquareGradFunctor);
 
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, CudaExpGradFunctor);
@@ -249,6 +248,16 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
                                                  slope,
                                                  offset);
 
+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  funcs::CudaSiluGradFunctor<T> functor;
+  ActivationGradGPUImpl<T, Context, funcs::CudaSiluGradFunctor<T>>(
+      dev_ctx, &x, &out, &dout, dx, functor);
+}
 template <typename T, typename Context>
 void EluGradKernel(const Context& dev_ctx,
                    const DenseTensor& x,
diff --git a/paddle/phi/kernels/xpu/activation_grad_kernel.cc b/paddle/phi/kernels/xpu/activation_grad_kernel.cc
index 02243215f95886dbf5f0d4dbf2e1c85ab4a74d63..8aae57b1e498ba5f41240570773dcf3d472c5feb 100644
--- a/paddle/phi/kernels/xpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -567,7 +567,6 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6, XPURelu6GradFunctor);
 
-DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, XPUSiluGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, XPUSwishGradFunctor);
@@ -605,6 +604,16 @@ void HardSwishGradKernel(const Context& dev_ctx,
       dev_ctx, &x, nullptr, &dout, dx, functor);
 }
 
+template <typename T, typename Context>
+void SiluGradKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& out,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  XPUSiluGradFunctor<T> functor;
+  ActivationGradXPUImpl<T, Context, XPUSiluGradFunctor<T>>(
+      dev_ctx, &x, &out, &dout, dx, functor);
+}
 }  // namespace phi
 
 PD_REGISTER_KERNEL(relu_grad,
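
Note on the math (commentary, not part of the patch): since silu(x) = x * sigmoid(x), the saved forward output satisfies sigmoid(x) = out / x, so the backward pass can recover the sigmoid from `out` instead of recomputing exp(-x). The gradient formula itself, sigmoid * (1 + x * (1 - sigmoid)), is unchanged. One caveat: out / x is the indeterminate form 0/0 at x == 0, where the exp-based sigmoid is exactly 0.5. A minimal standalone C++ sketch comparing the two formulations (the driver below is illustrative, not a Paddle API):

#include <cmath>
#include <cstdio>

int main() {
  // Sweep a range of inputs; multiples of 0.5 are exact in binary floating point.
  for (double x = -4.0; x <= 4.0; x += 0.5) {
    if (x == 0.0) continue;  // out / x is 0/0 here; skip the singular point
    double sig = 1.0 / (1.0 + std::exp(-x));  // old path: recompute sigmoid
    double out = x * sig;                     // forward result: silu(x)
    double sig_from_out = out / x;            // new path: reuse the forward output
    double grad_old = sig * (1.0 + x * (1.0 - sig));
    double grad_new = sig_from_out * (1.0 + x * (1.0 - sig_from_out));
    std::printf("x=% .2f  old=%.12f  new=%.12f\n", x, grad_old, grad_new);
  }
  return 0;
}

Away from x == 0 the two columns agree to machine precision; the out / x form trades an exp() per element for a division plus a dependency on the forward output, which is presumably why `out` is threaded through the YAML signature and every kernel above.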