diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index 7474ac88ac8db6f64c889e61352d346e0af78404..f8b15d4d4ee280ce8fb561b302a091496502207b 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -220,6 +220,7 @@ XPUOpMap& get_kl2_ops() {
        XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()),
                      pOpKernelType(vartype::INT32, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"exp_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"exp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"expand_as_v2",
        XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
@@ -314,6 +315,10 @@ XPUOpMap& get_kl2_ops() {
                      pOpKernelType(vartype::FP32, XPUPlace())})},
       {"grid_sampler",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_sigmoid_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_sigmoid",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"hard_swish_grad",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                      pOpKernelType(vartype::FP16, XPUPlace())})},
diff --git a/paddle/phi/kernels/xpu/activation_grad_kernel.cc b/paddle/phi/kernels/xpu/activation_grad_kernel.cc
index a30f63d176e5033b73eec79ad69a9d4fa91894eb..e3b5e1bfcd3fd0d20fc98d870a517b1e98a634ca 100644
--- a/paddle/phi/kernels/xpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -160,6 +160,21 @@ int xpu_activation_backward(const Context& dev_ctx,
   return r;
 }
 
+template <typename T>
+struct XPUExpGradFunctor : public funcs::BaseActivationFunctor<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor* x,
+                  const DenseTensor* out,
+                  const DenseTensor* dout,
+                  DenseTensor* dx) const {
+    int r = xpu_activation_backward<Context, T, XPUType>(
+        dev_ctx, x, out, dout, dx, xpu::exp_grad<XPUType>);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "exp_grad");
+  }
+};
+
 template <typename T>
 struct XPULogGradFunctor : public funcs::BaseActivationFunctor<T> {
   template <typename Context>
@@ -238,6 +253,39 @@ struct XPULeakyReluGradFunctor : public funcs::BaseActivationFunctor<T> {
   }
 };
 
+template <typename T>
+struct XPUHardSigmoidGradFunctor : public funcs::BaseActivationFunctor<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  float slope;
+  float offset;
+  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
+    return {{"slope", &slope}, {"offset", &offset}};
+  }
+
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor* x,
+                  const DenseTensor* out,
+                  const DenseTensor* dout,
+                  DenseTensor* dx) const {
+    const T* y_data = out->data<T>();
+    const T* y_grad = dout->data<T>();
+    T* x_grad = dx->data<T>();
+
+    auto xpu_context = dev_ctx.x_context();
+    int r = xpu::hard_sigmoid_grad(
+        xpu_context,
+        reinterpret_cast<const XPUType*>(
+            y_data),  // hard_sigmoid_grad does not need x_data
+        reinterpret_cast<const XPUType*>(y_data),
+        reinterpret_cast<const XPUType*>(y_grad),
+        reinterpret_cast<XPUType*>(x_grad),
+        dx->numel(),
+        slope);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_sigmoid_grad");
+  }
+};
+
 template <typename T>
 struct XPUHardSwishGradFunctor : public funcs::BaseActivationFunctor<T> {
   float threshold;
@@ -497,6 +545,7 @@ struct XPUSoftPlusGradFunctor : public funcs::BaseActivationFunctor<T> {
   }
 };
 
+DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, XPUExpGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, XPUReciprocalGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor);
@@ -524,6 +573,10 @@
 DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
                                                XPUSoftPlusGradFunctor,
                                                beta,
                                                threshold)
+DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
+                                                 XPUHardSigmoidGradFunctor,
+                                                 slope,
+                                                 offset)
 
 template <typename T, typename Context>
 void HardSwishGradKernel(const Context& dev_ctx,
@@ -560,8 +613,10 @@ PD_REGISTER_KERNEL(tanh_grad,
                    phi::TanhGradKernel,
                    float,
                    phi::dtype::float16) {}
+PD_REGISTER_ACTIVATION_GRAD_KERNEL(exp_grad, ExpGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_sigmoid_grad, HardSigmoidGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_swish_grad, HardSwishGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(reciprocal_grad, ReciprocalGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
diff --git a/paddle/phi/kernels/xpu/activation_kernel.cc b/paddle/phi/kernels/xpu/activation_kernel.cc
index f730c38e8f0f22844c77b67264ff87fa75d18145..51f74bd34750a91a14d089212ff7c4ac5a7c705f 100644
--- a/paddle/phi/kernels/xpu/activation_kernel.cc
+++ b/paddle/phi/kernels/xpu/activation_kernel.cc
@@ -226,6 +226,25 @@ void PowKernel(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
 }
 
+template <typename T>
+struct XPUHardSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
+  float slope;
+  float offset;
+  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
+    return {{"slope", &slope}, {"offset", &offset}};
+  }
+
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  DenseTensor* out) const {
+    using XPUType = typename XPUTypeTrait<T>::Type;
+    int r = xpu_activation_1attr_func<Context, T, XPUType>(
+        dev_ctx, x, out, slope, xpu::hard_sigmoid<XPUType>);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_sigmoid");
+  }
+};
+
 template <typename T>
 struct XPUHardSwishFunctor : public funcs::BaseActivationFunctor<T> {
   float threshold;
@@ -428,6 +447,10 @@ DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus,
                                             XPUSoftplusFunctor,
                                             beta,
                                             threshold)
+DEFINE_XPU_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
+                                            XPUHardSigmoidFunctor,
+                                            slope,
+                                            offset)
 
 template <typename T, typename Context>
 void HardSwishRawKernel(const Context& dev_ctx,
@@ -459,6 +482,7 @@ PD_REGISTER_KERNEL(
 PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel)  // no grad
 PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
 PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
+PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(hard_swish_raw, HardSwishRawKernel)
 PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(pow, PowKernel)
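Reviewer note on why the new grad functors read only `out`/`dout` and are registered through the `DEPOUT` macros: both gradients can be written purely in terms of the forward output `y`, so the XDNN calls never need `x` (hence `y_data` being passed in place of `x_data` above). Below is a minimal CPU sketch of the formulas the patch delegates to XDNN; it is illustrative only, not the `xpu::*` implementation, the `*_ref` names are hypothetical, and the defaults `slope = 1/6`, `offset = 0.5` are assumed to match Paddle's hard_sigmoid attribute defaults.

```cpp
#include <algorithm>
#include <cstddef>

// exp:          y = exp(x)                        -> dx = dy * y
// hard_sigmoid: y = clip(slope * x + offset, 0, 1)
//               -> dx = (0 < y && y < 1) ? dy * slope : 0
// Both backward formulas depend only on y, never on x.

void hard_sigmoid_fwd_ref(const float* x, float* y, std::size_t n,
                          float slope = 1.0f / 6.0f, float offset = 0.5f) {
  for (std::size_t i = 0; i < n; ++i) {
    y[i] = std::min(1.0f, std::max(0.0f, slope * x[i] + offset));
  }
}

void hard_sigmoid_grad_ref(const float* y, const float* dy, float* dx,
                           std::size_t n, float slope = 1.0f / 6.0f) {
  for (std::size_t i = 0; i < n; ++i) {
    // Gradient is slope inside the linear region, zero where the clip saturates.
    dx[i] = (y[i] > 0.0f && y[i] < 1.0f) ? dy[i] * slope : 0.0f;
  }
}

void exp_grad_ref(const float* y, const float* dy, float* dx, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    dx[i] = dy[i] * y[i];  // d/dx exp(x) = exp(x) = y
  }
}
```

One detail worth flagging in review: the forward `XPUHardSigmoidFunctor` forwards only `slope` to `xpu::hard_sigmoid` through `xpu_activation_1attr_func`, while `offset` is captured as an attribute but not passed down, presumably because the XDNN primitive handles the offset internally.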