From 90ab7403753acad5c93b425f6a909a526aa57a3d Mon Sep 17 00:00:00 2001
From: Lijunhui <1578034415@qq.com>
Date: Wed, 2 Mar 2022 15:11:42 +0800
Subject: [PATCH] [KP] Activation op registration for XPU2. part 1/2 (#40002)

---
 .../{activation_op.cu => activation_op.kps}   | 64 +++++++++++++++++++
 .../platform/device/xpu/xpu_op_kpfirst_list.h | 26 ++++++++
 2 files changed, 90 insertions(+)
 rename paddle/fluid/operators/{activation_op.cu => activation_op.kps} (94%)

diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.kps
similarity index 94%
rename from paddle/fluid/operators/activation_op.cu
rename to paddle/fluid/operators/activation_op.kps
index e578ad899e..e1afb3919f 100644
--- a/paddle/fluid/operators/activation_op.cu
+++ b/paddle/fluid/operators/activation_op.kps
@@ -1861,3 +1861,67 @@ REGISTER_OP_CUDA_KERNEL(
     __macro(hard_swish, HardSwish, CudaHardSwishFunctor,                \
             CudaHardSwishGradFunctor);
 FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
+
+#ifdef PADDLE_WITH_XPU_KP
+#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, op_name, functor,         \
+                                       grad_functor)                       \
+  REGISTER_OP_KERNEL(                                                      \
+      act_type, KP, plat::XPUPlace,                                        \
+      ops::ActivationCudaKernel<plat::XPUDeviceContext,                    \
+                                ops::functor<float>>);                     \
+  REGISTER_OP_KERNEL(act_type##_grad, KP, plat::XPUPlace,                  \
+                     ops::ActivationGradCudaKernel<plat::XPUDeviceContext, \
+                                                   ops::grad_functor<float>>);
+
+REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, LeakyRelu, CudaLeakyReluFunctor,
+                               CudaLeakyReluGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(relu, Relu, CudaReluFunctor,
+                               CudaReluGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, Sigmoid, CudaSigmoidFunctor,
+                               CudaSigmoidGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(exp, Exp, CudaExpFunctor, CudaExpGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(log, Log, CudaLogFunctor, CudaLogGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(reciprocal, Reciprocal, CudaReciprocalFunctor,
+                               CudaReciprocalGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(softplus, Softplus, CudaSoftplusFunctor,
+                               CudaSoftplusGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, HardSwish, CudaHardSwishFunctor,
+                               CudaHardSwishGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(elu, Elu, CudaELUFunctor, CudaELUGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(celu, Celu, CudaCELUFunctor,
+                               CudaCELUGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(sqrt, Sqrt, CudaSqrtFunctor,
+                               CudaSqrtGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(square, Square, CudaSquareFunctor,
+                               CudaSquareGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(silu, Silu, CudaSiluFunctor,
+                               CudaSiluGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor,
+                               CudaLogSigmoidGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(softshrink, SoftShrink, CudaSoftShrinkFunctor,
+                               CudaSoftShrinkGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(ceil, Ceil, CudaCeilFunctor,
+                               CudaZeroGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(floor, Floor, CudaFloorFunctor,
+                               CudaZeroGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(log1p, Log1p, CudaLog1pFunctor,
+                               CudaLog1pGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(brelu, BRelu, CudaBReluFunctor,
+                               CudaBReluGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(soft_relu, SoftRelu, CudaSoftReluFunctor,
+                               CudaSoftReluGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(softsign, Softsign, CudaSoftsignFunctor,
+                               CudaSoftsignGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(relu6, Relu6, CudaRelu6Functor,
+                               CudaRelu6GradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(hard_shrink, HardShrink, CudaHardShrinkFunctor,
+                               CudaHardShrinkGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(hard_sigmoid, HardSigmoid,
+                               CudaHardSigmoidFunctor,
+                               CudaHardSigmoidGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(swish, Swish, CudaSwishFunctor,
+                               CudaSwishGradFunctor);
+REGISTER_ACTIVATION_XPU_KERNEL(thresholded_relu, ThresholdedRelu,
+                               CudaThresholdedReluFunctor,
+                               CudaThresholdedReluGradFunctor);
+
+#endif  // PADDLE_WITH_XPU_KP
diff --git a/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h b/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
index f79ef8505d..c5dff84723 100644
--- a/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h
@@ -30,6 +30,32 @@ XPUOpMap& get_kp_ops() {
   static XPUOpMap s_xpu_kp_kernels{
       {"elementwise_add",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      // activation op
+      {"exp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_swish", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"leaky_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"softplus", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reciprocal", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"celu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"square", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"silu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"logsigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"softshrink", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"ceil", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"floor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"log1p", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"brelu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"soft_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"softsign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"relu6", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_shrink", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_sigmoid",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
   };

   return s_xpu_kp_kernels;
--
GitLab
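
For context, the macro added above stamps out a paired forward/backward kernel registration per activation op, reusing the existing CUDA functors on the XPU2 KP (Kernel Primitive) backend with float as the only data type. As a minimal sketch, assuming the macro definition in the hunk above, one invocation expands to roughly:

  // Rough expansion of:
  //   REGISTER_ACTIVATION_XPU_KERNEL(relu, Relu, CudaReluFunctor,
  //                                  CudaReluGradFunctor);
  // Forward kernel: op "relu" on the KP library, XPU place, float only.
  REGISTER_OP_KERNEL(relu, KP, plat::XPUPlace,
                     ops::ActivationCudaKernel<plat::XPUDeviceContext,
                                               ops::CudaReluFunctor<float>>);
  // Backward kernel: op "relu_grad", same library/place/dtype.
  REGISTER_OP_KERNEL(relu_grad, KP, plat::XPUPlace,
                     ops::ActivationGradCudaKernel<
                         plat::XPUDeviceContext,
                         ops::CudaReluGradFunctor<float>>);

The float-only registration matches the vartype::FP32-only entries added to get_kp_ops() in xpu_op_kpfirst_list.h, the list of ops with KP kernels on XPU.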