From bef6f2e1e24821cdf654c6f4daf389c746dc1c3f Mon Sep 17 00:00:00 2001
From: Lijunhui <1578034415@qq.com>
Date: Wed, 16 Mar 2022 21:42:09 +0800
Subject: [PATCH] [KP] Fix registry and add UT for thresholded_relu &
 softshrink (#40524)

* init commit

* correct namespace
---
 paddle/fluid/operators/activation_op.kps     | 276 ++++++++++++++----
 .../unittests/xpu/test_activation_op_xpu.py  |  63 ++++
 2 files changed, 281 insertions(+), 58 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.kps b/paddle/fluid/operators/activation_op.kps
index 22613cbe2a2..865943696c3 100644
--- a/paddle/fluid/operators/activation_op.kps
+++ b/paddle/fluid/operators/activation_op.kps
@@ -15,6 +15,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 
+#include "paddle/phi/kernels/funcs/activation_functor.h"
+
 namespace paddle {
 namespace operators {
 
@@ -1148,63 +1150,221 @@ REGISTER_OP_CUDA_KERNEL(
 FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
 
 #ifdef PADDLE_WITH_XPU_KP
-#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, op_name, functor,         \
-                                       grad_functor)                       \
-  REGISTER_OP_KERNEL(                                                      \
-      act_type, KP, plat::XPUPlace,                                        \
-      ops::ActivationCudaKernel<plat::XPUDeviceContext,                    \
-                                ops::functor<float>>);                     \
-  REGISTER_OP_KERNEL(act_type##_grad, KP, plat::XPUPlace,                  \
-                     ops::ActivationGradCudaKernel<plat::XPUDeviceContext, \
-                                                   ops::grad_functor<float>>);
-REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, LeakyRelu, CudaLeakyReluFunctor,
-                               CudaLeakyReluGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, Sigmoid, CudaSigmoidFunctor,
-                               CudaSigmoidGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(exp, Exp, CudaExpFunctor, CudaExpGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(log, Log, CudaLogFunctor, CudaLogGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(reciprocal, Reciprocal, CudaReciprocalFunctor,
-                               CudaReciprocalGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(softplus, Softplus, CudaSoftplusFunctor,
-                               CudaSoftplusGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, HardSwish, CudaHardSwishFunctor,
-                               CudaHardSwishGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(elu, Elu, CudaELUFunctor, CudaELUGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(celu, Celu, CudaCELUFunctor,
-                               CudaCELUGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(sqrt, Sqrt, CudaSqrtFunctor,
-                               CudaSqrtGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(square, Square, CudaSquareFunctor,
-                               CudaSquareGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(silu, Silu, CudaSiluFunctor,
-                               CudaSiluGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor,
-                               CudaLogSigmoidGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(softshrink, SoftShrink, CudaSoftShrinkFunctor,
-                               CudaSoftShrinkGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(ceil, Ceil, CudaCeilFunctor,
-                               CudaZeroGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(floor, Floor, CudaFloorFunctor,
-                               CudaZeroGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(log1p, Log1p, CudaLog1pFunctor,
-                               CudaLog1pGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(brelu, BRelu, CudaBReluFunctor,
-                               CudaBReluGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(soft_relu, SoftRelu, CudaSoftReluFunctor,
-                               CudaSoftReluGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(softsign, Softsign, CudaSoftsignFunctor,
-                               CudaSoftsignGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(relu6, Relu6, CudaRelu6Functor,
-                               CudaRelu6GradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(hard_shrink, HardShrink, CudaHardShrinkFunctor,
-                               CudaHardShrinkGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(hard_sigmoid, HardSigmoid,
-                               CudaHardSigmoidFunctor,
-                               CudaHardSigmoidGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(swish, Swish, CudaSwishFunctor,
-                               CudaSwishGradFunctor);
-REGISTER_ACTIVATION_XPU_KERNEL(thresholded_relu, ThresholdedRelu,
-                               CudaThresholdedReluFunctor,
-                               CudaThresholdedReluGradFunctor);
+REGISTER_OP_KERNEL(
+    brelu, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              phi::funcs::CudaBReluFunctor<float>>);
+REGISTER_OP_KERNEL(
+    brelu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  phi::funcs::CudaBReluGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(ceil, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaCeilFunctor<float>>);
+REGISTER_OP_KERNEL(
+    ceil_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaZeroGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(celu, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaCELUFunctor<float>>);
+REGISTER_OP_KERNEL(
+    celu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaCELUGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(elu, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaELUFunctor<float>>);
+REGISTER_OP_KERNEL(
+    elu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaELUGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(exp, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaExpFunctor<float>>);
+REGISTER_OP_KERNEL(
+    exp_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaExpGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(floor, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaFloorFunctor<float>>);
+REGISTER_OP_KERNEL(
+    floor_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaZeroGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    hard_shrink, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              ops::CudaHardShrinkFunctor<float>>);
+REGISTER_OP_KERNEL(
+    hard_shrink_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaHardShrinkGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    hard_sigmoid, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              ops::CudaHardSigmoidFunctor<float>>);
+REGISTER_OP_KERNEL(
+    hard_sigmoid_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaHardSigmoidGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(hard_swish, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaHardSwishFunctor<float>>);
+REGISTER_OP_KERNEL(
+    hard_swish_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaHardSwishGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    leaky_relu, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              phi::funcs::CudaLeakyReluFunctor<float>>);
+REGISTER_OP_KERNEL(
+    leaky_relu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  phi::funcs::CudaLeakyReluGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(log, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaLogFunctor<float>>);
+REGISTER_OP_KERNEL(
+    log_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaLogGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(log1p, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaLog1pFunctor<float>>);
+REGISTER_OP_KERNEL(
+    log1p_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaLog1pGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    logsigmoid, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              ops::CudaLogSigmoidFunctor<float>>);
+REGISTER_OP_KERNEL(
+    logsigmoid_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaLogSigmoidGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    reciprocal, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              ops::CudaReciprocalFunctor<float>>);
+REGISTER_OP_KERNEL(
+    reciprocal_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaReciprocalGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    relu, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              phi::funcs::CudaReluFunctor<float>>);
+REGISTER_OP_KERNEL(
+    relu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  phi::funcs::CudaReluGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(relu6, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaRelu6Functor<float>>);
+REGISTER_OP_KERNEL(
+    relu6_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaRelu6GradFunctor<float>>);
+
+REGISTER_OP_KERNEL(sigmoid, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSigmoidFunctor<float>>);
+REGISTER_OP_KERNEL(
+    sigmoid_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSigmoidGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(silu, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSiluFunctor<float>>);
+REGISTER_OP_KERNEL(
+    silu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSiluGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(soft_relu, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSoftReluFunctor<float>>);
+REGISTER_OP_KERNEL(
+    soft_relu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSoftReluGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(softplus, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSoftplusFunctor<float>>);
+REGISTER_OP_KERNEL(
+    softplus_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSoftplusGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    softshrink, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              phi::funcs::CudaSoftShrinkFunctor<float>>);
+REGISTER_OP_KERNEL(
+    softshrink_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  phi::funcs::CudaSoftShrinkGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(softsign, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSoftsignFunctor<float>>);
+REGISTER_OP_KERNEL(
+    softsign_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSoftsignGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(sqrt, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSqrtFunctor<float>>);
+REGISTER_OP_KERNEL(
+    sqrt_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSqrtGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(square, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSquareFunctor<float>>);
+REGISTER_OP_KERNEL(
+    square_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSquareGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(swish, KP, plat::XPUPlace,
+                   ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                                             ops::CudaSwishFunctor<float>>);
+REGISTER_OP_KERNEL(
+    swish_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  ops::CudaSwishGradFunctor<float>>);
+
+REGISTER_OP_KERNEL(
+    thresholded_relu, KP, plat::XPUPlace,
+    ops::ActivationCudaKernel<plat::XPUDeviceContext,
+                              phi::funcs::CudaThresholdedReluFunctor<float>>);
+REGISTER_OP_KERNEL(
+    thresholded_relu_grad, KP, plat::XPUPlace,
+    ops::ActivationGradCudaKernel<plat::XPUDeviceContext,
+                                  phi::funcs::CudaThresholdedReluGradFunctor<float>>);
 
 #endif  // PADDLE_WITH_XPU_KP
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
index 69bca8dd9ef..66f2e871dac 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
@@ -849,6 +849,38 @@ def ref_softsign(x):
     return out
 
 
+class XPUTestSoftshrinkOP(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'softshrink'
+        self.use_dynamic_create_class = False
+
+    class XPUTestSoftshrink(TestActivationOPBase):
+        def set_case(self):
+            self.op_type = "softshrink"
+            self.dtype = self.in_type
+
+            threshold = 0.5
+            np.random.seed(1023)
+            x = np.random.uniform(0.25, 10, [10, 12]).astype(self.dtype)
+            out = ref_softshrink(x, threshold)
+
+            self.inputs = {'X': x}
+            self.outputs = {'Out': out}
+            self.attrs = {'use_xpu': True}
+
+
+support_types = get_xpu_op_support_types('softshrink')
+for stype in support_types:
+    create_test_class(globals(), XPUTestSoftshrinkOP, stype)
+
+
+def ref_softshrink(x, threshold=0.5):
+    out = np.copy(x)
+    out = (out < -threshold) * (out + threshold) + (out > threshold) * (
+        out - threshold)
+    return out
+
+
 class XPUTestSwishOP(XPUOpTestWrapper):
     def __init__(self):
         self.op_name = 'swish'
@@ -879,5 +911,36 @@ def ref_swish(x):
     return out
 
 
+class XPUTestThresholdedReluOP(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'thresholded_relu'
+        self.use_dynamic_create_class = False
+
+    class XPUTestThresholdedRelu(TestActivationOPBase):
+        def set_case(self):
+            self.op_type = "thresholded_relu"
+            self.dtype = self.in_type
+
+            threshold = 1.0
+            np.random.seed(1024)
+            x = np.random.uniform(-20, 20, [10, 12]).astype(self.dtype)
+            x[np.abs(x) < 0.005] = 0.02
+            out = ref_thresholded_relu(x, threshold)
+
+            self.inputs = {'X': x}
+            self.outputs = {'Out': out}
+            self.attrs = {'use_xpu': True}
+
+
+support_types = get_xpu_op_support_types('thresholded_relu')
+for stype in support_types:
+    create_test_class(globals(), XPUTestThresholdedReluOP, stype)
+
+
+def ref_thresholded_relu(x, threshold=1.0):
+    out = (x > threshold) * x
+    return out
+
+
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab