Unverified commit bef6f2e1, authored by Lijunhui, committed by GitHub

[KP] Fix registry and add UT for thresholded_relu & softshrink (#40524)

* init commit

* correct namespace
Parent 9fc89b34
@@ -15,6 +15,8 @@ limitations under the License. */
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
namespace paddle {
namespace operators {

@@ -1148,63 +1150,221 @@ REGISTER_OP_CUDA_KERNEL(
FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)

#ifdef PADDLE_WITH_XPU_KP
#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, op_name, functor,            \
                                       grad_functor)                          \
  REGISTER_OP_KERNEL(                                                         \
      act_type, KP, plat::XPUPlace,                                           \
      ops::ActivationCudaKernel<plat::XPUDeviceContext, ops::functor<float>>); \
  REGISTER_OP_KERNEL(act_type##_grad, KP, plat::XPUPlace,                     \
                     ops::ActivationGradCudaKernel<plat::XPUDeviceContext,    \
                                                   ops::grad_functor<float>>);

REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, LeakyRelu, CudaLeakyReluFunctor,
                               CudaLeakyReluGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, Sigmoid, CudaSigmoidFunctor,
                               CudaSigmoidGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(exp, Exp, CudaExpFunctor, CudaExpGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(log, Log, CudaLogFunctor, CudaLogGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(reciprocal, Reciprocal, CudaReciprocalFunctor,
                               CudaReciprocalGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(softplus, Softplus, CudaSoftplusFunctor,
                               CudaSoftplusGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, HardSwish, CudaHardSwishFunctor,
                               CudaHardSwishGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(elu, Elu, CudaELUFunctor, CudaELUGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(celu, Celu, CudaCELUFunctor,
                               CudaCELUGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, Sqrt, CudaSqrtFunctor,
                               CudaSqrtGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(square, Square, CudaSquareFunctor,
                               CudaSquareGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(silu, Silu, CudaSiluFunctor,
                               CudaSiluGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor,
                               CudaLogSigmoidGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(softshrink, SoftShrink, CudaSoftShrinkFunctor,
                               CudaSoftShrinkGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(ceil, Ceil, CudaCeilFunctor,
                               CudaZeroGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(floor, Floor, CudaFloorFunctor,
                               CudaZeroGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(log1p, Log1p, CudaLog1pFunctor,
                               CudaLog1pGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(brelu, BRelu, CudaBReluFunctor,
                               CudaBReluGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(soft_relu, SoftRelu, CudaSoftReluFunctor,
                               CudaSoftReluGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(softsign, Softsign, CudaSoftsignFunctor,
                               CudaSoftsignGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(relu6, Relu6, CudaRelu6Functor,
                               CudaRelu6GradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(hard_shrink, HardShrink, CudaHardShrinkFunctor,
                               CudaHardShrinkGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(hard_sigmoid, HardSigmoid,
                               CudaHardSigmoidFunctor,
                               CudaHardSigmoidGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(swish, Swish, CudaSwishFunctor,
                               CudaSwishGradFunctor);
REGISTER_ACTIVATION_XPU_KERNEL(thresholded_relu, ThresholdedRelu,
                               CudaThresholdedReluFunctor,
                               CudaThresholdedReluGradFunctor);

REGISTER_OP_KERNEL(
    brelu, KP, plat::XPUPlace,
    ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                              phi::funcs::CudaBReluFunctor<float>>);
REGISTER_OP_KERNEL(
    brelu_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  phi::funcs::CudaBReluGradFunctor<float>>);

REGISTER_OP_KERNEL(ceil, KP, plat::XPUPlace,
                   ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                                             ops::CudaCeilFunctor<float>>);
REGISTER_OP_KERNEL(
    ceil_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  ops::CudaZeroGradFunctor<float>>);

REGISTER_OP_KERNEL(celu, KP, plat::XPUPlace,
                   ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                                             ops::CudaCELUFunctor<float>>);
REGISTER_OP_KERNEL(
    celu_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  ops::CudaCELUGradFunctor<float>>);

REGISTER_OP_KERNEL(elu, KP, plat::XPUPlace,
                   ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                                             ops::CudaELUFunctor<float>>);
REGISTER_OP_KERNEL(
    elu_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  ops::CudaELUGradFunctor<float>>);

REGISTER_OP_KERNEL(exp, KP, plat::XPUPlace,
                   ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                                             ops::CudaExpFunctor<float>>);
REGISTER_OP_KERNEL(
    exp_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  ops::CudaExpGradFunctor<float>>);

REGISTER_OP_KERNEL(floor, KP, plat::XPUPlace,
                   ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                                             ops::CudaFloorFunctor<float>>);
REGISTER_OP_KERNEL(
    floor_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  ops::CudaZeroGradFunctor<float>>);

REGISTER_OP_KERNEL(
    hard_shrink, KP, plat::XPUPlace,
    ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
                              ops::CudaHardShrinkFunctor<float>>);
REGISTER_OP_KERNEL(
    hard_shrink_grad, KP, plat::XPUPlace,
    ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
                                  ops::CudaHardShrinkGradFunctor<float>>);

REGISTER_OP_KERNEL(
hard_sigmoid, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSigmoidFunctor<float>>);
REGISTER_OP_KERNEL(
hard_sigmoid_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(hard_swish, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSwishFunctor<float>>);
REGISTER_OP_KERNEL(
hard_swish_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaHardSwishGradFunctor<float>>);
REGISTER_OP_KERNEL(
leaky_relu, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaLeakyReluFunctor<float>>);
REGISTER_OP_KERNEL(
leaky_relu_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaLeakyReluGradFunctor<float>>);
REGISTER_OP_KERNEL(log, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogFunctor<float>>);
REGISTER_OP_KERNEL(
log_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogGradFunctor<float>>);
REGISTER_OP_KERNEL(log1p, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLog1pFunctor<float>>);
REGISTER_OP_KERNEL(
log1p_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLog1pGradFunctor<float>>);
REGISTER_OP_KERNEL(
logsigmoid, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogSigmoidFunctor<float>>);
REGISTER_OP_KERNEL(
logsigmoid_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaLogSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(
reciprocal, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaReciprocalFunctor<float>>);
REGISTER_OP_KERNEL(
reciprocal_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaReciprocalGradFunctor<float>>);
REGISTER_OP_KERNEL(
relu, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaReluFunctor<float>>);
REGISTER_OP_KERNEL(
relu_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
phi::funcs::CudaReluGradFunctor<float>>);
REGISTER_OP_KERNEL(relu6, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaRelu6Functor<float>>);
REGISTER_OP_KERNEL(
relu6_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaRelu6GradFunctor<float>>);
REGISTER_OP_KERNEL(sigmoid, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSigmoidFunctor<float>>);
REGISTER_OP_KERNEL(
sigmoid_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSigmoidGradFunctor<float>>);
REGISTER_OP_KERNEL(silu, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSiluFunctor<float>>);
REGISTER_OP_KERNEL(
silu_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSiluGradFunctor<float>>);
REGISTER_OP_KERNEL(soft_relu, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftReluFunctor<float>>);
REGISTER_OP_KERNEL(
soft_relu_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftReluGradFunctor<float>>);
REGISTER_OP_KERNEL(softplus, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftplusFunctor<float>>);
REGISTER_OP_KERNEL(
softplus_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftplusGradFunctor<float>>);
REGISTER_OP_KERNEL(
softshrink, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftShrinkFunctor<float>>);
REGISTER_OP_KERNEL(
softshrink_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftShrinkGradFunctor<float>>);
REGISTER_OP_KERNEL(softsign, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftsignFunctor<float>>);
REGISTER_OP_KERNEL(
softsign_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSoftsignGradFunctor<float>>);
REGISTER_OP_KERNEL(sqrt, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSqrtFunctor<float>>);
REGISTER_OP_KERNEL(
sqrt_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSqrtGradFunctor<float>>);
REGISTER_OP_KERNEL(square, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSquareFunctor<float>>);
REGISTER_OP_KERNEL(
square_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSquareGradFunctor<float>>);
REGISTER_OP_KERNEL(swish, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSwishFunctor<float>>);
REGISTER_OP_KERNEL(
swish_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaSwishGradFunctor<float>>);
REGISTER_OP_KERNEL(
thresholded_relu, KP, plat::XPUPlace,
ops::ActivationCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaThresholdedReluFunctor<float>>);
REGISTER_OP_KERNEL(
thresholded_relu_grad, KP, plat::XPUPlace,
ops::ActivationGradCudaKernel<paddle::platform::XPUDeviceContext,
ops::CudaThresholdedReluGradFunctor<float>>);
#endif  // PADDLE_WITH_XPU_KP
@@ -849,6 +849,38 @@ def ref_softsign(x):
    return out

class XPUTestSoftshrinkOP(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'softshrink'
        self.use_dynamic_create_class = False

    class XPUTestSoftshrink(TestActivationOPBase):
        def set_case(self):
            self.op_type = "softshrink"
            self.dtype = self.in_type

            threshold = 0.5
            np.random.seed(1023)
            x = np.random.uniform(0.25, 10, [10, 12]).astype(self.dtype)
            out = ref_softshrink(x, threshold)

            self.inputs = {'X': x}
            self.outputs = {'Out': out}
            self.attrs = {'use_xpu': True}


support_types = get_xpu_op_support_types('softshrink')
for stype in support_types:
    create_test_class(globals(), XPUTestSoftshrinkOP, stype)


def ref_softshrink(x, threshold=0.5):
    out = np.copy(x)
    out = (out < -threshold) * (out + threshold) + (out > threshold) * (
        out - threshold)
    return out
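As a quick sanity check of the softshrink reference above, here is a minimal standalone NumPy sketch (not part of the commit; the sample values are illustrative only):

import numpy as np

def ref_softshrink(x, threshold=0.5):
    # Same piecewise rule as the test's reference: values outside
    # [-threshold, threshold] are shifted toward zero, the rest become zero.
    out = np.copy(x)
    out = (out < -threshold) * (out + threshold) + (out > threshold) * (
        out - threshold)
    return out

x = np.array([-1.0, -0.2, 0.3, 1.2])
print(ref_softshrink(x))  # expected: [-0.5  0.   0.   0.7]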

class XPUTestSwishOP(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'swish'

@@ -879,5 +911,36 @@ def ref_swish(x):
    return out

class XPUTestThresholdedReluOP(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'thresholded_relu'
        self.use_dynamic_create_class = False

    class XPUTestThresholdedRelu(TestActivationOPBase):
        def set_case(self):
            self.op_type = "thresholded_relu"
            self.dtype = self.in_type

            threshold = 1.0
            np.random.seed(1024)
            x = np.random.uniform(-20, 20, [10, 12]).astype(self.dtype)
            x[np.abs(x) < 0.005] = 0.02
            out = ref_thresholded_relu(x, threshold)

            self.inputs = {'X': x}
            self.outputs = {'Out': out}
            self.attrs = {'use_xpu': True}


support_types = get_xpu_op_support_types('thresholded_relu')
for stype in support_types:
    create_test_class(globals(), XPUTestThresholdedReluOP, stype)


def ref_thresholded_relu(x, threshold=1.0):
    out = (x > threshold) * x
    return out
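Similarly, a minimal standalone check of the thresholded_relu reference (plain NumPy; sample values are illustrative only):

import numpy as np

def ref_thresholded_relu(x, threshold=1.0):
    # Pass values strictly greater than the threshold, zero everything else;
    # note that x == threshold maps to 0 because the comparison is strict.
    return (x > threshold) * x

x = np.array([-2.0, 0.5, 1.0, 3.0])
print(ref_thresholded_relu(x))  # expected: [-0.  0.  0.  3.]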

if __name__ == "__main__":
    unittest.main()