diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index d810ad8bd9f032e27445f53fb1d17bd9eb3c816c..c0168544a3b55feb7cb1598558380d755824bfd4 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -801,7 +801,7 @@
 
 - backward_op : relu6_grad
   forward : relu6 (Tensor x) -> Tensor(out)
-  args : (Tensor out, Tensor out_grad, float threshold = 6)
+  args : (Tensor out, Tensor out_grad)
   output : Tensor(x_grad)
   infer_meta :
     func : UnchangedInferMeta
@@ -1010,7 +1010,7 @@
 
 - backward_op : swish_grad
   forward : swish (Tensor x) -> Tensor(out)
-  args : (Tensor x, Tensor out_grad, float bete=1.0)
+  args : (Tensor x, Tensor out_grad)
   output : Tensor(x_grad)
   infer_meta :
     func : GeneralUnaryGradInferMeta
diff --git a/paddle/phi/api/yaml/sparse_backward.yaml b/paddle/phi/api/yaml/sparse_backward.yaml
index 949a6c4c19b12cdaa636b41f96dc38e9db5c9e0f..c541129f7ffbbb77ddb657dfa2d0a11353fb21d0 100644
--- a/paddle/phi/api/yaml/sparse_backward.yaml
+++ b/paddle/phi/api/yaml/sparse_backward.yaml
@@ -252,7 +252,7 @@
 
 - backward_op : relu6_grad
   forward : relu6(Tensor x) -> Tensor(out)
-  args : (Tensor out, Tensor out_grad, float threshold = 6)
+  args : (Tensor out, Tensor out_grad)
   output : Tensor(x_grad)
   infer_meta :
     func : UnchangedInferMeta
diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h
index b322ed5e02a290a08558937996b10aa93ce35e3d..ca75a6e0b24a48d99fcc057735dfee333b7eeddd 100644
--- a/paddle/phi/kernels/activation_grad_kernel.h
+++ b/paddle/phi/kernels/activation_grad_kernel.h
@@ -285,6 +285,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log2);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log10);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log1p);
+DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Swish);
 
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1);
@@ -294,6 +295,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid);
 DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt);
+DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6);
 
 DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Round);
 DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Floor);
@@ -303,11 +305,9 @@ DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, alpha);
 DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, threshold);
 DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, lambda);
 DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink, threshold);
-DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, beta);
 DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Logit, eps);
 DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish, threshold);
 DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Celu, alpha);
-DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6, threshold);
 
 DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(HardTanh, t_min, t_max);
 DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh, scale_a, scale_b);
diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
index e15ae5bb89e90b7b2a142ac053dd3513ea1443c5..9273f8393b5b35923001d6fe1f239e8f95ed85eb 100644
--- a/paddle/phi/kernels/cpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -136,12 +136,14 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, Expm1GradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, ReciprocalGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, SqrtGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, RsqrtGradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6, Relu6GradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Softsign, SoftsignGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, LogSigmoidGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, LogGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, Log2GradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log10, Log10GradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log1p, Log1pGradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, SwishGradFunctor);
 
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhGradFunctor);
@@ -157,16 +159,12 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
 DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
                                                ThresholdedReluGradFunctor,
                                                threshold);
-DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
-                                                 Relu6GradFunctor,
-                                                 threshold);
 DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
                                                SoftShrinkGradFunctor,
                                                lambda);
 DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
                                                HardShrinkGradFunctor,
                                                threshold);
-DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, SwishGradFunctor, beta);
 
 DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
                                                MishGradFunctor,
diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h
index 78a1f8cb24f852ce129da6b5413898fa3e6ae349..6a3554318e5e6668314257adb3cf1129aa6857e1 100644
--- a/paddle/phi/kernels/funcs/activation_functor.h
+++ b/paddle/phi/kernels/funcs/activation_functor.h
@@ -1505,16 +1505,14 @@ struct Relu6Functor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct Relu6GradFunctor : public BaseActivationFunctor<T> {
-  float threshold;
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"threshold", &threshold}};
-  }
+  typename BaseActivationFunctor<T>::AttrPair GetAttrs() { return {{}}; }
   template <typename Device,
             typename X,
             typename Out,
             typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
+    float threshold = 6;
     dx.device(d) =
         dout * ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
                    .template cast<T>();
@@ -2188,10 +2186,7 @@ struct SwishFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct SwishGradFunctor : public BaseActivationFunctor<T> {
-  float beta;
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"beta", &beta}};
-  }
+  typename BaseActivationFunctor<T>::AttrPair GetAttrs() { return {{}}; }
 
   template <typename Device,
             typename X,
@@ -2199,6 +2194,7 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
             typename dOut,
             typename dX>
   void operator()(Device d, X x, Out fake_out, dOut dout, dX dx) const {
+    float beta = 1.0;
     auto temp1 = static_cast<T>(1) /
                  (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
     auto out = x * temp1;
@@ -3285,14 +3281,12 @@ struct CudaRelu6Functor : public BaseActivationFunctor<T> {
 template <typename T>
 struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
   T zero = static_cast<T>(0.0f);
-  float threshold;
 
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"threshold", &threshold}};
-  }
+  typename BaseActivationFunctor<T>::AttrPair GetAttrs() { return {{}}; }
 
   // dx = (out > 0 && out < t) ? dout : 0
   __device__ __forceinline__ T operator()(const T dout, const T out) const {
+    float threshold = 6;
     T t = static_cast<T>(threshold);
     return (out > zero && out < t) ? dout : zero;
   }
@@ -3781,15 +3775,13 @@ template <typename T>
 struct CudaSwishGradFunctor : public BaseActivationFunctor<T> {
   using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
   MPType one = static_cast<MPType>(1.0f);
-  float beta;
 
-  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"beta", &beta}};
-  }
+  typename BaseActivationFunctor<T>::AttrPair GetAttrs() { return {{}}; }
 
   // dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x) / (1 + exp(-b * x))^2)
   __device__ __forceinline__ T operator()(const T arg_dout,
                                           const T arg_x) const {
+    float beta = 1.0;
     MPType dout = static_cast<MPType>(arg_dout);
     MPType x = static_cast<MPType>(arg_x);
     MPType b = static_cast<MPType>(beta);
diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
index 04a414fd5848e25baf445d1fcd8d165f09e5f47e..c0fb7342a80b65bfae189fe91135362d661f552a 100644
--- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -198,12 +198,14 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, CudaExpm1GradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, CudaReciprocalGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, CudaSqrtGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, CudaRsqrtGradFunctor);
+DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6, CudaRelu6GradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Softsign, CudaSoftsignGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, CudaLogSigmoidGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, CudaLogGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, CudaLog2GradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log10, CudaLog10GradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log1p, CudaLog1pGradFunctor);
+DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, CudaSwishGradFunctor);
 
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                                CudaLeakyReluGradFunctor,
@@ -217,9 +219,6 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
                                                CudaHardShrinkGradFunctor,
                                                threshold);
-DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
-                                               CudaSwishGradFunctor,
-                                               beta);
 
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
                                                CudaMishGradFunctor,
@@ -227,9 +226,6 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Celu,
                                                CudaCELUGradFunctor,
                                                alpha);
-DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
-                                                  CudaRelu6GradFunctor,
-                                                  threshold);
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(LogitCUDA,
                                                   CudaLogitGradFunctor,
                                                   eps);
diff --git a/paddle/phi/kernels/onednn/activation_grad_kernel.cc b/paddle/phi/kernels/onednn/activation_grad_kernel.cc
index 6355908c250bdee4878fc056345901da439da251..9b8626254c7b5d4c43546981c1e4c97a43e8eb38 100644
--- a/paddle/phi/kernels/onednn/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/activation_grad_kernel.cc
@@ -204,9 +204,16 @@ DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
 DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
                                                    MishOneDNNGradFunctor,
                                                    threshold);
-DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
-                                                   SwishOneDNNGradFunctor,
-                                                   beta);
+
+template <typename T, typename Context>
+void SwishGradKernel(const Context& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& dout,
+                     DenseTensor* dx) {
+  SwishOneDNNGradFunctor<T> functor;
+  float beta = 1.0;
+  functor(dev_ctx, x, dout, beta, 0, dx);
+}
 
 template <typename T, typename Context>
 void EluGradKernel(const Context& dev_ctx,
@@ -247,9 +254,9 @@ template <typename T, typename Context>
 void Relu6GradKernel(const Context& dev_ctx,
                      const DenseTensor& out,
                      const DenseTensor& dout,
-                     float threshold,
                      DenseTensor* dx) {
   Relu6OneDNNGradUseOutFunctor<T> functor;
+  float threshold = 6;
   functor(dev_ctx, out, dout, 0, threshold, dx);
 }
 
diff --git a/paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h b/paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h
index 0709e6d946ffa7d582ee3b044707aab0c2c322fc..6a71a924526732e313005752a44f8bfa9a479f35 100644
--- a/paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h
+++ b/paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h
@@ -94,9 +94,9 @@ DEFINE_SPARSE_UNARY_GRAD_KERNEL(Log1p)
 DEFINE_SPARSE_UNARY_GRAD_KERNEL(Relu)
 DEFINE_SPARSE_UNARY_GRAD_KERNEL(Abs)
 DEFINE_SPARSE_UNARY_GRAD_KERNEL(Expm1)
+DEFINE_SPARSE_UNARY_GRAD_KERNEL(Relu6)
 DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR(Pow, factor)
 DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR(LeakyRelu, alpha)
-DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR(Relu6, threshold)
 
 template <typename T, typename Context>
 void CastCooGradKernel(const Context& dev_ctx,
diff --git a/paddle/phi/kernels/sparse/unary_grad_kernel.h b/paddle/phi/kernels/sparse/unary_grad_kernel.h
index 88bf0e9002501a2bb7af6fe669f2668f0ac674f6..7440533057022ee5d2f0cfa69db663bf32cba4da 100644
--- a/paddle/phi/kernels/sparse/unary_grad_kernel.h
+++ b/paddle/phi/kernels/sparse/unary_grad_kernel.h
@@ -62,9 +62,9 @@ DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sqrt)
 DECLARE_SPARSE_UNARY_GRAD_KERNEL(Log1p)
 DECLARE_SPARSE_UNARY_GRAD_KERNEL(Abs)
 DECLARE_SPARSE_UNARY_GRAD_KERNEL(Expm1)
+DECLARE_SPARSE_UNARY_GRAD_KERNEL(Relu6)
 DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR(Pow, factor)
 DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR(LeakyRelu, alpha)
-DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR(Relu6, threshold)
 
 template <typename T, typename Context>
 void CastCooGradKernel(const Context& dev_ctx,
diff --git a/paddle/phi/kernels/xpu/activation_grad_kernel.cc b/paddle/phi/kernels/xpu/activation_grad_kernel.cc
index df9674a16d6250f70e7c2f1b06a745b564bab0eb..02243215f95886dbf5f0d4dbf2e1c85ab4a74d63 100644
--- a/paddle/phi/kernels/xpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -351,10 +351,7 @@ struct XPUReluGradFunctor : public funcs::BaseActivationFunctor<T> {
 template <typename T>
 struct XPURelu6GradFunctor : public funcs::BaseActivationFunctor<T> {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  float threshold;
-  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"threshold", &threshold}};
-  }
+  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() { return {{}}; }
   template <typename Context>
   void operator()(const Context& dev_ctx,
                   const DenseTensor* x,
@@ -481,10 +478,7 @@ void PowGradKernel(const Context& dev_ctx,
 template <typename T>
 struct XPUSwishGradFunctor : public funcs::BaseActivationFunctor<T> {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  float beta;
-  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() {
-    return {{"beta", &beta}};
-  }
+  typename funcs::BaseActivationFunctor<T>::AttrPair GetAttrs() { return {{}}; }
 
   template <typename Context>
   void operator()(const Context& dev_ctx,
@@ -571,14 +565,13 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor);
+DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6, XPURelu6GradFunctor);
 
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, XPUSiluGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor);
 DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor);
+DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, XPUSwishGradFunctor);
 
-DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
-                                               XPUSwishGradFunctor,
-                                               beta);
 DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
                                                XPUMishGradFunctor,
                                                threshold);
@@ -586,10 +579,6 @@ DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                                XPULeakyReluGradFunctor,
                                                alpha);
 
-DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(Relu6,
-                                                  XPURelu6GradFunctor,
-                                                  threshold);
-
 DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
                                                XPUSoftPlusGradFunctor,
                                                beta,
diff --git a/paddle/phi/ops/compat/activation_sig.cc b/paddle/phi/ops/compat/activation_sig.cc
index 804d0d63aa289151429fa1625b8fba488d6e43f2..e754c79ed1b29b09c06768cd0a2684e0be02c52f 100644
--- a/paddle/phi/ops/compat/activation_sig.cc
+++ b/paddle/phi/ops/compat/activation_sig.cc
@@ -41,8 +41,14 @@ namespace phi {
 
 DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardTanh, "hardtanh", "t_min" comma "t_max");
 DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Mish, "mish", "threshold");
-DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Swish, "swish", "beta");         // NOLINT
-DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold");  // NOLINT
+
+KernelSignature SwishGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature("swish_grad", {"X", "Out@GRAD"}, {}, {"X@GRAD"});
+}
+
+KernelSignature Relu6GradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  return KernelSignature("relu6_grad", {"Out", "Out@GRAD"}, {}, {"X@GRAD"});
+}
 
 KernelSignature HardSwishGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
diff --git a/test/mkldnn/test_activation_mkldnn_op.py b/test/mkldnn/test_activation_mkldnn_op.py
index 77c28e288697323a7edf10b615229e3361eeaf39..09ebe1ae21e8de2e841867f572a3a975206a0d49 100644
--- a/test/mkldnn/test_activation_mkldnn_op.py
+++ b/test/mkldnn/test_activation_mkldnn_op.py
@@ -16,7 +16,6 @@ import unittest
 
 import numpy as np
 from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
-from scipy.special import expit
 
 import paddle
 import paddle.nn.functional as F
@@ -416,30 +415,6 @@ class TestMKLDNNAbsDim4(TestAbs):
         self.dtype = np.float32
 
 
-class TestMKLDNNSwishDim4(TestSwish):
-    def setUp(self):
-        super().setUp()
-
-        x = np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype(self.dtype)
-        beta = 2.3
-        out = x * expit(beta * x)
-
-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.outputs = {'Out': out}
-        self.attrs = {"use_mkldnn": True, "beta": beta}
-
-    def init_dtype(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        self.check_output(check_dygraph=False)
-
-    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-        self.check_grad(['X'], 'Out', check_dygraph=False)
-
-
 def ref_hardswish(x, threshold=6.0, scale=6.0, offset=3.0):
     x_dtype = x.dtype
     if x_dtype == 'float16':
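For readers skimming the patch, a minimal NumPy sketch (not part of the diff) of the gradients the kernels compute once the attributes are dropped: relu6_grad hard-codes threshold = 6 and swish_grad hard-codes beta = 1, matching the Relu6GradFunctor and SwishGradFunctor bodies above. The helper names below are illustrative only, not Paddle APIs.

    import numpy as np

    def relu6_grad_ref(out, dout, threshold=6.0):
        # dx = dout where 0 < out < threshold, else 0 (out is the forward output)
        return dout * ((out > 0) & (out < threshold)).astype(dout.dtype)

    def swish_grad_ref(x, dout, beta=1.0):
        # sigma = 1 / (1 + exp(-beta * x)); out = x * sigma
        # dx = dout * (sigma + beta * out * (1 - sigma)), same as the functor's formula
        sigma = 1.0 / (1.0 + np.exp(-beta * x))
        out = x * sigma
        return dout * (sigma + beta * out * (1.0 - sigma))

    x = np.array([-2.0, 0.5, 3.0, 7.0], dtype=np.float32)
    dout = np.ones_like(x)
    print(relu6_grad_ref(np.clip(x, 0.0, 6.0), dout))  # [0. 1. 1. 0.]
    print(swish_grad_ref(x, dout))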