From 067369212843e89b174dcfa9bba0d291072209bb Mon Sep 17 00:00:00 2001
From: RedContritio
Date: Tue, 30 May 2023 14:36:39 +0800
Subject: [PATCH] support auto generate for activation_op hardswish (#53989)

---
 paddle/fluid/operators/activation_op.cc   | 62 +++++--------------
 paddle/phi/api/yaml/op_compat.yaml        |  3 +-
 paddle/phi/api/yaml/static_backward.yaml  | 12 ++++
 paddle/phi/api/yaml/static_ops.yaml       | 11 ++++
 paddle/phi/ops/compat/activation_sig.cc   |  9 ---
 .../incubate/autograd/composite_rules.py  |  2 +-
 python/paddle/nn/functional/activation.py |  8 ++-
 7 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 375665bc8ee..aca8b571076 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -83,22 +83,22 @@ class ActivationGradOpMaker : public framework::SingleGradOpMaker<T> {
     }
   }
 };
-class HardSwishCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
- public:
-  using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;
-
- protected:
-  void Apply() override {
-    paddle::Tensor x = this->GetSingleForwardInput("X");
-    paddle::Tensor out_grad = this->GetSingleOutputGrad("Out");
-    paddle::Tensor dx = this->GetSingleInputGrad("X");
-    auto* dx_ptr = this->GetOutputPtr(&dx);
-    std::string dx_name = this->GetOutputName(dx);
-    VLOG(6) << "Runing hardswish_grad composite func";
-    prim::hardswish_grad(x, out_grad, dx_ptr);
-    this->RecoverOutputName(dx, dx_name);
-  }
-};
+// class HardSwishCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
+//  public:
+//   using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;
+
+//  protected:
+//   void Apply() override {
+//     paddle::Tensor x = this->GetSingleForwardInput("X");
+//     paddle::Tensor out_grad = this->GetSingleOutputGrad("Out");
+//     paddle::Tensor dx = this->GetSingleInputGrad("X");
+//     auto* dx_ptr = this->GetOutputPtr(&dx);
+//     std::string dx_name = this->GetOutputName(dx);
+//     VLOG(6) << "Runing hardswish_grad composite func";
+//     prim::hardswish_grad(x, out_grad, dx_ptr);
+//     this->RecoverOutputName(dx, dx_name);
+//   }
+// };
 
 phi::KernelKey GetKernelType(const framework::ExecutionContext& ctx,
                              const framework::OperatorWithKernel& oper,
@@ -217,32 +217,6 @@ Mish Activation Operator.
   }
 };
 
-class HardSwishOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X", "Input of HardSwish operator");
-    AddOutput("Out", "Output of HardSwish operator");
-    AddAttr<float>("threshold", "The threshold parameter of HardSwish operator")
-        .SetDefault(6.0f);
-    AddAttr<float>("scale", "The scale parameter of HardSwish operator")
-        .SetDefault(6.0f);
-    AddAttr<float>("offset", "The offset parameter of HardSwish operator")
-        .SetDefault(3.0f);
-    AddComment(R"DOC(
-HardSwish Activation Operator.
-
-The hard version of swish(https://arxiv.org/pdf/1905.02244.pdf).
-
-$$out = \frac{x * (min(max(0, x+offset), threshold))}{scale}$$
-
-The threshold and scale should be positive. The offset can be either positive or negative.
-The default parameters are set according to the above reference.
-It is recommended to use the defaults for this activation.
-
-)DOC");
-  }
-};
-
 template <typename T>
 class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
  public:
@@ -432,10 +406,6 @@ FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
 REGISTER_ACTIVATION_CPU_KERNEL(soft_relu, SoftRelu)
 
 REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor);
-REGISTER_ACTIVATION_OP_WITH_COMP(hard_swish,
-                                 HardSwish,
-                                 HardSwishFunctor,
-                                 HardSwishGradFunctor);
 REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);
 
 /* ========================== register checkpoint ===========================*/
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 66a1c3e91e4..dfb60b46e35 100755
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -1100,9 +1100,10 @@
     x : X
   outputs :
     out : Out
-  backward : hard_swish_grad
+  backward : hardswish_grad (hard_swish_grad)
   extra :
     attrs : [bool use_mkldnn = false]
+  manual_signature : [hardswish]
 
 - op : hardtanh (brelu)
   backward : hardtanh_grad (brelu_grad)
diff --git a/paddle/phi/api/yaml/static_backward.yaml b/paddle/phi/api/yaml/static_backward.yaml
index 3107ea73571..68157095c91 100755
--- a/paddle/phi/api/yaml/static_backward.yaml
+++ b/paddle/phi/api/yaml/static_backward.yaml
@@ -43,6 +43,18 @@
     func : frobenius_norm_grad
     param : [x, out, out_grad, axis, keepdim, reduce_all]
 
+- backward_op : hardswish_grad
+  forward : hardswish (Tensor x, float threshold = 6.0f, float scale = 6.0f, float offset = 3.0f) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : hardswish_grad
+    param : [x, out_grad]
+  inplace : (out_grad -> x_grad)
+
 - backward_op : relu6_grad
   forward : relu6 (Tensor x, float threshold = 6.0f) -> Tensor(out)
   args : (Tensor out, Tensor out_grad)
diff --git a/paddle/phi/api/yaml/static_ops.yaml b/paddle/phi/api/yaml/static_ops.yaml
index a88b0089366..24e5fa39cd7 100755
--- a/paddle/phi/api/yaml/static_ops.yaml
+++ b/paddle/phi/api/yaml/static_ops.yaml
@@ -180,6 +180,17 @@
     backend : x
     force_backend : force_cpu
 
+- op : hardswish
+  args : (Tensor x, float threshold = 6.0f, float scale = 6.0f, float offset = 3.0f)
+  output : Tensor(out)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : hardswish
+    param : [x]
+  backward : hardswish_grad
+
 - op : less_equal
   args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
   output : Tensor(out)
diff --git a/paddle/phi/ops/compat/activation_sig.cc b/paddle/phi/ops/compat/activation_sig.cc
index 2d2ea6fdfdb..f5fef458000 100644
--- a/paddle/phi/ops/compat/activation_sig.cc
+++ b/paddle/phi/ops/compat/activation_sig.cc
@@ -47,11 +47,6 @@ KernelSignature SwishGradOpArgumentMapping(
   return KernelSignature("swish_grad", {"X", "Out@GRAD"}, {}, {"X@GRAD"});
 }
 
-KernelSignature HardSwishGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("hardswish_grad", {"X", "Out@GRAD"}, {}, {"X@GRAD"});
-}
-
 KernelSignature HardSwishOpArgumentMapping(
     const ArgumentMappingContext& ctx UNUSED) {
   return KernelSignature("hardswish", {"X"}, {}, {"Out"});
@@ -65,12 +60,8 @@ KernelSignature SwishOpArgumentMapping(
 }  // namespace phi
 
 PD_REGISTER_BASE_KERNEL_NAME(hard_swish, hardswish);
-PD_REGISTER_BASE_KERNEL_NAME(hard_swish_grad, hardswish_grad);
-
 PD_REGISTER_ARG_MAPPING_FN(mish_grad, phi::MishGradOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
-                           phi::HardSwishGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(hard_swish,
                            phi::HardSwishOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(swish, phi::SwishOpArgumentMapping);
diff --git a/python/paddle/incubate/autograd/composite_rules.py b/python/paddle/incubate/autograd/composite_rules.py
index 8a1c71395a7..26a4f145cac 100644
--- a/python/paddle/incubate/autograd/composite_rules.py
+++ b/python/paddle/incubate/autograd/composite_rules.py
@@ -433,9 +433,9 @@ def hard_swish_composite(x):
         maxmum(x + offset, 0), threshold
     ) * x / scale
     """
-    offset = 3.0
     threshold = 6.0
     scale = 6.0
+    offset = 3.0
     full_shape = x.shape if len(x.shape) == 0 else [1]
     res = (
         minimum(
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index 9220d14ab80..9742ea25f8c 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -393,10 +393,16 @@ def hardswish(x, name=None):
         x, 'x', ['float16', 'uint16', 'float32', 'float64'], 'hardswish'
     )
 
+    threshold = 6.0
+    scale = 6.0
+    offset = 3.0
     helper = LayerHelper('hardswish', **locals())
     out = helper.create_variable_for_type_inference(x.dtype)
     helper.append_op(
-        type='hard_swish', inputs={'X': x}, outputs={'Out': out}
+        type='hard_swish',
+        inputs={'X': x},
+        outputs={'Out': out},
+        attrs={'threshold': threshold, 'scale': scale, 'offset': offset},
     )
     return out
-- 
GitLab
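
A quick, out-of-band sanity check of the formula behind the attributes that the static-graph op and the Python wrapper now pass explicitly (threshold=6.0, scale=6.0, offset=3.0), as quoted in the removed HardSwishOpMaker doc: out = x * min(max(0, x + offset), threshold) / scale. The NumPy sketch below is illustrative only; hardswish_ref is not a Paddle helper, but it should agree with paddle.nn.functional.hardswish on the same input.

# Illustrative reference only -- hardswish_ref is not part of Paddle.
import numpy as np

def hardswish_ref(x, threshold=6.0, scale=6.0, offset=3.0):
    # out = x * min(max(0, x + offset), threshold) / scale
    return x * np.clip(x + offset, 0.0, threshold) / scale

x = np.array([-4.0, -1.0, 0.0, 2.0, 5.0], dtype="float32")
print(hardswish_ref(x))  # approx. [-0., -0.3333, 0., 1.6667, 5.]

# Optional cross-check against the Python API touched by this patch:
# import paddle, paddle.nn.functional as F
# print(F.hardswish(paddle.to_tensor(x)))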