From 067369212843e89b174dcfa9bba0d291072209bb Mon Sep 17 00:00:00 2001
From: RedContritio
Date: Tue, 30 May 2023 14:36:39 +0800
Subject: [PATCH] support auto generate for activation_op hardswish (#53989)

---
 paddle/fluid/operators/activation_op.cc   | 62 +++++--------------
 paddle/phi/api/yaml/op_compat.yaml        |  3 +-
 paddle/phi/api/yaml/static_backward.yaml  | 12 ++++
 paddle/phi/api/yaml/static_ops.yaml       | 11 ++++
 paddle/phi/ops/compat/activation_sig.cc   |  9 ---
 .../incubate/autograd/composite_rules.py  |  2 +-
 python/paddle/nn/functional/activation.py |  8 ++-
 7 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 375665bc8ee..aca8b571076 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -83,22 +83,22 @@ class ActivationGradOpMaker : public framework::SingleGradOpMaker<T> {
     }
   }
 };
-class HardSwishCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
- public:
-  using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;
-
- protected:
-  void Apply() override {
-    paddle::Tensor x = this->GetSingleForwardInput("X");
-    paddle::Tensor out_grad = this->GetSingleOutputGrad("Out");
-    paddle::Tensor dx = this->GetSingleInputGrad("X");
-    auto* dx_ptr = this->GetOutputPtr(&dx);
-    std::string dx_name = this->GetOutputName(dx);
-    VLOG(6) << "Runing hardswish_grad composite func";
-    prim::hardswish_grad(x, out_grad, dx_ptr);
-    this->RecoverOutputName(dx, dx_name);
-  }
-};
+// class HardSwishCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
+//  public:
+//   using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;
+
+//  protected:
+//   void Apply() override {
+//     paddle::Tensor x = this->GetSingleForwardInput("X");
+//     paddle::Tensor out_grad = this->GetSingleOutputGrad("Out");
+//     paddle::Tensor dx = this->GetSingleInputGrad("X");
+//     auto* dx_ptr = this->GetOutputPtr(&dx);
+//     std::string dx_name = this->GetOutputName(dx);
+//     VLOG(6) << "Runing hardswish_grad composite func";
+//     prim::hardswish_grad(x, out_grad, dx_ptr);
+//     this->RecoverOutputName(dx, dx_name);
+//   }
+// };
 
 phi::KernelKey GetKernelType(const framework::ExecutionContext& ctx,
                              const framework::OperatorWithKernel& oper,
@@ -217,32 +217,6 @@ Mish Activation Operator.
   }
 };
 
-class HardSwishOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("X", "Input of HardSwish operator");
-    AddOutput("Out", "Output of HardSwish operator");
-    AddAttr<float>("threshold", "The threshold parameter of HardSwish operator")
-        .SetDefault(6.0f);
-    AddAttr<float>("scale", "The scale parameter of HardSwish operator")
-        .SetDefault(6.0f);
-    AddAttr<float>("offset", "The offset parameter of HardSwish operator")
-        .SetDefault(3.0f);
-    AddComment(R"DOC(
-HardSwish Activation Operator.
-
-The hard version of swish(https://arxiv.org/pdf/1905.02244.pdf).
-
-$$out = \frac{x * (min(max(0, x+offset), threshold))}{scale}$$
-
-The threshold and scale should be positive. The offset can be either positive or negative.
-The default parameters are set according to the above reference.
-It is recommended to use the defaults for this activation.
-
-)DOC");
-  }
-};
-
 template <typename T>
 class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
  public:
@@ -432,10 +406,6 @@ FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
 REGISTER_ACTIVATION_CPU_KERNEL(soft_relu, SoftRelu)
 
 REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor);
-REGISTER_ACTIVATION_OP_WITH_COMP(hard_swish,
-                                 HardSwish,
-                                 HardSwishFunctor,
-                                 HardSwishGradFunctor);
 REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);
 
 /* ========================== register checkpoint ===========================*/
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 66a1c3e91e4..dfb60b46e35 100755
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -1100,9 +1100,10 @@
     x : X
   outputs :
     out : Out
-  backward : hard_swish_grad
+  backward : hardswish_grad (hard_swish_grad)
   extra :
     attrs : [bool use_mkldnn = false]
+  manual_signature : [hardswish]
 
 - op : hardtanh (brelu)
   backward : hardtanh_grad (brelu_grad)
diff --git a/paddle/phi/api/yaml/static_backward.yaml b/paddle/phi/api/yaml/static_backward.yaml
index 3107ea73571..68157095c91 100755
--- a/paddle/phi/api/yaml/static_backward.yaml
+++ b/paddle/phi/api/yaml/static_backward.yaml
@@ -43,6 +43,18 @@
     func : frobenius_norm_grad
     param : [x, out, out_grad, axis, keepdim, reduce_all]
 
+- backward_op : hardswish_grad
+  forward : hardswish (Tensor x, float threshold = 6.0f, float scale = 6.0f, float offset = 3.0f) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : hardswish_grad
+    param : [x, out_grad]
+  inplace : (out_grad -> x_grad)
+
 - backward_op : relu6_grad
   forward : relu6 (Tensor x, float threshold = 6.0f) -> Tensor(out)
   args : (Tensor out, Tensor out_grad)
diff --git a/paddle/phi/api/yaml/static_ops.yaml b/paddle/phi/api/yaml/static_ops.yaml
index a88b0089366..24e5fa39cd7 100755
--- a/paddle/phi/api/yaml/static_ops.yaml
+++ b/paddle/phi/api/yaml/static_ops.yaml
@@ -180,6 +180,17 @@
     backend : x
     force_backend : force_cpu
 
+- op : hardswish
+  args : (Tensor x, float threshold = 6.0f, float scale = 6.0f, float offset = 3.0f)
+  output : Tensor(out)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : hardswish
+    param : [x]
+  backward : hardswish_grad
+
 - op : less_equal
   args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
   output : Tensor(out)
diff --git a/paddle/phi/ops/compat/activation_sig.cc b/paddle/phi/ops/compat/activation_sig.cc
index 2d2ea6fdfdb..f5fef458000 100644
--- a/paddle/phi/ops/compat/activation_sig.cc
+++ b/paddle/phi/ops/compat/activation_sig.cc
@@ -47,11 +47,6 @@ KernelSignature SwishGradOpArgumentMapping(
   return KernelSignature("swish_grad", {"X", "Out@GRAD"}, {}, {"X@GRAD"});
 }
 
-KernelSignature HardSwishGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("hardswish_grad", {"X", "Out@GRAD"}, {}, {"X@GRAD"});
-}
-
 KernelSignature HardSwishOpArgumentMapping(
     const ArgumentMappingContext& ctx UNUSED) {
   return KernelSignature("hardswish", {"X"}, {}, {"Out"});
@@ -65,12 +60,8 @@ KernelSignature SwishOpArgumentMapping(
 }  // namespace phi
 
 PD_REGISTER_BASE_KERNEL_NAME(hard_swish, hardswish);
-PD_REGISTER_BASE_KERNEL_NAME(hard_swish_grad, hardswish_grad);
-
 PD_REGISTER_ARG_MAPPING_FN(mish_grad, phi::MishGradOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
-                           phi::HardSwishGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(hard_swish,
                            phi::HardSwishOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(swish, phi::SwishOpArgumentMapping);
diff --git a/python/paddle/incubate/autograd/composite_rules.py b/python/paddle/incubate/autograd/composite_rules.py
index 8a1c71395a7..26a4f145cac 100644
--- a/python/paddle/incubate/autograd/composite_rules.py
+++ b/python/paddle/incubate/autograd/composite_rules.py
@@ -433,9 +433,9 @@ def hard_swish_composite(x):
         maxmum(x + offset, 0), threshold
     ) * x / scale
     """
-    offset = 3.0
     threshold = 6.0
     scale = 6.0
+    offset = 3.0
     full_shape = x.shape if len(x.shape) == 0 else [1]
     res = (
         minimum(
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index 9220d14ab80..9742ea25f8c 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -393,10 +393,16 @@ def hardswish(x, name=None):
         x, 'x', ['float16', 'uint16', 'float32', 'float64'], 'hardswish'
     )
 
+    threshold = 6.0
+    scale = 6.0
+    offset = 3.0
     helper = LayerHelper('hardswish', **locals())
     out = helper.create_variable_for_type_inference(x.dtype)
     helper.append_op(
-        type='hard_swish', inputs={'X': x}, outputs={'Out': out}
+        type='hard_swish',
+        inputs={'X': x},
+        outputs={'Out': out},
+        attrs={'threshold': threshold, 'scale': scale, 'offset': offset},
     )
     return out
-- 
GitLab
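
A quick, out-of-band sanity check of the formula behind the attributes that the static-graph op and the Python wrapper now pass explicitly (threshold=6.0, scale=6.0, offset=3.0), as quoted in the removed HardSwishOpMaker doc: out = x * min(max(0, x + offset), threshold) / scale. The NumPy sketch below is illustrative only; hardswish_ref is not a Paddle helper, but it should agree with paddle.nn.functional.hardswish on the same input.

# Illustrative reference only -- hardswish_ref is not part of Paddle.
import numpy as np

def hardswish_ref(x, threshold=6.0, scale=6.0, offset=3.0):
    # out = x * min(max(0, x + offset), threshold) / scale
    return x * np.clip(x + offset, 0.0, threshold) / scale

x = np.array([-4.0, -1.0, 0.0, 2.0, 5.0], dtype="float32")
print(hardswish_ref(x))  # approx. [-0., -0.3333, 0., 1.6667, 5.]

# Optional cross-check against the Python API touched by this patch:
# import paddle, paddle.nn.functional as F
# print(F.hardswish(paddle.to_tensor(x)))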