Unverified · Commit 05515662 authored by Wang Bojun, committed by GitHub

Phi softplus migration (#44542)

* add yaml and utests of phi softplus

add yaml of softplus

fix softplus bug in phi

* update utests

* bug fix

* bug fix for test_layers

* layer api match

* match def and doc in ops.py

* doc polish

* fix unwanted modification of thresholded_relu

* style improve
Parent e3766da6
......@@ -2175,6 +2175,16 @@
use_gpudnn : true
backward : softmax_grad
- api : softplus
args : (Tensor x, float beta, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus
backward : softplus_grad
# softsign
- api : softsign
args : (Tensor x)
......
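
The api.yaml entry above registers the forward signature softplus(x, beta, threshold) -> out and reuses UnchangedInferMeta on x, so the output keeps the input's shape and dtype. For reference, the operation being declared is the one stated in the docstring further below:

    out = \frac{1}{\beta} \log\left(1 + e^{\beta x}\right), \qquad out = x \quad \text{when } \beta x > \text{threshold (linear fallback for numerical stability)}
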
......@@ -2046,6 +2046,18 @@
func : softmax_grad
use_gpudnn : true
# softplus
- backward_api : softplus_grad
forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float beta, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus_grad
inplace : (out_grad -> x_grad)
- backward_api : softsign_grad
forward : softsign (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......
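
The backward entry mirrors the forward attributes and marks out_grad -> x_grad as an inplace pair. Differentiating the forward formula gives the gradient softplus_grad must produce (a worked derivation from the equation above, not a quote of the kernel; the second case assumes the same linear fallback as the forward pass):

    \frac{\partial\, out}{\partial x} = \frac{e^{\beta x}}{1 + e^{\beta x}} = \sigma(\beta x) \quad\Rightarrow\quad x\_grad = out\_grad \cdot \sigma(\beta x), \qquad x\_grad = out\_grad \quad \text{when } \beta x > \text{threshold}
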
......@@ -32,7 +32,6 @@ __activations_noattr__ = [
'silu',
'logsigmoid',
'tanh_shrink',
'softplus',
'softsign',
'tanh',
]
......@@ -53,7 +52,15 @@ __inplace_unary_func__ = [
'reciprocal_',
]
__all__ = []
__all__ = [
'softplus',
'softshrink',
'hard_shrink',
'cumsum',
'thresholded_relu',
'gelu',
'erf',
]
for _OP in set(__all__):
globals()[_OP] = generate_layer_fn(_OP)
......@@ -462,8 +469,40 @@ Examples:
""")
add_sample_code(
globals()["softplus"], r"""
_softplus_ = generate_layer_fn('softplus')
def softplus(x, beta: float = 1.0, threshold: float = 20.0, name=None):
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'softplus')
locals_val = locals().copy()
kwargs = dict()
for name, val in locals_val.items():
if val is not None:
kwargs[name] = val
return _softplus_(**kwargs)
softplus.__doc__ = r"""
:alias_main: paddle.nn.functional.softplus
:alias: paddle.nn.functional.softplus, paddle.nn.functional.activation.softplus
:old_api: paddle.fluid.layers.softplus
:strong:`Softplus Activation Operator`
Equation:
.. math::
out = \\frac{1}{beta} * log(1 + e^{beta * x})
For numerical stability, the implementation reverts to the linear function when beta * x > threshold.
Args:
    x (Tensor): Input of the Softplus operator, with dtype float32 or float64.
    beta (float, optional): The value of beta for softplus. Default is 1.
    threshold (float, optional): The value of threshold for softplus. Default is 20.
    name (str, optional): Name for the operation. Default is None.
Returns:
    Tensor: The output of the Softplus operator, with dtype float32 or float64.
Examples:
.. code-block:: python
......@@ -474,8 +513,7 @@ Examples:
out = F.softplus(x)
print(out)
# [0.513015, 0.598139, 0.744397, 0.854355]
""")
"""
add_sample_code(
globals()["softsign"], r"""
......@@ -492,8 +530,6 @@ Examples:
""")
__all__ += ['softshrink']
_softshrink_ = generate_layer_fn('softshrink')
......@@ -542,8 +578,6 @@ Examples:
result = fluid.layers.softshrink(x=data, alpha=0.3)
"""
__all__ += ['hard_shrink']
_hard_shrink_ = generate_layer_fn('hard_shrink')
......@@ -568,8 +602,6 @@ Examples:
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
"""
__all__ += ['cumsum']
_cum_sum_ = generate_layer_fn('cumsum')
......@@ -610,8 +642,6 @@ Examples:
result = fluid.layers.cumsum(data, axis=0)
"""
__all__ += ['thresholded_relu']
_thresholded_relu_ = generate_layer_fn('thresholded_relu')
......@@ -700,8 +730,6 @@ Examples:
# [-0. , -0. , 1.0013918 ]], dtype=float32)
"""
__all__ += ['gelu']
_gelu_ = generate_layer_fn('gelu')
......@@ -785,8 +813,6 @@ Examples:
# [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
"""
__all__ += ['erf']
_erf_ = generate_layer_fn('erf')
......
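
The re-declared fluid.layers.softplus wrapper above collects its non-None locals into kwargs and forwards them to the function produced by generate_layer_fn('softplus'). A minimal, self-contained sketch of that forwarding pattern (illustrative only; _layer_fn and softplus_like are hypothetical stand-ins, not Paddle APIs):

    def _layer_fn(**kwargs):
        # Stand-in for the function produced by generate_layer_fn('softplus');
        # it just echoes the keyword arguments it would receive.
        return kwargs

    def softplus_like(x, beta=1.0, threshold=20.0, name=None):
        # Copy the call-site locals and drop entries that are None (e.g. an
        # unset `name`), so only meaningful arguments reach the layer function.
        locals_val = locals().copy()
        kwargs = {k: v for k, v in locals_val.items() if v is not None}
        return _layer_fn(**kwargs)

    print(softplus_like([1.0, 2.0], beta=2.0))
    # -> {'x': [1.0, 2.0], 'beta': 2.0, 'threshold': 20.0}
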
......@@ -2676,6 +2676,7 @@ class TestSoftplus(TestActivation):
def setUp(self):
self.op_type = "softplus"
self.python_api = paddle.nn.functional.softplus
self.init_dtype()
beta = 2
......@@ -2688,10 +2689,14 @@ class TestSoftplus(TestActivation):
self.attrs = {'beta': beta, "threshold": threshold}
self.outputs = {'Out': out}
self.check_eager = True
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out')
if hasattr(self, 'check_eager'):
check_eager = self.check_eager
self.check_grad(['X'], 'Out', check_eager=check_eager)
@unittest.skipIf(not core.is_compiled_with_cuda(),
......
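
The updated test sets python_api to paddle.nn.functional.softplus and threads a check_eager flag into check_grad. A NumPy reference of the softplus formula the expected outputs are checked against could look like the sketch below (ref_softplus here is illustrative, not the repository's exact helper):

    import numpy as np

    def ref_softplus(x, beta=1.0, threshold=20.0):
        # Stable softplus: revert to the identity once beta * x exceeds the
        # threshold, mirroring the operator's linear region.
        x = np.asarray(x, dtype=np.float64)
        x_beta = beta * x
        return np.where(x_beta <= threshold,
                        np.log1p(np.exp(np.minimum(x_beta, threshold))) / beta,
                        x)

    print(ref_softplus([-0.4, -0.2, 0.1, 0.3]))
    # approx. [0.513015, 0.598139, 0.744397, 0.854355]
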
......@@ -1177,7 +1177,11 @@ def softplus(x, beta=1, threshold=20, name=None):
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
"""
if in_dynamic_mode():
if in_dygraph_mode():
return _C_ops.final_state_softplus(x, beta, threshold)
if _in_legacy_dygraph():
return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
......
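
With this dispatch, eager mode calls _C_ops.final_state_softplus(x, beta, threshold) while legacy dygraph keeps the attribute-style _C_ops.softplus call. A short usage sketch of the functional API that exercises this path, with values taken from the docstring example:

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32')
    out = F.softplus(x, beta=1, threshold=20)
    print(out.numpy())  # approx. [0.513015, 0.598139, 0.744397, 0.854355]
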