Unverified commit 05515662, authored by Wang Bojun, committed by GitHub

Phi softplus migration (#44542)

* add yaml and unit tests of phi softplus

add yaml of softplus

fix softplus bug in phi

* update utests

* bug fix

* bug fix for test_layers

* layer api match

* match def and doc in ops.py

* doc polish

* fix unwanted modification of thresholded_relu

* style improvements
Parent e3766da6
@@ -2175,6 +2175,16 @@
     use_gpudnn : true
   backward : softmax_grad
+
+- api : softplus
+  args : (Tensor x, float beta, float threshold)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : softplus
+  backward : softplus_grad
 # softsign
 - api : softsign
   args : (Tensor x)
......
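For orientation, the new `softplus` entry maps the op to the phi `softplus` kernel with `beta`/`threshold` attributes. Below is a minimal NumPy sketch of the computation described by the op's docstring later in this PR (a reference sketch, not the phi kernel's actual code):

```python
import numpy as np


def softplus_ref(x, beta=1.0, threshold=20.0):
    """NumPy reference for the documented formula:
    out = 1/beta * log(1 + exp(beta * x)), reverting to the linear
    function (out = x) where beta * x > threshold for stability."""
    x = np.asarray(x, dtype=np.float64)
    bx = beta * x
    # np.logaddexp(0, bx) == log(1 + exp(bx)), computed stably.
    return np.where(bx > threshold, x, np.logaddexp(0.0, bx) / beta)
```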
@@ -2046,6 +2046,18 @@
     func : softmax_grad
     use_gpudnn : true
 # softplus
+- backward_api : softplus_grad
+  forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, float beta, float threshold)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : softplus_grad
+  inplace : (out_grad -> x_grad)
+
 - backward_api : softsign_grad
   forward : softsign (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
......
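The backward entry wires `softplus_grad`, which consumes the forward input `x` and `out_grad`; the `inplace` hint allows the `out_grad` buffer to be reused for `x_grad`. A hedged NumPy sketch of the gradient it is expected to produce, derived from the forward formula:

```python
import numpy as np


def softplus_grad_ref(x, out_grad, beta=1.0, threshold=20.0):
    """Sketch of the softplus gradient: d(out)/dx = sigmoid(beta * x)
    in the stable region, and 1 in the linear-fallback region, chained
    with the incoming out_grad."""
    bx = beta * np.asarray(x, dtype=np.float64)
    sigmoid = 0.5 * (1.0 + np.tanh(0.5 * bx))  # numerically stable sigmoid
    return out_grad * np.where(bx > threshold, 1.0, sigmoid)
```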
@@ -32,7 +32,6 @@ __activations_noattr__ = [
     'silu',
     'logsigmoid',
     'tanh_shrink',
-    'softplus',
     'softsign',
     'tanh',
 ]
@@ -53,7 +52,15 @@ __inplace_unary_func__ = [
     'reciprocal_',
 ]
 
-__all__ = []
+__all__ = [
+    'softplus',
+    'softshrink',
+    'hard_shrink',
+    'cumsum',
+    'thresholded_relu',
+    'gelu',
+    'erf',
+]
 
 for _OP in set(__all__):
     globals()[_OP] = generate_layer_fn(_OP)
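Each name in the new `__all__` list is turned into a module-level layer function by the loop above; `softplus` itself is then re-defined below with an explicit Python signature and docstring. A toy sketch of this dynamic-registration pattern (illustrative only; the real `generate_layer_fn` builds its wrapper from the op proto and appends an operator via `LayerHelper` rather than printing):

```python
def _make_layer_fn(op_name):
    # Stand-in for generate_layer_fn: returns a wrapper bound to op_name.
    def _wrapper(*args, **kwargs):
        print(f"would append op '{op_name}' with inputs={args} attrs={kwargs}")
    _wrapper.__name__ = op_name
    return _wrapper


# Mirrors `for _OP in set(__all__): globals()[_OP] = generate_layer_fn(_OP)`.
for _op in ['softplus', 'softshrink', 'hard_shrink']:
    globals()[_op] = _make_layer_fn(_op)

softplus(1.0, beta=2.0)  # prints: would append op 'softplus' ...
```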
@@ -462,8 +469,40 @@ Examples:
 """)
 
-add_sample_code(
-    globals()["softplus"], r"""
+_softplus_ = generate_layer_fn('softplus')
+
+
+def softplus(x, beta: float = 1.0, threshold: float = 20.0, name=None):
+    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'softplus')
+    locals_val = locals().copy()
+    kwargs = dict()
+    for name, val in locals_val.items():
+        if val is not None:
+            kwargs[name] = val
+    return _softplus_(**kwargs)
+
+
+softplus.__doc__ = r"""
+    :alias_main: paddle.nn.functional.softplus
+    :alias: paddle.nn.functional.softplus, paddle.nn.functional.activation.softplus
+    :old_api: paddle.fluid.layers.softplus
+
+:strong:`Softplus Activation Operator`
+
+Equation:
+    .. math::
+        out = \\frac{1}{beta} * log(1 + e^{beta * x})
+
+For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.
+
+Args:
+    x(Tensor): Input of Softplus op, Tensor, dtype: float32 or float64
+    beta(float, optional): The value of beta for softplus. Default is 1
+    threshold (float, optional): The value of threshold for softplus. Default is 20
+    name(str, optional): Name for the operation (optional, default is None)
+
+Returns:
+    Variable: The output of Softplus op, Tensor, dtype: float32 or float64
+
 Examples:
     .. code-block:: python
 
@@ -474,8 +513,7 @@ Examples:
         out = F.softplus(x)
         print(out)
         # [0.513015, 0.598139, 0.744397, 0.854355]
-""")
+"""
 
 add_sample_code(
     globals()["softsign"], r"""
 
@@ -492,8 +530,6 @@ Examples:
 """)
 
-__all__ += ['softshrink']
-
 _softshrink_ = generate_layer_fn('softshrink')
 
@@ -542,8 +578,6 @@ Examples:
         result = fluid.layers.softshrink(x=data, alpha=0.3)
 """
 
-__all__ += ['hard_shrink']
-
 _hard_shrink_ = generate_layer_fn('hard_shrink')
 
@@ -568,8 +602,6 @@ Examples:
         >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """
 
-__all__ += ['cumsum']
-
 _cum_sum_ = generate_layer_fn('cumsum')
 
@@ -610,8 +642,6 @@ Examples:
         result = fluid.layers.cumsum(data, axis=0)
 """
 
-__all__ += ['thresholded_relu']
-
 _thresholded_relu_ = generate_layer_fn('thresholded_relu')
 
@@ -700,8 +730,6 @@ Examples:
         # [-0. , -0. , 1.0013918 ]], dtype=float32)
 """
 
-__all__ += ['gelu']
-
 _gelu_ = generate_layer_fn('gelu')
 
@@ -785,8 +813,6 @@ Examples:
         # [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
 """
 
-__all__ += ['erf']
-
 _erf_ = generate_layer_fn('erf')
......
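As a usage note, here is a hedged sketch of calling the `fluid.layers.softplus` wrapper defined above from a static-graph program; the feed data and program setup are illustrative, and exact behaviour depends on the Paddle 2.x version:

```python
import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()

x = fluid.data(name='x', shape=[None, 4], dtype='float32')
# beta/threshold are forwarded as op attributes by the wrapper.
y = fluid.layers.softplus(x, beta=2.0, threshold=15.0)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(fluid.default_main_program(),
               feed={'x': np.array([[-0.4, -0.2, 0.1, 0.3]], dtype='float32')},
               fetch_list=[y])
print(out)
```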
@@ -2676,6 +2676,7 @@ class TestSoftplus(TestActivation):
 
     def setUp(self):
         self.op_type = "softplus"
+        self.python_api = paddle.nn.functional.softplus
         self.init_dtype()
 
         beta = 2
 
@@ -2688,10 +2689,14 @@ class TestSoftplus(TestActivation):
         self.attrs = {'beta': beta, "threshold": threshold}
         self.outputs = {'Out': out}
+        self.check_eager = True
 
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out')
+        if hasattr(self, 'check_eager'):
+            check_eager = self.check_eager
+        self.check_grad(['X'], 'Out', check_eager=check_eager)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
......
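The test now registers `paddle.nn.functional.softplus` as `python_api` and enables the eager check. Below is a minimal sketch of the forward comparison this relies on, against an illustrative NumPy reference; the shapes, dtype, and tolerance are assumptions, not the test's own values:

```python
import numpy as np
import paddle
import paddle.nn.functional as F

beta, threshold = 2.0, 15.0
x_np = np.random.uniform(-1.0, 1.0, [10, 12]).astype('float64')
# Reference mirroring the documented formula with its linear fallback.
ref = np.where(beta * x_np > threshold, x_np,
               np.log1p(np.exp(beta * x_np)) / beta)

out = F.softplus(paddle.to_tensor(x_np), beta=beta, threshold=threshold)
np.testing.assert_allclose(out.numpy(), ref, rtol=1e-6)
```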
@@ -1177,7 +1177,11 @@ def softplus(x, beta=1, threshold=20, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_softplus(x, beta, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
+
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
......
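The change above splits the dynamic branch: new eager mode calls the generated `final_state_softplus`, legacy dygraph keeps the attribute-string `_C_ops.softplus` call, and static graph still falls through to the `check_variable_and_dtype` path. A quick illustrative eager-mode check that reproduces the docstring values through the new path, on a Paddle build where eager execution is the default:

```python
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()  # dygraph/eager execution
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3], dtype='float32'))
print(F.softplus(x).numpy())            # ~[0.513015, 0.598139, 0.744397, 0.854355]
print(F.softplus(x, beta=2.0).numpy())  # softplus with beta = 2
```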