Unverified commit 8d194524, authored by Qi Li, committed by GitHub

hardtanh prelu softmax, test=develop (#26431)

Parent 6e5670b8
@@ -1189,6 +1189,7 @@ def chunk_eval(input,
num_correct_chunks)

@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
def softmax(input, use_cudnn=False, name=None, axis=-1):
"""
This operator implements the softmax layer. The calculation process is as follows:
@@ -8610,7 +8611,7 @@ def log(x, name=None):
return out

-@templatedoc()
+@deprecated(since="2.0.0", update_to="paddle.nn.functional.relu")
def relu(x, name=None):
"""
${comment}
@@ -9269,7 +9270,7 @@ def pad2d(input,
return out

-@templatedoc()
+@deprecated(since="2.0.0", update_to="paddle.nn.functional.elu")
def elu(x, alpha=1.0, name=None):
"""
:alias_main: paddle.nn.functional.elu
@@ -9585,6 +9586,7 @@ def swish(x, beta=1.0, name=None):
return out

@deprecated(since="2.0.0", update_to="paddle.nn.functional.prelu")
def prelu(x, mode, param_attr=None, name=None):
"""
:api_attr: Static Graph
...
@@ -534,6 +534,63 @@ class TestHardShrinkAPI(unittest.TestCase):
F.hardshrink(x_fp16)
def ref_hardtanh(x, min=-1.0, max=1.0):
out = np.copy(x)
out[np.abs(x - min) < 0.005] = min + 0.02
out[np.abs(x - max) < 0.005] = max + 0.02
out = np.minimum(np.maximum(x, min), max)
return out
class TestHardtanhAPI(unittest.TestCase):
# test paddle.nn.Hardtanh, paddle.nn.functional.hardtanh
def setUp(self):
self.x_np = np.random.uniform(-3, 3, [10, 12]).astype('float32')
self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \
else paddle.CPUPlace()
def test_static_api(self):
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.data('X', [10, 12])
out1 = F.hardtanh(x)
m = paddle.nn.Hardtanh()
out2 = m(x)
exe = paddle.static.Executor(self.place)
res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2])
out_ref = ref_hardtanh(self.x_np)
for r in res:
self.assertEqual(np.allclose(out_ref, r), True)
def test_dygraph_api(self):
paddle.disable_static(self.place)
x = paddle.to_variable(self.x_np)
out1 = F.hardtanh(x)
m = paddle.nn.Hardtanh()
out2 = m(x)
out_ref = ref_hardtanh(self.x_np)
for r in [out1, out2]:
self.assertEqual(np.allclose(out_ref, r.numpy()), True)
out1 = F.hardtanh(x, -2.0, 2.0)
m = paddle.nn.Hardtanh(-2.0, 2.0)
out2 = m(x)
out_ref = ref_hardtanh(self.x_np, -2.0, 2.0)
for r in [out1, out2]:
self.assertEqual(np.allclose(out_ref, r.numpy()), True)
paddle.enable_static()
def test_errors(self):
with paddle.static.program_guard(paddle.static.Program()):
# The input type must be Variable.
self.assertRaises(TypeError, F.hardtanh, 1)
# The input dtype must be float16, float32, float64.
x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32')
self.assertRaises(TypeError, F.hardtanh, x_int32)
# support the input dtype is float16
x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16')
F.hardtanh(x_fp16)
def ref_softshrink(x, threshold=0.5):
out = np.copy(x)
out = (out < -threshold) * (out + threshold) + (out > threshold) * (
...
@@ -18,23 +18,134 @@ import unittest
import numpy as np
import paddle.fluid as fluid
import six
-import paddle.fluid as fluid
+import paddle.fluid.core as core
from paddle.fluid import Program, program_guard
from op_test import OpTest, skip_check_grad_ci
import paddle
import paddle.nn.functional as F
def ref_prelu(x, weight):
x_t = x.copy()
weight = weight.reshape(1, -1, 1, 1)
neg_indices = x <= 0
assert x.shape == neg_indices.shape
x_t[neg_indices] = (x_t * weight)[neg_indices]
return (x_t, )
def ref_prelu_nn(x, num_parameters, init):
weight_np = np.full((num_parameters), init)
return ref_prelu(x, weight_np)
-class TestPReluOpError(unittest.TestCase):
-def test_errors(self):
-with program_guard(Program()):
+class TestFunctionalPReluAPI(unittest.TestCase):
+def setUp(self):
+self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda(
) else paddle.CPUPlace()
self.x_np = np.random.uniform(-1., 1., [1, 2, 3, 4]).astype('float32')
self.weight_np_0 = np.random.randn(1).astype('float32')
self.weight_np_1 = np.random.randn(self.x_np.shape[1]).astype('float32')
def static_check(self, weight_np):
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.data('X', self.x_np.shape, 'float32')
weight = paddle.data('Alpha', weight_np.shape, 'float32')
out = F.prelu(x, weight)
exe = paddle.static.Executor(self.place)
res = exe.run(feed={'X': self.x_np,
'Alpha': weight_np},
fetch_list=[out])
out_ref = ref_prelu(self.x_np, weight_np)
self.assertEqual(np.allclose(out_ref, res[0]), True)
def dygraph_check(self, weight_np):
paddle.disable_static(self.place)
x = paddle.to_tensor(self.x_np)
weight = paddle.to_tensor(weight_np)
out = F.prelu(x, weight)
out_ref = ref_prelu(self.x_np, weight_np)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
paddle.enable_static()
def test_static_api(self):
self.static_check(self.weight_np_0)
self.static_check(self.weight_np_1)
def test_dygraph_api(self):
self.dygraph_check(self.weight_np_0)
self.dygraph_check(self.weight_np_1)
def test_error(self):
with paddle.static.program_guard(paddle.static.Program()):
weight_fp32 = paddle.data(
name='weight_fp32', shape=[1], dtype='float32')
# The input type must be Variable.
-self.assertRaises(TypeError, fluid.layers.prelu, 0.1, 'all')
+self.assertRaises(TypeError, F.prelu, x=1, weight=weight_fp32)
# The input dtype must be float16, float32, float64.
-x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32')
-self.assertRaises(TypeError, fluid.layers.prelu, x_int32, 'all')
+x_int32 = paddle.data(name='x_int32', shape=[2, 3], dtype='int32')
+self.assertRaises(TypeError, F.prelu, x=x_int32, weight=weight_fp32)
-# support the input dtype is float32
-x_fp16 = fluid.layers.data(
-name='x_fp16', shape=[12, 10], dtype='float32')
-fluid.layers.prelu(x_fp16, 'all')
+# support the input dtype is float16
+x_fp16 = paddle.data(name='x_fp16', shape=[2, 3], dtype='float16')
+F.prelu(x=x_fp16, weight=weight_fp32)
class TestNNPReluAPI(unittest.TestCase):
def setUp(self):
self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda(
) else paddle.CPUPlace()
self.x_np = np.ones([1, 2, 3, 4]).astype('float32')
def test_static_api(self):
startup_program = paddle.static.Program()
train_program = paddle.static.Program()
with paddle.static.program_guard(train_program, startup_program):
x = paddle.data(name='X', shape=self.x_np.shape, dtype='float32')
m = paddle.nn.PReLU()
out = m(x)
exe = paddle.static.Executor(self.place)
exe.run(startup_program)
res = exe.run(train_program,
feed={'X': self.x_np},
fetch_list=[out])
out_ref = ref_prelu_nn(self.x_np, 1, 0.25)
self.assertEqual(np.allclose(out_ref, res[0]), True)
def test_dygraph_api(self):
paddle.disable_static(self.place)
x = paddle.to_tensor(self.x_np)
m = paddle.nn.PReLU()
out = m(x)
out_ref = ref_prelu_nn(self.x_np, 1, 0.25)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
x = paddle.to_tensor(self.x_np)
m = paddle.nn.PReLU(num_parameters=self.x_np.shape[1])
out = m(x)
out_ref = ref_prelu_nn(self.x_np, self.x_np.shape[1], 0.25)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
x = paddle.to_tensor(self.x_np)
m = paddle.nn.PReLU(init=0.5)
out = m(x)
out_ref = ref_prelu_nn(self.x_np, 1, 0.5)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
x = paddle.to_tensor(self.x_np)
m = paddle.nn.PReLU(weight_attr=fluid.ParamAttr(name="weight"))
out = m(x)
out_ref = ref_prelu_nn(self.x_np, 1, 0.25)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
x = paddle.to_tensor(self.x_np)
m = paddle.nn.PReLU(weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.5)))
out = m(x)
out_ref = ref_prelu_nn(self.x_np, 1, 0.5)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
paddle.enable_static()
class PReluTest(OpTest):
...
@@ -35,6 +35,15 @@ def stable_softmax(x):
return exps / np.sum(exps)
def ref_softmax(x, axis=None, dtype=None):
x_t = x.copy()
if dtype is not None:
x_t = x_t.astype(dtype)
if axis is None:
axis = -1
return np.apply_along_axis(stable_softmax, axis, x_t)
class TestSoftmaxOp(OpTest):
def get_x_shape(self):
return [10, 10]
@@ -93,20 +102,6 @@ class TestSoftmaxOp(OpTest):
check_dygraph=(self.use_mkldnn == False))
class TestSoftmaxOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# The input type of softmax_op must be Variable.
x1 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace())
self.assertRaises(TypeError, fluid.layers.softmax, x1)
# The input dtype of softmax_op must be float16, float32 or float64.
x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32")
self.assertRaises(TypeError, fluid.layers.softmax, x2)
x3 = fluid.layers.data(name='x3', shape=[4], dtype="float16")
fluid.layers.softmax(x3)
class TestSoftmaxOp2(TestSoftmaxOp):
def get_x_shape(self):
return [2, 3, 4, 5]
@@ -224,41 +219,59 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp):
return [2, 3, 4, 5]
-class TestNnFunctionalSoftmaxApi(unittest.TestCase):
+class TestSoftmaxAPI(unittest.TestCase):
def setUp(self):
self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda(
) else paddle.CPUPlace()
self.x_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype('float32')
self.out_ref = np.apply_along_axis(stable_softmax, -1, self.x_np)

-def test_api_static(self):
-with program_guard(Program()):
+def test_static_check(self):
+with paddle.static.program_guard(paddle.static.Program()):
x = paddle.data('X', self.x_np.shape, 'float32')
-out = F.softmax(x)
+out1 = F.softmax(x)
+m = paddle.nn.Softmax()
+out2 = m(x)
exe = paddle.static.Executor(self.place)
-res = exe.run(feed={'X': self.x_np}, fetch_list=[out])
-self.assertEqual(np.allclose(self.out_ref, res[0]), True)
+res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2])
+out_ref = ref_softmax(self.x_np, axis=-1, dtype=None)
+for r in res:
+self.assertEqual(np.allclose(out_ref, r), True)

-def test_api_imperative(self):
+def test_dygraph_check(self):
paddle.disable_static(self.place)
-x = paddle.to_variable(self.x_np)
-out = F.softmax(x)
-self.assertEqual(np.allclose(self.out_ref, out.numpy()), True)
+x = paddle.to_tensor(self.x_np)
+out1 = F.softmax(x)
+m = paddle.nn.Softmax()
+out2 = m(x)
+out_ref = ref_softmax(self.x_np, axis=-1, dtype=None)
+for r in [out1, out2]:
+self.assertEqual(np.allclose(out_ref, r.numpy()), True)

-out = F.softmax(x, axis=0)
-out_ref = np.apply_along_axis(stable_softmax, 0, self.x_np)
+out1 = F.softmax(x, axis=0)
+m = paddle.nn.Softmax(axis=0)
+out2 = m(x)
+out_ref = ref_softmax(self.x_np, axis=0, dtype=None)
+for r in [out1, out2]:
+self.assertEqual(np.allclose(out_ref, r.numpy()), True)

+out = F.softmax(x, dtype=np.float64)
+out_ref = ref_softmax(self.x_np, axis=-1, dtype=np.float64)
self.assertEqual(np.allclose(out_ref, out.numpy()), True)
paddle.enable_static()

def test_error(self):
-with program_guard(Program(), Program()):
-# The x should be variable and its dtype should be float32, float64.
-self.assertRaises(TypeError, F.softmax, [1])
-x = paddle.data(name='x', shape=[2, 3], dtype='int32')
-self.assertRaises(TypeError, F.softmax, x)
+with paddle.static.program_guard(paddle.static.Program()):
+# The input type must be Variable.
+self.assertRaises(TypeError, F.softmax, 1)
+# The input dtype must be float16, float32, float64.
+x_int32 = paddle.data(name='x_int32', shape=[2, 3], dtype='int32')
+self.assertRaises(TypeError, F.softmax, x_int32)
+# support the input dtype is float16
+x_fp16 = paddle.data(name='x_fp16', shape=[2, 3], dtype='float16')
+F.softmax(x_fp16)
if __name__ == "__main__":
...
@@ -55,14 +55,15 @@ from .decode import gather_tree #DEFINE_ALIAS
from .layer.activation import ELU
from .layer.activation import GELU
from .layer.activation import Hardshrink
-# from .layer.activation import PReLU #DEFINE_ALIAS
+from .layer.activation import Hardtanh
from .layer.activation import PReLU
from .layer.activation import ReLU
from .layer.activation import ReLU6 #DEFINE_ALIAS
from .layer.activation import SELU #DEFINE_ALIAS
from .layer.activation import LeakyReLU #DEFINE_ALIAS
from .layer.activation import Sigmoid #DEFINE_ALIAS
from .layer.activation import LogSigmoid
-# from .layer.activation import Softmax #DEFINE_ALIAS
+from .layer.activation import Softmax #DEFINE_ALIAS
from .layer.activation import Softplus #DEFINE_ALIAS
from .layer.activation import Softshrink #DEFINE_ALIAS
from .layer.activation import Softsign #DEFINE_ALIAS
...
@@ -30,13 +30,14 @@ from .activation import elu #DEFINE_ALIAS
from .activation import erf #DEFINE_ALIAS
from .activation import gelu #DEFINE_ALIAS
from .activation import hardshrink #DEFINE_ALIAS
from .activation import hardtanh #DEFINE_ALIAS
from .activation import hard_sigmoid #DEFINE_ALIAS
from .activation import hard_swish #DEFINE_ALIAS
from .activation import hsigmoid #DEFINE_ALIAS
from .activation import leaky_relu #DEFINE_ALIAS
from .activation import logsigmoid #DEFINE_ALIAS
from .activation import maxout #DEFINE_ALIAS
-# from .activation import prelu #DEFINE_ALIAS
+from .activation import prelu #DEFINE_ALIAS
from .activation import relu #DEFINE_ALIAS
from .activation import relu6 #DEFINE_ALIAS
from .activation import selu #DEFINE_ALIAS
...
@@ -30,13 +30,14 @@ __all__ = [
'erf',
'gelu',
'hardshrink',
'hardtanh',
'hard_sigmoid',
'hard_swish',
'hsigmoid',
'leaky_relu',
'logsigmoid',
'maxout',
-# 'prelu',
+'prelu',
'relu',
'relu6',
'selu',
@@ -49,7 +50,7 @@ __all__ = [
'swish',
'tanhshrink',
'thresholded_relu',
-'log_softmax'
+'log_softmax',
]
import warnings
@@ -111,10 +112,15 @@ def gelu(x, approximate=False, name=None):
gelu activation.

if approximate is True

.. math::

gelu(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3})))

else

.. math::

gelu(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}}))

Parameters:
@@ -135,17 +141,9 @@ def gelu(x, approximate=False, name=None):
paddle.disable_static()

-data = np.random.randn(2, 3).astype("float32")
-x = paddle.to_tensor(data)
+x = paddle.to_tensor(np.array([[-1, 0.5],[1, 1.5]]))
-out = F.gelu(x)
-data
-# array([[ 0.87165993, -1.0541513 , -0.37214822],
-# [ 0.15647964, 0.32496083, 0.33045998]], dtype=float32)
-out
-# array([[ 0.70456535, -0.15380788, -0.13207214],
-# [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
+out1 = F.gelu(x) # [-0.158655 0.345731 0.841345 1.39979]
+out2 = F.gelu(x, True) # [-0.158808 0.345714 0.841192 1.39957]
"""

if in_dygraph_mode():
@@ -187,7 +185,6 @@ def hardshrink(x, threshold=0.5, name=None):
A Tensor with the same data type and shape as ``x`` .

Examples:
.. code-block:: python

import paddle
@@ -196,7 +193,7 @@ def hardshrink(x, threshold=0.5, name=None):
paddle.disable_static()

-x = paddle.to_variable(np.array([-1, 0.3, 2.5]))
+x = paddle.to_tensor(np.array([-1, 0.3, 2.5]))
out = F.hardshrink(x) # [-1., 0., 2.5]
"""
@@ -215,6 +212,58 @@ def hardshrink(x, threshold=0.5, name=None):
return out
def hardtanh(x, min=-1.0, max=1.0, name=None):
"""
hardtanh activation
.. math::
hardtanh(x)= \\begin{cases}
max, \\text{if } x > max \\\\
min, \\text{if } x < min \\\\
x, \\text{otherwise}
\\end{cases}
Args:
x (Tensor): The input Tensor with data type float32, float64.
min (float, optional): The minimum value of the linear region range. Default is -1.
max (float, optional): The maximum value of the linear region range. Default is 1.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
A Tensor with the same data type and shape as ``x`` .
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5]))
out = F.hardtanh(x) # [-1., 0.3, 1.]
"""
if in_dygraph_mode():
return core.ops.brelu(x, 't_min', min, 't_max', max)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
'hardtanh')
helper = LayerHelper('hardtanh', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type='brelu',
inputs={'X': x},
outputs={'Out': out},
attrs={'t_min': min,
't_max': max})
return out
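For intuition, the mapping above is just a clip to [min, max]; the numpy sketch below mirrors the ref_hardtanh helper added in the tests (the helper name here is ours, not part of the patch):

import numpy as np

def hardtanh_ref(x, t_min=-1.0, t_max=1.0):
    # values below t_min saturate to t_min, values above t_max saturate to
    # t_max, everything in between passes through unchanged
    return np.minimum(np.maximum(x, t_min), t_max)

x = np.array([-1.5, 0.3, 2.5], dtype='float32')
print(hardtanh_ref(x))             # [-1.   0.3  1. ]
print(hardtanh_ref(x, -2.0, 2.0))  # [-1.5  0.3  2. ]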
def hsigmoid(input,
label,
weight,
@@ -272,7 +321,6 @@ def hsigmoid(input,
Variable: A tensor with the cost of hierarchical sigmoid, its shape is [N, 1] and data type is the same as :attr:`input`.

Examples:
.. code-block:: python

from paddle import fluid, nn
@@ -338,11 +386,86 @@ def hsigmoid(input,
return out
def prelu(x, weight, name=None):
"""
prelu activation.
.. math::
prelu(x) = max(0, x) + weight * min(0, x)
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
weight (Tensor): The learnable parameter with data type same as ``x``.
The weight shape is [1] or [in], where `in` is the input channel of ``x``.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
A Tensor with the same data type and shape as ``x`` .
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
data = np.array([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[ 6.0, 7.0, 8.0, 9.0]]]], 'float32')
x = paddle.to_tensor(data)
w = paddle.to_tensor(np.array([0.25]).astype('float32'))
out = F.prelu(x, w)
# [[[[-0.5 , 3. , -1. , 5. ],
# [ 3. , -1. , 5. , -1.5 ],
# [-1.75, -2. , 8. , 9. ]],
# [[ 1. , -0.5 , -0.75, 4. ],
# [-1.25, 6. , 7. , -2. ],
# [ 6. , 7. , 8. , 9. ]]]]
"""
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'prelu')
check_variable_and_dtype(weight, 'weight',
['float16', 'float32', 'float64'], 'prelu')
helper = LayerHelper('prelu', **locals())
assert len(weight.shape
) == 1, "The dim count of weight shape should be 1 in prelu()."
# NOTE(): The input of this API should be ``N,C,...`` format,
# which means x.shape[0] is batch_size and x.shape[1] is channel.
mode = 'all'
if weight.shape[0] > 1:
assert len(
x.shape
) > 1, "The dim count of x should be equal or larger than 2 in prelu() when weight shape is not [1]."
assert weight.shape[0] == x.shape[
1], "The weight size should be equal to x input channel in prelu() when weight shape is not [1]."
mode = 'channel'
if in_dygraph_mode():
return core.ops.prelu(x, weight, 'mode', mode)
out = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(
type="prelu",
inputs={"X": x,
"Alpha": weight},
outputs={"Out": out},
attrs={"mode": mode})
return out
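The mode selection above can be summarised with a small numpy sketch of the same formula, covering both the shared-alpha case (weight shape [1], mode='all') and the per-channel case (weight shape [C], mode='channel'); the helper below is illustrative only and assumes an NCHW input:

import numpy as np

def prelu_ref(x, weight):
    # weight is either a single shared alpha or one alpha per channel of an
    # NCHW input; reshape so it broadcasts over the channel axis
    w = weight if weight.size == 1 else weight.reshape(1, -1, *([1] * (x.ndim - 2)))
    return np.maximum(0, x) + w * np.minimum(0, x)

x = np.random.uniform(-1, 1, (1, 2, 3, 4)).astype('float32')
shared = np.array([0.25], dtype='float32')           # -> mode='all'
per_channel = np.array([0.1, 0.5], dtype='float32')  # -> mode='channel' (C == 2)
print(prelu_ref(x, shared).shape, prelu_ref(x, per_channel).shape)  # (1, 2, 3, 4) twice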
def relu(x, name=None):
"""
-ReLU Activation.
+relu activation.

-.. math:
+.. math::

out = max(x, 0)
@@ -381,9 +504,9 @@ def logsigmoid(x, name=None):
"""
logsigmoid activation.

-.. math:
+.. math::

-logsigmoid(x) = \log \frac{1}{1 + e^{-x}}
+logsigmoid(x) = log \\frac{1}{1 + e^{-x}}

Parameters:
x (Tensor): The input Tensor with data type float32, float64.
@@ -403,7 +526,7 @@ def logsigmoid(x, name=None):
paddle.disable_static()

x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0]))
-out = F.logsigmoid(x) # [0.7310586, 0.880797, 0.95257413, 0.98201376]
+out = F.logsigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
"""
if in_dygraph_mode():
@@ -514,7 +637,7 @@ def selu(x,
return out
-def softmax(x, axis=-1, name=None):
+def softmax(x, axis=-1, dtype=None, name=None):
"""
This operator implements the softmax layer. The calculation process is as follows:
@@ -541,7 +664,7 @@ def softmax(x, axis=-1, name=None):
.. math::

-out[i, j] = \\frac{\exp(x[i, j])}{\sum_j(exp(x[i, j])}
+softmax[i, j] = \\frac{\\exp(x[i, j])}{\\sum_j(\\exp(x[i, j]))}

Example:
@@ -590,20 +713,26 @@ def softmax(x, axis=-1, name=None):
[0.26762315, 0.26762315, 0.26762315, 0.26762315],
[0.72747516, 0.72747516, 0.72747516, 0.72747516]]]

-Args:
-x (Tensor): The input multi-dimension Tensor with data type float32, float64.
-axis (int, optional): The axis along which to perform softmax calculations.
-It should be in range [-D, D), where D is the dimensions of ``x`` .
-When ``axis`` < 0, it works the same way as :math:`axis + D` .
-Default is -1.
+Parameters:
+x (Tensor): The input Tensor with data type float32, float64.
+axis (int, optional): The axis along which to perform softmax
+calculations. It should be in range [-D, D), where D is the
+dimensions of ``x`` . If ``axis`` < 0, it works the same way as
+:math:`axis + D` . Default is -1.
+dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data
+type of the output tensor. If dtype is specified, ``x`` is casted
+to ``dtype`` before the operation is performed. This is useful for
+preventing data type overflows. Supported dtype: float32, float64.
+If ``dtype`` is None, the output Tensor has the same dtype as x.
+Default is None.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Returns:
-A Tensor with the same data type and shape as ``x`` .
+A Tensor with the same shape and data type (use ``dtype`` if it is
+specified) as x.

Examples:
.. code-block:: python

import paddle
@@ -619,7 +748,10 @@ def softmax(x, axis=-1, name=None):
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]], 'float32')
x = paddle.to_tensor(x)
-out = F.softmax(x)
+out1 = F.softmax(x)
+out2 = F.softmax(x, dtype='float64')
+# out1's data type is float32; out2's data type is float64
+# out1 and out2's value is as follows:
# [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
@@ -627,7 +759,43 @@ def softmax(x, axis=-1, name=None):
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
"""
-return paddle.fluid.layers.softmax(input=x, axis=axis, name=name)
if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
dtype = convert_np_dtype_to_dtype_(dtype)
use_cudnn = True if axis is -1 else False
if in_dygraph_mode():
outs_cast = x if dtype is None \
else core.ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
return core.ops.softmax(outs_cast, 'axis', axis, 'use_cudnn', use_cudnn)
if dtype is None:
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
'softmax')
else:
check_dtype(dtype, 'dtype', ['float32', 'float64'], 'softmax',
'If dtype is not None, it only support float32 or float64.')
helper = LayerHelper("softmax", **locals())
outs_cast = x
if dtype is not None:
outs_cast = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type='cast',
inputs={'X': x},
outputs={'Out': outs_cast},
attrs={'in_dtype': x.dtype,
'out_dtype': dtype})
outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype)
helper.append_op(
type='softmax',
inputs={'X': outs_cast},
outputs={'Out': outs_softmax},
attrs={'axis': axis,
'use_cudnn': use_cudnn})
return outs_softmax
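A numpy-only sketch of why the dtype argument is worth having: exp() overflows float16 long before float32, so casting up before the softmax avoids inf/NaN results (the stable_softmax reference in the tests subtracts the row maximum for the same reason); the numbers below are illustrative:

import numpy as np

x16 = np.array([10.0, 12.0], dtype=np.float16)

# naive float16 softmax: exp(12) exceeds the float16 maximum (~65504) -> inf/NaN
naive = np.exp(x16) / np.exp(x16).sum()

# cast to float32 first, as softmax(x, dtype='float32') would do
e32 = np.exp(x16.astype(np.float32))
casted = e32 / e32.sum()

print(naive)   # [ 0. nan]
print(casted)  # [0.1192 0.8808] (approximately)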
def softplus(x, beta=1, threshold=20, name=None):
@@ -820,7 +988,7 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
.. math::

Out[i, j] = log(softmax(x))
-= log(\\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])})
+= log(\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])})

Parameters:
x (Tensor): The input Tensor with data type float32, float64.
@@ -869,8 +1037,6 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
# [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]]
"""
-if axis is None:
-axis = -1
if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
dtype = convert_np_dtype_to_dtype_(dtype)
...
@@ -18,25 +18,28 @@ __all__ = [
'ELU',
'GELU',
'Hardshrink',
-# 'PReLU',
+'Hardtanh',
'PReLU',
'ReLU',
'ReLU6',
'SELU',
'LeakyReLU',
'Sigmoid',
-# 'Softmax',
+'Softmax',
'Softplus',
'Softshrink',
'Softsign',
'Tanhshrink',
'LogSigmoid',
'LogSoftmax',
-'HSigmoid'
+'HSigmoid',
]
from ...fluid.dygraph import layers
from ...fluid import core
from ...fluid.framework import in_dygraph_mode
from ...fluid.param_attr import ParamAttr
from ...fluid.initializer import Constant
from .. import functional as F
@@ -114,18 +117,13 @@ class GELU(layers.Layer):
paddle.disable_static()

-data = np.random.randn(2, 3).astype("float32")
-x = paddle.to_tensor(data)
+x = paddle.to_tensor(np.array([[-1, 0.5],[1, 1.5]]))
m = paddle.nn.GELU()
-out = m(x)
-data
-# array([[ 0.87165993, -1.0541513 , -0.37214822],
-# [ 0.15647964, 0.32496083, 0.33045998]], dtype=float32)
-out
-# array([[ 0.70456535, -0.15380788, -0.13207214],
-# [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
+out = m(x) # [-0.158655 0.345731 0.841345 1.39979]
+m = paddle.nn.GELU(True)
+out = m(x) # [-0.158808 0.345714 0.841192 1.39957]
"""

def __init__(self, approximate=False, name=None):
@@ -170,7 +168,7 @@ class Hardshrink(layers.Layer):
paddle.disable_static()

-x = paddle.to_variable(np.array([-1, 0.3, 2.5]))
+x = paddle.to_tensor(np.array([-1, 0.3, 2.5]))
m = paddle.nn.Hardshrink()
out = m(x) # [-1., 0., 2.5]
"""
@@ -184,6 +182,51 @@ class Hardshrink(layers.Layer):
return F.hardshrink(x, self._threshold, self._name)
class Hardtanh(layers.Layer):
"""
Hardtanh Activation
.. math::
Hardtanh(x)= \\begin{cases}
max, \\text{if } x > max \\\\
min, \\text{if } x < min \\\\
x, \\text{otherwise}
\\end{cases}
Parameters:
min (float, optional): The value of min for Hardtanh. Default is -1.
max (float, optional): The value of max for Hardtanh. Default is 1.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5]))
m = paddle.nn.Hardtanh()
out = m(x) # [-1., 0.3, 1.]
"""
def __init__(self, min=-1.0, max=1.0, name=None):
super(Hardtanh, self).__init__()
self._min = min
self._max = max
self._name = name
def forward(self, x):
return F.hardtanh(x, self._min, self._max, self._name)
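A short dygraph usage sketch based on the new TestHardtanhAPI case, showing the default and a widened linear region (assumes the 2.0-beta API used throughout this commit):

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5], dtype='float32'))

m = paddle.nn.Hardtanh()                # clips to [-1, 1] -> [-1., 0.3, 1.]
m_wide = paddle.nn.Hardtanh(-2.0, 2.0)  # clips to [-2, 2] -> [-1.5, 0.3, 2.]
print(m(x).numpy(), m_wide(x).numpy())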
class HSigmoid(layers.Layer):
"""
:alias_main: paddle.nn.HSigmoid
@@ -320,11 +363,78 @@ class HSigmoid(layers.Layer):
return out
class PReLU(layers.Layer):
"""
PReLU Activation.
.. math::
PReLU(x) = max(0, x) + weight * min(0, x)
Parameters:
num_parameters (int, optional): Number of `weight` to learn. The supported values are:
1 - a single parameter `alpha` is used for all input channels;
Number of channels - a separate `alpha` is used for each input channel.
Default is 1.
init (float, optional): Init value of learnable `weight`. Default is 0.25.
weight_attr(ParamAttr, optional): The parameter attribute for the learnable `weight`.
Default is None. For more information, please refer to :ref:`api_fluid_ParamAttr`.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
data = np.array([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[ 6.0, 7.0, 8.0, 9.0]]]], 'float32')
x = paddle.to_tensor(data)
m = paddle.nn.PReLU(1, 0.25)
out = m(x)
# [[[[-0.5 , 3. , -1. , 5. ],
# [ 3. , -1. , 5. , -1.5 ],
# [-1.75, -2. , 8. , 9. ]],
# [[ 1. , -0.5 , -0.75, 4. ],
# [-1.25, 6. , 7. , -2. ],
# [ 6. , 7. , 8. , 9. ]]]]
"""
def __init__(self, num_parameters=1, init=0.25, weight_attr=None,
name=None):
super(PReLU, self).__init__()
self._num_parameters = num_parameters
self._init = init
self._weight_attr = weight_attr
self._name = name
self._weight = self.create_parameter(
attr=self._weight_attr,
shape=[num_parameters],
dtype='float32',
is_bias=False,
default_initializer=Constant(init))
def forward(self, x):
return F.prelu(x, self._weight)
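A sketch of the per-channel configuration exercised by TestNNPReluAPI above: one learnable alpha per input channel, initialised away from the 0.25 default (again assuming the 2.0-beta dygraph API):

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.random.uniform(-1, 1, (1, 2, 3, 4)).astype('float32'))

# num_parameters must match the channel dimension (x.shape[1] == 2 here)
m = paddle.nn.PReLU(num_parameters=2, init=0.5)
out = m(x)
print(out.shape)  # [1, 2, 3, 4]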
class ReLU(layers.Layer):
"""
ReLU Activation.

-.. math:
+.. math::

ReLU(x) = max(x, 0)
@@ -488,7 +598,7 @@ class Sigmoid(layers.Layer):
.. math::

-output = \\frac{1}{1 + e^{-x}}
+Sigmoid(x) = \frac{1}{1 + e^{-x}}

Parameters:
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
@@ -509,7 +619,7 @@ class Sigmoid(layers.Layer):
paddle.disable_static()

input_data = np.array([1.0, 2.0, 3.0, 4.0]).astype('float32')
m = paddle.nn.Sigmoid()
-x = paddle.to_variable(input_data)
+x = paddle.to_tensor(input_data)
output = m(x)
print(output.numpy()) # [0.7310586, 0.880797, 0.95257413, 0.98201376]
"""
@@ -687,9 +797,9 @@ class LogSigmoid(layers.Layer):
"""
LogSigmoid Activation.

-.. math:
+.. math::

-LogSigmoid(x) = \log \frac{1}{1 + e^{-x}}
+LogSigmoid(x) = log \\frac{1}{1 + e^{-x}}

Parameters:
x (Tensor): The input Tensor with data type float32, or float64.
@@ -710,7 +820,7 @@ class LogSigmoid(layers.Layer):
x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0]))
m = paddle.nn.LogSigmoid()
-out = m(x) # [0.7310586, 0.880797, 0.95257413, 0.98201376]
+out = m(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
"""

def __init__(self, name=None):
@@ -721,6 +831,137 @@ class LogSigmoid(layers.Layer):
return F.logsigmoid(x, self._name)
class Softmax(layers.Layer):
"""
Softmax Activation.
This operator implements the softmax layer. The calculation process is as follows:
1. The dimension :attr:`axis` of ``x`` will be permuted to the last.
2. Then ``x`` will be logically flattened to a 2-D matrix. The matrix's second
dimension(row length) is the same as the dimension :attr:`axis` of ``x``,
and the first dimension(column length) is the product of all other dimensions
of ``x``. For each row of the matrix, the softmax operator squashes the
K-dimensional(K is the width of the matrix, which is also the size of ``x``'s
dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional
vector of real values in the range [0, 1] that add up to 1.
3. After the softmax operation is completed, the inverse operations of steps 1 and 2
are performed to restore the two-dimensional matrix to the same dimension as the ``x`` .
It computes the exponential of the given dimension and the sum of exponential
values of all the other dimensions in the K-dimensional vector input.
Then the ratio of the exponential of the given dimension and the sum of
exponential values of all the other dimensions is the output of the softmax
operator.
For each row :math:`i` and each column :math:`j` in the matrix, we have:
.. math::
Softmax[i, j] = \\frac{\\exp(x[i, j])}{\\sum_j(\\exp(x[i, j]))}
Example:
.. code-block:: text
Case 1:
Input:
x.shape = [2, 3, 4]
x.data = [[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]]
Attrs:
axis = -1
Output:
out.shape = [2, 3, 4]
out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.07232949, 0.19661193, 0.19661193, 0.53444665]],
[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
Case 2:
Input:
x.shape = [2, 3, 4]
x.data = [[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]]
Attrs:
axis = 1
Output:
out.shape = [2, 3, 4]
out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
[0.01786798, 0.01786798, 0.04661262, 0.04661262],
[0.97555875, 0.97555875, 0.93623955, 0.93623955]],
[[0.00490169, 0.00490169, 0.00490169, 0.00490169],
[0.26762315, 0.26762315, 0.26762315, 0.26762315],
[0.72747516, 0.72747516, 0.72747516, 0.72747516]]]
Parameters:
axis (int, optional): The axis along which to perform softmax
calculations. It should be in range [-D, D), where D is the
dimensions of ``x`` . If ``axis`` < 0, it works the same way as
:math:`axis + D` . Default is -1.
dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data
type of the output tensor. If dtype is specified, ``x`` is casted
to ``dtype`` before the operation is performed. This is useful for
preventing data type overflows. Supported dtype: float32, float64.
If ``dtype`` is None, the output Tensor has the same dtype as x.
Default is None.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
x = np.array([[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]], 'float32')
x = paddle.to_tensor(x)
m = paddle.nn.Softmax()
out = m(x)
# [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
# [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
"""
def __init__(self, axis=-1, name=None):
super(Softmax, self).__init__()
self._axis = axis
self._dtype = None
self._name = name
def forward(self, x):
return F.softmax(x, self._axis, self._dtype, self._name)
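The three-step procedure in the docstring can be reproduced directly in numpy, which is a useful sanity check against the stable_softmax/ref_softmax helpers used by the tests (the functions below are ours, for illustration only):

import numpy as np

def softmax_3step(x, axis=-1):
    moved = np.moveaxis(x, axis, -1)           # step 1: permute `axis` to the last dim
    flat = moved.reshape(-1, moved.shape[-1])  # step 2: flatten to a 2-D matrix
    e = np.exp(flat - flat.max(axis=1, keepdims=True))
    rows = e / e.sum(axis=1, keepdims=True)    # row-wise softmax
    return np.moveaxis(rows.reshape(moved.shape), -1, axis)  # step 3: restore layout

def stable_softmax_1d(v):
    e = np.exp(v - v.max())
    return e / e.sum()

x = np.random.uniform(-1., 1., (2, 3, 4, 5)).astype('float32')
print(np.allclose(softmax_3step(x, axis=1),
                  np.apply_along_axis(stable_softmax_1d, 1, x)))  # True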
class LogSoftmax(layers.Layer):
"""
This operator implements the log_softmax layer. The calculation process is as follows:
@@ -728,7 +969,7 @@ class LogSoftmax(layers.Layer):
.. math::

Out[i, j] = log(softmax(x))
-= log(\\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])})
+= log(\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])})

Parameters:
axis (int, optional): The axis along which to perform log_softmax
...