Unverified commit 78380587, authored by Guo Sheng, committed by GitHub

Fix api docs in RNN, Transformer, layer_norm, WeightNormParamAttr (#29235) (#29407)

* Fix api docs in RNN, Transformer, layer_norm, WeightNormParamAttr.
test=develop

* Fix api doc for print in label_smooth.
test=develop

* Update api docs according to review comments.
Add name argument in RNN back.
test=develop
Parent 3a096724
@@ -842,52 +842,52 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
 def crf_decoding(input, param_attr, label=None, length=None):
    """
    :api_attr: Static Graph
    ${comment}
    Args:
-        input(${emission_type}): ${emission_comment}
+        input(Tensor): ${emission_comment}
        param_attr (ParamAttr|None): To specify the weight parameter attribute.
            Default: None, which means the default weight parameter property is
-            used. See usage for details in :ref:`api_fluid_ParamAttr` .
+            used. See usage for details in :ref:`api_paddle_fluid_param_attr_ParamAttr` .
        label(${label_type}, optional): ${label_comment}
        length(${length_type}, optional): ${length_comment}
    Returns:
-        Variable: ${viterbi_path_comment}
+        Tensor: ${viterbi_path_comment}
    Examples:
        .. code-block:: python
-            import paddle.fluid as fluid
            import paddle
            paddle.enable_static()
            # LoDTensor-based example
            num_labels = 10
-            feature = fluid.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1)
-            label = fluid.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1)
-            emission = fluid.layers.fc(input=feature, size=num_labels)
-            crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label,
-                      param_attr=fluid.ParamAttr(name="crfw"))
-            crf_decode = fluid.layers.crf_decoding(input=emission,
-                      param_attr=fluid.ParamAttr(name="crfw"))
+            feature = paddle.static.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1)
+            label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1)
+            emission = paddle.static.nn.fc(feature, size=num_labels)
+            crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label,
+                      param_attr=paddle.ParamAttr(name="crfw"))
+            crf_decode = paddle.static.nn.crf_decoding(input=emission,
+                      param_attr=paddle.ParamAttr(name="crfw"))
            # Common tensor example
            num_labels, max_len = 10, 20
-            feature = fluid.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
-            label = fluid.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
-            length = fluid.data(name='length', shape=[-1, 1], dtype='int64')
-            emission = fluid.layers.fc(input=feature, size=num_labels,
+            feature = paddle.static.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
+            label = paddle.static.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
+            length = paddle.static.data(name='length', shape=[-1, 1], dtype='int64')
+            emission = paddle.static.nn.fc(feature, size=num_labels,
                                       num_flatten_dims=2)
-            crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, length=length,
-                      param_attr=fluid.ParamAttr(name="crfw_pad"))
-            crf_decode = fluid.layers.crf_decoding(input=emission, length=length,
-                      param_attr=fluid.ParamAttr(name="crfw_pad"))
+            crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label, length=length,
+                      param_attr=paddle.ParamAttr(name="crfw_pad"))
+            crf_decode = paddle.static.nn.crf_decoding(input=emission, length=length,
+                      param_attr=paddle.ParamAttr(name="crfw_pad"))
    """
    check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                             'crf_decoding')
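Editor's note: the updated docstring example only builds the static graph. Below is a minimal sketch of actually running the padded-tensor variant; the executor setup, batch size, and random feed data are assumptions added for illustration and are not part of the commit.

```python
import numpy as np
import paddle

paddle.enable_static()

num_labels, max_len, batch_size = 10, 20, 4
feature = paddle.static.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
label = paddle.static.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
length = paddle.static.data(name='length', shape=[-1, 1], dtype='int64')
emission = paddle.static.nn.fc(feature, size=num_labels, num_flatten_dims=2)
crf_cost = paddle.fluid.layers.linear_chain_crf(
    input=emission, label=label, length=length,
    param_attr=paddle.ParamAttr(name="crfw_pad"))
crf_decode = paddle.static.nn.crf_decoding(
    input=emission, length=length, param_attr=paddle.ParamAttr(name="crfw_pad"))

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(paddle.static.default_startup_program())
decoded, = exe.run(
    feed={
        'word_emb_pad': np.random.rand(batch_size, max_len, 784).astype('float32'),
        'label_pad': np.random.randint(0, num_labels, (batch_size, max_len, 1)).astype('int64'),
        'length': np.full((batch_size, 1), max_len, dtype='int64'),
    },
    fetch_list=[crf_decode])
print(decoded.shape)  # one decoded label id per (sentence, position)
```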
@@ -3427,7 +3427,7 @@ def layer_norm(input,
    - :math:`b`: the trainable bias parameter.
    Args:
-        input(Variable): A multi-dimension ``Tensor`` , and the data type is float32 or float64.
+        input(Tensor): A multi-dimension ``Tensor`` , and the data type is float32 or float64.
        scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
            normalization. Default: True.
        shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
@@ -3452,24 +3452,17 @@ def layer_norm(input,
        name(str): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
    Returns:
-        Variable: ``Tensor`` indicating the normalized result, the data type is the same as ``input`` , and the return dimension is the same as ``input`` .
+        Tensor: ``Tensor`` indicating the normalized result, the data type is the same as ``input`` , and the return dimension is the same as ``input`` .
    Examples:
        .. code-block:: python
-            import paddle.fluid as fluid
-            import numpy as np
            import paddle
            paddle.enable_static()
-            x = fluid.data(name='x', shape=[-1, 32, 32], dtype='float32')
-            hidden1 = fluid.layers.layer_norm(input=x, begin_norm_axis=1)
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            exe.run(fluid.default_startup_program())
-            np_x = np.random.random(size=(8, 3, 32, 32)).astype('float32')
-            output = exe.run(feed={"x": np_x}, fetch_list = [hidden1])
-            print(output)
+            x = paddle.static.data(name='x', shape=[8, 32, 32], dtype='float32')
+            output = paddle.static.nn.layer_norm(input=x, begin_norm_axis=1)
+            print(output.shape)  # [8, 32, 32]
    """
    assert in_dygraph_mode(
    ) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!"
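Editor's note: the assertion above directs dygraph users to ``LayerNorm``. For contrast, here is a small dygraph sketch of the equivalent call; the shapes are illustrative assumptions.

```python
import paddle

paddle.disable_static()  # dygraph mode
x = paddle.rand([8, 32, 32], dtype='float32')
# normalize over the trailing [32, 32] dims, matching begin_norm_axis=1 above
layer_norm = paddle.nn.LayerNorm(x.shape[1:])
out = layer_norm(x)
print(out.shape)  # [8, 32, 32]
```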
@@ -9736,7 +9729,7 @@ def prelu(x, mode, param_attr=None, name=None):
    if mode not in ['all', 'channel', 'element']:
        raise ValueError('mode should be one of all, channel, element.')
    alpha_shape = [1]
    # NOTE(): The input of this API should be ``N,C,...`` format,
    # which means x.shape[0] is batch_size and x.shape[0] is channel.
    if mode == 'channel':
        assert len(
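Editor's note: the ``N,C,...`` comment refers to the 'channel' mode, which learns one alpha per channel (the second axis of ``x``). A short dygraph sketch using the 2.x ``paddle.nn.PReLU`` layer, with illustrative shapes:

```python
import paddle

paddle.disable_static()
x = paddle.rand([4, 3, 8, 8])              # N=4, C=3
prelu = paddle.nn.PReLU(num_parameters=3)  # one learnable alpha per channel
y = prelu(x)
print(y.shape)  # [4, 3, 8, 8]
```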
......
@@ -225,8 +225,8 @@ class WeightNormParamAttr(ParamAttr):
    Note:
        ``gradient_clip`` of ``ParamAttr`` HAS BEEN DEPRECATED since 2.0.
        Please use ``need_clip`` in ``ParamAttr`` to specify the clip scope.
-        There are three clipping strategies: :ref:`api_paddle_nn_GradientClipByGlobalNorm` ,
-        :ref:`api_fluid_clip_GradientClipByNorm` , :ref:`api_fluid_clip_GradientClipByValue` .
+        There are three clipping strategies: :ref:`api_paddle_nn_ClipGradByGlobalNorm` ,
+        :ref:`api_paddle_nn_ClipGradByNorm` , :ref:`api_paddle_nn_ClipGradByValue` .
    Args:
@@ -244,8 +244,8 @@ class WeightNormParamAttr(ParamAttr):
            optimizer is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
            Default 1.0.
        regularizer (WeightDecayRegularizer, optional): Regularization strategy. There are
-            two method: :ref:`api_paddle_fluid_regularizer_L1Decay` ,
-            :ref:`api_paddle_fluid_regularizer_L2DecayRegularizer`.
+            two method: :ref:`api_paddle_regularizer_L1Decay` ,
+            :ref:`api_paddle_regularizer_L2Decay`.
            If regularizer is also set in ``optimizer``
            (such as :ref:`api_paddle_optimizer_SGD` ), that regularizer setting in
            optimizer will be ignored. Default None, meaning there is no regularization.
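Editor's note: a minimal static-graph sketch of ``WeightNormParamAttr`` using the renamed regularizer APIs referenced above; the layer sizes, initializer, and regularization coefficient are assumptions for illustration.

```python
import paddle

paddle.enable_static()
data = paddle.static.data(name="data", shape=[-1, 32], dtype="float32")
out = paddle.static.nn.fc(
    x=data,
    size=64,
    weight_attr=paddle.static.WeightNormParamAttr(
        dim=None,                                       # normalize over all dims of the weight
        name="weight_norm_param",
        initializer=paddle.nn.initializer.Constant(1.0),
        learning_rate=1.0,
        regularizer=paddle.regularizer.L2Decay(0.1),    # ignored if the optimizer also sets one
        trainable=True,
        need_clip=True))
```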
......
@@ -1554,7 +1554,7 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
            paddle.disable_static()
            x = paddle.to_tensor(x_data, stop_gradient=False)
            output = paddle.nn.functional.label_smooth(x)
-            print(output.numpy())
+            print(output)
            #[[[0.03333334 0.93333334 0.03333334]
            #  [0.93333334 0.03333334 0.93333334]]]
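Editor's note: the printed values come from ``(1 - epsilon) * label + epsilon / num_classes`` with the default ``epsilon=0.1`` and 3 classes. The hunk does not show ``x_data``, so the label values below are an assumption reconstructed from the printed output.

```python
import numpy as np
import paddle

paddle.disable_static()
x_data = np.array([[[0, 1, 0],
                    [1, 0, 1]]]).astype("float32")
x = paddle.to_tensor(x_data, stop_gradient=False)
output = paddle.nn.functional.label_smooth(x)  # epsilon defaults to 0.1
print(output)
# 1 -> (1 - 0.1) * 1 + 0.1 / 3 = 0.93333334
# 0 -> (1 - 0.1) * 0 + 0.1 / 3 = 0.03333334
```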
......
@@ -643,7 +643,7 @@ class TransformerDecoderLayer(Layer):
            for linear in FFN. Otherwise, the three sub-layers all use it as
            `weight_attr` to create parameters. Default: None, which means the
            default weight parameter property is used. See usage for details
-            in :ref:`api_fluid_ParamAttr` .
+            in :ref:`api_paddle_fluid_param_attr_ParamAttr` .
        bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property.
            If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for
            self attention, `bias_attr[1]` would be used as `bias_attr` for
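Editor's note: a sketch of the tuple form described in this hunk, giving each of the three sub-layers (self attention, cross attention, FFN) its own weight attribute. The sizes, initializers, and ``bias_attr=False`` choice are illustrative assumptions.

```python
import paddle
from paddle.nn import TransformerDecoderLayer

paddle.disable_static()
weight_attrs = (
    paddle.ParamAttr(initializer=paddle.nn.initializer.XavierUniform()),  # self attention
    paddle.ParamAttr(initializer=paddle.nn.initializer.XavierUniform()),  # cross attention
    paddle.ParamAttr(initializer=paddle.nn.initializer.XavierNormal()),   # FFN linears
)
decoder_layer = TransformerDecoderLayer(
    d_model=128, nhead=2, dim_feedforward=512,
    weight_attr=weight_attrs, bias_attr=False)  # False: no trainable biases

tgt = paddle.rand((2, 6, 128))     # [batch, tgt_len, d_model]
memory = paddle.rand((2, 4, 128))  # [batch, src_len, d_model]
out = decoder_layer(tgt, memory)
print(out.shape)  # [2, 6, 128]
```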
@@ -1199,7 +1199,7 @@ class Transformer(Layer):
                transformer_paddle = Transformer(
                    d_model, n_head, dim_feedforward=dim_feedforward)
                mask = transformer_paddle.generate_square_subsequent_mask(length)
-                print(mask.numpy())
+                print(mask)
                # [[ 0. -inf -inf -inf -inf]
                #  [ 0.  0. -inf -inf -inf]
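Editor's note: a sketch of how the generated mask is typically consumed, passed as ``tgt_mask`` so each decoder position only attends to earlier positions. ``d_model``, ``n_head``, ``dim_feedforward`` and ``length`` are not visible in this excerpt, so the values below are assumptions.

```python
import paddle
from paddle.nn import Transformer

paddle.disable_static()
d_model, n_head, dim_feedforward, length = 128, 2, 512, 5
transformer_paddle = Transformer(d_model, n_head, dim_feedforward=dim_feedforward)
mask = transformer_paddle.generate_square_subsequent_mask(length)

enc_input = paddle.rand((2, 4, d_model))       # [batch, src_len, d_model]
dec_input = paddle.rand((2, length, d_model))  # [batch, tgt_len, d_model]
output = transformer_paddle(enc_input, dec_input, tgt_mask=mask)
print(output.shape)  # [2, 5, 128]
```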
......