未验证 提交 8fc7f1b6 编写于 作者: G Guo Sheng 提交者: GitHub

Fix api docs in RNN, Transformer, layer_norm, WeightNormParamAttr (#29235)

* Fix api docs in RNN, Transformer, layer_norm, WeightNormParamAttr.
test=develop

* Fix api doc for print in label_smooth.
test=develop

* Update api docs according to review comments.
Add name argument in RNN back.
test=develop
上级 c940f842
......@@ -842,52 +842,52 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
def crf_decoding(input, param_attr, label=None, length=None):
"""
:api_attr: Static Graph
${comment}
Args:
input(${emission_type}): ${emission_comment}
input(Tensor): ${emission_comment}
param_attr (ParamAttr|None): To specify the weight parameter attribute.
Default: None, which means the default weight parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
used. See usage for details in :ref:`api_paddle_fluid_param_attr_ParamAttr` .
label(${label_type}, optional): ${label_comment}
length(${length_type}, optional): ${length_comment}
Returns:
Variable: ${viterbi_path_comment}
Tensor: ${viterbi_path_comment}
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
# LoDTensor-based example
num_labels = 10
feature = fluid.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1)
label = fluid.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1)
emission = fluid.layers.fc(input=feature, size=num_labels)
feature = paddle.static.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1)
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1)
emission = paddle.static.nn.fc(feature, size=num_labels)
crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label,
param_attr=fluid.ParamAttr(name="crfw"))
crf_decode = fluid.layers.crf_decoding(input=emission,
param_attr=fluid.ParamAttr(name="crfw"))
crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label,
param_attr=paddle.ParamAttr(name="crfw"))
crf_decode = paddle.static.nn.crf_decoding(input=emission,
param_attr=paddle.ParamAttr(name="crfw"))
# Common tensor example
num_labels, max_len = 10, 20
feature = fluid.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
label = fluid.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
length = fluid.data(name='length', shape=[-1, 1], dtype='int64')
emission = fluid.layers.fc(input=feature, size=num_labels,
feature = paddle.static.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
label = paddle.static.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
length = paddle.static.data(name='length', shape=[-1, 1], dtype='int64')
emission = paddle.static.nn.fc(feature, size=num_labels,
num_flatten_dims=2)
crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, length=length,
param_attr=fluid.ParamAttr(name="crfw_pad"))
crf_decode = fluid.layers.crf_decoding(input=emission, length=length,
param_attr=fluid.ParamAttr(name="crfw_pad"))
crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label, length=length,
param_attr=paddle.ParamAttr(name="crfw_pad"))
crf_decode = paddle.static.nn.crf_decoding(input=emission, length=length,
param_attr=paddle.ParamAttr(name="crfw_pad"))
"""
check_variable_and_dtype(input, 'input', ['float32', 'float64'],
'crf_decoding')
......@@ -3435,7 +3435,7 @@ def layer_norm(input,
- :math:`b`: the trainable bias parameter.
Args:
input(Variable): A multi-dimension ``Tensor`` , and the data type is float32 or float64.
input(Tensor): A multi-dimension ``Tensor`` , and the data type is float32 or float64.
scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
normalization. Default: True.
shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
......@@ -3460,24 +3460,17 @@ def layer_norm(input,
name(str): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
Returns:
Variable: ``Tensor`` indicating the normalized result, the data type is the same as ``input`` , and the return dimension is the same as ``input`` .
Tensor: ``Tensor`` indicating the normalized result, the data type is the same as ``input`` , and the return dimension is the same as ``input`` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
import paddle
paddle.enable_static()
x = fluid.data(name='x', shape=[-1, 32, 32], dtype='float32')
hidden1 = fluid.layers.layer_norm(input=x, begin_norm_axis=1)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
np_x = np.random.random(size=(8, 3, 32, 32)).astype('float32')
output = exe.run(feed={"x": np_x}, fetch_list = [hidden1])
print(output)
x = paddle.static.data(name='x', shape=[8, 32, 32], dtype='float32')
output = paddle.static.nn.layer_norm(input=x, begin_norm_axis=1)
print(output.shape) # [8, 32, 32]
"""
assert in_dygraph_mode(
) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!"
......
......@@ -226,8 +226,8 @@ class WeightNormParamAttr(ParamAttr):
Note:
``gradient_clip`` of ``ParamAttr`` HAS BEEN DEPRECATED since 2.0.
Please use ``need_clip`` in ``ParamAttr`` to specify the clip scope.
There are three clipping strategies: :ref:`api_paddle_nn_GradientClipByGlobalNorm` ,
:ref:`api_fluid_clip_GradientClipByNorm` , :ref:`api_fluid_clip_GradientClipByValue` .
There are three clipping strategies: :ref:`api_paddle_nn_ClipGradByGlobalNorm` ,
:ref:`api_paddle_nn_ClipGradByNorm` , :ref:`api_paddle_nn_ClipGradByValue` .
Args:
......@@ -245,8 +245,8 @@ class WeightNormParamAttr(ParamAttr):
optimizer is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
Default 1.0.
regularizer (WeightDecayRegularizer, optional): Regularization strategy. There are
two methods: :ref:`api_paddle_fluid_regularizer_L1Decay` ,
:ref:`api_paddle_fluid_regularizer_L2DecayRegularizer`.
two methods: :ref:`api_paddle_regularizer_L1Decay` ,
:ref:`api_paddle_regularizer_L2Decay`.
If regularizer is also set in ``optimizer``
(such as :ref:`api_paddle_optimizer_SGD` ), that regularizer setting in
optimizer will be ignored. Default None, meaning there is no regularization.
......
......@@ -1554,7 +1554,7 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
paddle.disable_static()
x = paddle.to_tensor(x_data, stop_gradient=False)
output = paddle.nn.functional.label_smooth(x)
print(output.numpy())
print(output)
#[[[0.03333334 0.93333334 0.03333334]
# [0.93333334 0.03333334 0.93333334]]]
......
......@@ -643,7 +643,7 @@ class TransformerDecoderLayer(Layer):
for linear in FFN. Otherwise, the three sub-layers all use it as
`weight_attr` to create parameters. Default: None, which means the
default weight parameter property is used. See usage for details
in :ref:`api_fluid_ParamAttr` .
in :ref:`api_paddle_fluid_param_attr_ParamAttr` .
bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property.
If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for
self attention, `bias_attr[1]` would be used as `bias_attr` for
......@@ -1199,7 +1199,7 @@ class Transformer(Layer):
transformer_paddle = Transformer(
d_model, n_head, dim_feedforward=dim_feedforward)
mask = transformer_paddle.generate_square_subsequent_mask(length)
print(mask.numpy())
print(mask)
# [[ 0. -inf -inf -inf -inf]
# [ 0. 0. -inf -inf -inf]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册