Unverified commit 91f4d1ce, authored by ustiniankw, committed by GitHub

Fixdocs (#47986)

* list112-122, test=document_fix

* precommitfix, test=document_fix

* list112-127, test=document_fix

* fix_ResNetBasicBlock, test=document_fix

* pre-commit_resnet, test=document_fix

* refix, test=document

* refix, test=document_fix
Parent 94c6ec86
@@ -626,6 +626,7 @@ def ifftn(x, s=None, axes=None, norm="backward", name=None):
def rfftn(x, s=None, axes=None, norm="backward", name=None):
"""
The N dimensional FFT for real input.
This function computes the N-dimensional discrete Fourier Transform over
@@ -659,9 +660,9 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None):
three operations are shown below:
- "backward": The factor of forward direction and backward direction are ``1``
and ``1/n`` respectively;
- "forward": The factor of forward direction and backward direction are ``1/n``
and ``1`` respectively;
- "ortho": The factor of forward direction and backward direction are both ``1/sqrt(n)``.
Where ``n`` is the product of the elements in ``s`` .
@@ -670,36 +671,35 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None):
refer to :ref:`api_guide_Name` .
Returns:
out(Tensor), complex tensor
Examples:
.. code-block:: python

import paddle

# default, all axes will be used to run the fft
x = paddle.ones((2, 3, 4))
print(paddle.fft.rfftn(x))
# Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True,
# [[[(24+0j), 0j , 0j ],
# [0j , 0j , 0j ],
# [0j , 0j , 0j ]],
#
# [[0j , 0j , 0j ],
# [0j , 0j , 0j ],
# [0j , 0j , 0j ]]])

# use axes(2, 0)
print(paddle.fft.rfftn(x, axes=(2, 0)))
# Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True,
# [[[(8+0j), 0j , 0j ],
# [(8+0j), 0j , 0j ],
# [(8+0j), 0j , 0j ]],
#
# [[0j , 0j , 0j ],
# [0j , 0j , 0j ],
# [0j , 0j , 0j ]]])
"""
return fftn_r2c(x, s, axes, norm, forward=True, onesided=True, name=name)
...
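As a side note on the ``norm`` discussion above, here is a minimal sketch (values assume the documented scaling behaviour) showing how "backward" and "ortho" differ for an all-ones input:

.. code-block:: python

import paddle

x = paddle.ones((4, 4))
# "backward" (default): the forward transform is unscaled, so the DC term is the plain sum.
y_backward = paddle.fft.rfftn(x)
# "ortho": forward and backward transforms are both scaled by 1/sqrt(n), with n = 4 * 4.
y_ortho = paddle.fft.rfftn(x, norm="ortho")
print(y_backward.shape)  # [4, 3], one-sided output along the last axis
print(y_backward[0, 0])  # (16+0j), the sum of all ones
print(y_ortho[0, 0])     # (4+0j), i.e. 16 / sqrt(16)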
@@ -82,15 +82,17 @@ supported_layers_and_prune_func_map = {}
def add_supported_layer(layer, pruning_func=None):
r"""
Add supported layers and their corresponding pruning functions.
Args:
name (string|Layer): The name or type of layer to support. If layer is a `Layer`, it
will be converted to a string internally. ASP uses this name to match parameters' names and calls
the corresponding pruning function.
pruning_func (function, optional): a function type which receives five arguments (weight_nparray,
m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight,
m, n, and func_name, please see `prune_model` for details.
"""
name = None
if isinstance(layer, str):
...
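A minimal sketch of registering a custom pruning function, assuming the helper is exposed as ``paddle.incubate.asp.add_supported_layer``; the five-argument layout follows the description above, and the return contract (pruned weight plus mask) is an assumption mirroring the built-in pruning functions rather than something stated in this docstring:

.. code-block:: python

import numpy as np
from paddle.incubate import asp  # assumed import path for the ASP helpers

# Hypothetical pruning function: keep the weight as-is and return a dense mask.
def my_prune(weight_nparray, m, n, func_name, param_name):
    mask = np.ones_like(weight_nparray)  # a real implementation would zero entries to an m:n pattern
    return weight_nparray * mask, mask   # assumed return: (pruned weight, mask)

# Register a layer by its string name together with its pruning function.
asp.add_supported_layer('my_dense', pruning_func=my_prune)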
@@ -92,20 +92,25 @@ class CheckMethod(Enum):
def calculate_density(x):
r"""
Return the density of the input tensor.
Args:
x (nparray): The input tensor.
Returns:
float, The density of :attr:`x`.
Examples:
.. code-block:: python

import paddle
import numpy as np

x = np.array([[0, 1, 3, 0],
[1, 1, 0, 1]])
paddle.incubate.asp.calculate_density(x) # 0.625
"""
x_flattened = x.flatten()
return float(np.nonzero(x_flattened)[0].size) / x_flattened.size
...
@@ -177,13 +177,14 @@ class Layer:
def train(self):
"""
Sets this Layer and all its sublayers to training mode.
This only affects certain modules like `Dropout` and `BatchNorm`.
Returns:
None
Examples:
.. code-block:: python
import paddle
@@ -260,6 +261,7 @@ class Layer:
def apply(self, fn):
"""
Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
as well as self. Typical use includes initializing the parameters of a model.
@@ -267,7 +269,7 @@ class Layer:
fn (function): a function to be applied to each sublayer
Returns:
Layer, self
Example::
.. code-block:: python
@@ -287,6 +289,7 @@ class Layer:
net.apply(init_weights)
print(net.state_dict())
"""
for layer in self.children():
layer.apply(fn)
@@ -296,10 +299,12 @@ class Layer:
return self
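For reference, a self-contained version of the truncated ``apply`` example above (the constant 0.9 is illustrative):

.. code-block:: python

import paddle

net = paddle.nn.Sequential(paddle.nn.Linear(2, 2), paddle.nn.Linear(2, 2))

@paddle.no_grad()
def init_weights(layer):
    if type(layer) == paddle.nn.Linear:
        # Overwrite every Linear weight with a constant value.
        layer.weight.set_value(paddle.full_like(layer.weight, 0.9))

net.apply(init_weights)  # returns net itself
print(net.state_dict())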
def full_name(self):
"""
Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__
Returns:
str, full name of this layer.
Example::
.. code-block:: python
@@ -321,7 +326,9 @@ class Layer:
return self._full_name
def register_forward_post_hook(self, hook):
"""
Register a forward post-hook for Layer. The hook will be called after the `forward` function has been computed.
It should have the following form: the `input` and `output` of the `hook` are the `input` and `output` of the `Layer` respectively.
User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.
@@ -332,7 +339,7 @@ class Layer:
hook(function): a function registered as a forward post-hook
Returns:
HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
Examples:
.. code-block:: python
@@ -364,13 +371,16 @@ class Layer:
# hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
assert (out0.numpy() == (out1.numpy()) * 2).any()
"""
hook_remove_helper = HookRemoveHelper(self._forward_post_hooks)
self._forward_post_hooks[hook_remove_helper._hook_id] = hook
return hook_remove_helper
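A self-contained sketch of the collapsed forward post-hook example above (shapes and values are illustrative):

.. code-block:: python

import paddle
import numpy as np

# The post-hook doubles the layer's output.
def forward_post_hook(layer, input, output):
    return output * 2

linear = paddle.nn.Linear(13, 5)
forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

value = np.arange(26).reshape(2, 13).astype("float32")
x = paddle.to_tensor(value)
out0 = linear(x)

# Remove the hook through the returned HookRemoveHelper.
forward_post_hook_handle.remove()
out1 = linear(x)

# The hook was active only for the first call, so out0 == out1 * 2.
assert (out0.numpy() == (out1.numpy()) * 2).any()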
def register_forward_pre_hook(self, hook):
"""
Register a forward pre-hook for Layer. The hook will be called before the `forward` function is computed.
It should have the following form: the `input` of the `hook` is the `input` of the `Layer`.
The hook can either return a tuple or a single modified value; we will wrap a single value into a tuple if
@@ -383,7 +393,7 @@ class Layer:
hook(function): a function registered as a forward pre-hook
Returns:
HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .
Examples:
.. code-block:: python
@@ -581,18 +591,20 @@ class Layer:
)
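Likewise, a minimal sketch of a forward pre-hook that rescales the input (returning a single value is fine; it is wrapped into a tuple internally):

.. code-block:: python

import paddle
import numpy as np

# The pre-hook halves the layer's input before `forward` runs.
def forward_pre_hook(layer, input):
    return input[0] * 0.5

linear = paddle.nn.Linear(13, 5)
forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

value = np.arange(26).reshape(2, 13).astype("float32")
x = paddle.to_tensor(value)
out0 = linear(x)

forward_pre_hook_handle.remove()
out1 = linear(x * 0.5)

# Feeding the pre-scaled input after removing the hook reproduces out0.
assert np.array_equal(out0.numpy(), out1.numpy())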
def parameters(self, include_sublayers=True):
"""
Returns a list of all Parameters from current layer and its sub-layers.
Returns:
list of Tensor, a list of Parameters.
Examples:
.. code-block:: python
import paddle
linear = paddle.nn.Linear(1,1)
print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0
"""
ret = [
@@ -604,7 +616,9 @@ class Layer:
return ret
def children(self):
"""
Returns an iterator over immediate children layers.
Yields:
Layer: a child layer
@@ -654,13 +668,15 @@ class Layer:
yield name, layer
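Since the example in this docstring is collapsed above, here is a short sketch of iterating over the immediate children of a layer:

.. code-block:: python

import paddle

model = paddle.nn.Sequential(
    paddle.nn.Linear(10, 3), paddle.nn.ReLU(), paddle.nn.Linear(3, 10))

# children() yields only the immediate sublayers, without recursing further.
for child in model.children():
    print(type(child).__name__)
# Linear
# ReLU
# Linear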
def sublayers(self, include_self=False):
"""
Returns a list of sub layers.
Parameters:
include_self(bool, optional): Whether to return self as a sublayer. Default: False
Returns:
list of Layer, a list of sub layers.
Examples:
.. code-block:: python
@@ -839,13 +855,14 @@ class Layer:
def buffers(self, include_sublayers=True):
"""
Returns a list of all buffers from current layer and its sub-layers.
Parameters:
include_sublayers(bool, optional): Whether to include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True
Returns:
list of Tensor, a list of buffers.
Examples:
.. code-block:: python
@@ -1020,7 +1037,9 @@ class Layer:
raise ValueError("Layer shouldn't implement backward")
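A small sketch of how a registered buffer shows up in ``buffers()`` (see the docstring above; the buffer name and value are illustrative):

.. code-block:: python

import paddle

linear = paddle.nn.Linear(10, 3)
value = paddle.to_tensor([0.0], dtype="float32")
# Attach a non-parameter tensor to the layer; persistable buffers also appear in state_dict().
linear.register_buffer("buf_name", value, persistable=True)

print(linear.buffers())  # a list containing the registered buffer tensor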
def add_sublayer(self, name, sublayer):
"""
Adds a sub Layer instance.
Added sublayer can be accessed by self.name
@@ -1028,7 +1047,7 @@ class Layer:
name(str): name of this sublayer.
sublayer(Layer): an instance of Layer.
Returns:
Layer, the sublayer passed in.
Examples:
.. code-block:: python
@@ -1055,6 +1074,7 @@ class Layer:
model = MySequential(fc1, fc2)
for prefix, layer in model.named_sublayers():
print(prefix, layer)
"""
assert isinstance(sublayer, Layer) or sublayer is None
@@ -1070,7 +1090,7 @@ class Layer:
name(str): name of this sublayer.
parameter(Parameter): an instance of Parameter.
Returns:
Parameter, the parameter passed in.
Examples:
.. code-block:: python
@@ -1503,6 +1523,7 @@ class Layer:
use_hook=True,
):
'''
Get all parameters and buffers of current layer and its sub-layers. And set them into a dict
Parameters:
@@ -1511,7 +1532,7 @@ class Layer:
use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True
Returns:
dict, a dict containing all the parameters and persistable buffers.
Examples:
.. code-block:: python
...
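For reference, a self-contained version of the collapsed ``state_dict`` example above, including the usual save/restore round trip:

.. code-block:: python

import paddle

emb = paddle.nn.Embedding(10, 10)

state_dict = emb.state_dict()           # parameters and persistable buffers
paddle.save(state_dict, "paddle_dy.pdparams")

# Restore the layer later from the saved dict.
emb.set_state_dict(paddle.load("paddle_dy.pdparams"))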
@@ -1352,12 +1352,13 @@ class ParameterMetaClass(VariableMetaClass):
class Variable(metaclass=VariableMetaClass):
"""
Notes:
The constructor of Variable should not be invoked directly.
In Static Graph Mode: Please use `Block.create_var` to create a static variable which has no data until being fed.
In Dygraph Mode: Please use :ref:`api_fluid_dygraph_to_variable` to create a dygraph variable with real data.
In Fluid, every input and output of an OP is a variable. In most
cases, variables are used for holding different kinds of data or training
@@ -1513,12 +1514,13 @@ class Variable(metaclass=VariableMetaClass):
def detach(self):
"""
Returns a new Variable, detached from the current graph.
It shares data with the origin Variable without making a tensor copy.
In addition, the detached Variable doesn't provide gradient propagation.
Returns:
( :ref:`api_guide_Variable_en` | dtype is same as current Variable), The detached Variable.
Examples:
.. code-block:: python
@@ -1532,6 +1534,7 @@ class Variable(metaclass=VariableMetaClass):
# create a detached Variable
y = x.detach()
"""
assert (
@@ -2081,6 +2084,7 @@ class Variable(metaclass=VariableMetaClass):
@property
def T(self):
"""
Permute current Variable with its dimensions reversed.
If `n` is the dimensions of `x` , `x.T` is equivalent to `x.transpose([n-1, n-2, ..., 0])`.
@@ -2099,6 +2103,7 @@ class Variable(metaclass=VariableMetaClass):
x_T_np = exe.run(paddle.static.default_main_program(), fetch_list=[x_T])[0]
print(x_T_np.shape)
# (5, 3, 2)
"""
if len(self.shape) == 1:
return self
@@ -2137,7 +2142,7 @@ class Variable(metaclass=VariableMetaClass):
as ``out = assign(tensor)`` .
Returns:
Variable, The cloned Variable.
Examples:
.. code-block:: python
@@ -2167,6 +2172,7 @@ class Variable(metaclass=VariableMetaClass):
def _set_error_clip(self, error_clip):
"""
Set the error_clip.
Args:
@@ -2174,11 +2180,13 @@ class Variable(metaclass=VariableMetaClass):
Returns:
None
"""
self.error_clip = error_clip
def _set_info(self, key, value):
"""
Set key-value information for this variable.
Args:
@@ -2187,6 +2195,7 @@ class Variable(metaclass=VariableMetaClass):
Returns:
None
"""
if not hasattr(self, "_info"):
self._info = {}
@@ -2194,6 +2203,7 @@ class Variable(metaclass=VariableMetaClass):
def _get_info(self, key):
"""
Get the information of this variable corresponding to key.
Args:
@@ -2201,6 +2211,7 @@ class Variable(metaclass=VariableMetaClass):
Returns:
object
"""
if hasattr(self, "_info") and key in self._info:
return self._info[key]
@@ -2208,7 +2219,9 @@ class Variable(metaclass=VariableMetaClass):
def _slice_indices(self, slice, length):
"""
Reference implementation for the slice.indices method.
"""
# Compute step and length as integers.
step = 1 if slice.step is None else slice.step
@@ -2379,7 +2392,7 @@ class Variable(metaclass=VariableMetaClass):
Default: None
Returns:
Tensor, the value in given scope.
Examples:
.. code-block:: python
@@ -2434,6 +2447,7 @@ class Variable(metaclass=VariableMetaClass):
def set_value(self, value, scope=None):
'''
Set the value to the tensor in given scope.
Args:
@@ -2473,6 +2487,7 @@ class Variable(metaclass=VariableMetaClass):
if var.persistable:
t_load = paddle.load(path+var.name+'.pdtensor')
var.set_value(t_load)
'''
# The 'framework' is a low-level module, and 'executor'
@@ -2543,10 +2558,11 @@ class Variable(metaclass=VariableMetaClass):
def size(self):
"""
Returns the number of elements for current Variable, which is an int64 Variable with shape [1]
Returns:
Variable, the number of elements for current Variable
Examples:
.. code-block:: python
@@ -2560,6 +2576,7 @@ class Variable(metaclass=VariableMetaClass):
# get the number of elements of the Variable
y = x.size()
"""
output = self.block.create_var(
@@ -2574,23 +2591,27 @@ class Variable(metaclass=VariableMetaClass):
def _set_attr(self, name, val):
"""
Set the value of attribute by attribute's name.
Args:
name(str): the attribute name.
val(int|str|list): the value of the attribute.
"""
self._update_desc_attr(name, val)
def _has_attr(self, name):
"""
Whether this Variable has the attribute with the name `name` or not.
Args:
name(str): the attribute name.
Returns:
bool, True if it has this attribute.
"""
return self.desc.has_attr(name)
@@ -2620,7 +2641,7 @@ class Variable(metaclass=VariableMetaClass):
name(str): the attribute name.
Returns:
int|str|list, The attribute value. The return value
can be any valid attribute type.
"""
return self.desc.attr(name)
@@ -3193,14 +3214,16 @@ class Operator:
def input(self, name):
r"""
Get the input arguments according to the input parameter name.
Args:
name(str): The input parameter name.
Returns:
list, return the list of argument names that are associated with \
the specific parameter name.
"""
return self.desc.input(name)
...
@@ -37,22 +37,29 @@ __all__ = ['accuracy', 'auc']
def accuracy(input, label, k=1, correct=None, total=None):
"""
accuracy layer.
Refer to the https://en.wikipedia.org/wiki/Precision_and_recall
This function computes the accuracy using the input and label.
If the correct label occurs in top k predictions, then correct will increment by one.
Note:
The dtype of accuracy is determined by input. The input and label dtype can be different.
Args:
input(Tensor): The input of accuracy layer, which is the predictions of network. A Tensor with type float32,float64.
The shape is ``[sample_number, class_dim]`` .
label(Tensor): The label of dataset. Tensor with type int32,int64. The shape is ``[sample_number, 1]`` .
k(int, optional): The top k predictions for each class will be checked. Data type is int64 or int32. Default is 1.
correct(Tensor, optional): The correct predictions count. A Tensor with type int64 or int32. Default is None.
total(Tensor, optional): The total entries count. A tensor with type int64 or int32. Default is None.
Returns:
Tensor, The correct rate. A Tensor with type float32.
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.static as static
@@ -72,6 +79,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
fetch_list=[result[0]])
print(output)
#[array([0.], dtype=float32)]
"""
if _non_static_mode():
if correct is None:
...
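As an aside, a comparable top-k accuracy computation is available in dynamic graph mode through ``paddle.metric.accuracy``; a minimal sketch (values are illustrative):

.. code-block:: python

import paddle

predictions = paddle.to_tensor(
    [[0.2, 0.1, 0.4, 0.1, 0.1],
     [0.2, 0.3, 0.1, 0.15, 0.25]], dtype="float32")
label = paddle.to_tensor([[2], [0]], dtype="int64")
result = paddle.metric.accuracy(input=predictions, label=label, k=1)
print(result)  # 0.5: only the first sample's top-1 prediction matches its label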
@@ -9057,6 +9057,7 @@ def pow(x, factor=1.0, name=None):
@deprecated(since="2.0.0", update_to="paddle.static.nn.prelu")
def prelu(x, mode, param_attr=None, data_format="NCHW", name=None):
r"""
prelu activation.
.. math::
@@ -9071,26 +9072,20 @@ def prelu(x, mode, param_attr=None, data_format="NCHW", name=None):
element: All elements do not share alpha. Each element has its own alpha.
Parameters:
x (Tensor): The input Tensor or LoDTensor with data type float32.
mode (str): The mode for weight sharing.
param_attr (ParamAttr|None, optional): The parameter attribute for the learnable
weight (alpha), it can be created by ParamAttr. None by default.
For detailed information, please refer to :ref:`api_fluid_ParamAttr`.
data_format(str, optional): Data format that specifies the layout of input.
It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW".
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor, A tensor with the same shape and data type as x.
Examples:
.. code-block:: python
import paddle
...
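A minimal static-graph sketch matching the signature above (note this API is deprecated in favour of ``paddle.static.nn.prelu``; the shapes here are illustrative):

.. code-block:: python

import paddle
import paddle.fluid as fluid

paddle.enable_static()
x = paddle.static.data(name="x", shape=[None, 3, 8, 8], dtype="float32")
# "channel" mode: one learnable alpha per channel of the NCHW input.
out = fluid.layers.prelu(x, mode="channel")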
@@ -284,9 +284,11 @@ def fused_bias_dropout_residual_layer_norm(
name=None,
):
r"""
The fused_bias_dropout_residual_layer_norm operator. The pseudo code is as follows:
.. code-block:: python

y = layer_norm(residual + dropout(bias + x))
Parameters:
@@ -315,10 +317,9 @@ def fused_bias_dropout_residual_layer_norm(
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor, The output Tensor, the data type and shape are the same as `x`.
Examples:
.. code-block:: python
# required: gpu
@@ -336,6 +337,7 @@ def fused_bias_dropout_residual_layer_norm(
x, residual, bias)
# [2, 4, 128]
print(output.shape)
"""
seed = None
if mode not in ('downscale_in_infer', 'upscale_in_train'):
...
@@ -705,6 +705,7 @@ class FusedFeedForward(Layer):
class FusedTransformerEncoderLayer(Layer):
"""
FusedTransformerEncoderLayer is composed of two sub-layers which are self (multi-head)
attention and feedforward network. Before and after each sub-layer, pre-process
and post-process would be applied on the input and output accordingly. If
@@ -746,7 +747,6 @@ class FusedTransformerEncoderLayer(Layer):
Examples:
.. code-block:: python
# required: gpu
@@ -759,6 +759,7 @@ class FusedTransformerEncoderLayer(Layer):
attn_mask = paddle.rand((2, 2, 4, 4))
encoder_layer = FusedTransformerEncoderLayer(128, 2, 512)
enc_output = encoder_layer(enc_input, attn_mask) # [2, 4, 128]
"""
def __init__(
...
@@ -835,7 +836,9 @@ class FusedTransformerEncoderLayer(Layer):
def forward(self, src, src_mask=None, cache=None):
"""
Applies a Transformer encoder layer on the input.
Parameters:
src (Tensor): The input of Transformer encoder layer. It is
a tensor with shape `[batch_size, sequence_length, d_model]`.
@@ -851,17 +854,19 @@ class FusedTransformerEncoderLayer(Layer):
`-INF` values and the others have 0 values. It can be None when
nothing wanted or needed to be prevented attention to. Default None.
cache (Tensor, optional): It is an instance of `MultiHeadAttention.Cache`.
See :ref:`api_paddle_nn_TransformerEncoderLayer`.gen_cache for more details. It is
only used for inference and should be None for training. Default
None.
Returns:
Tensor|tuple, It is a tensor that has the same shape and data type \
as `enc_input`, representing the output of Transformer encoder \
layer. Or a tuple if `cache` is not None, except for encoder \
layer output, the tuple includes the new cache which is same \
as input `cache` argument but `incremental_cache` has an \
incremental length. See `MultiHeadAttention.gen_cache` and \
`MultiHeadAttention.forward` for more details.
"""
src_mask = _convert_attention_mask(src_mask, src.dtype)
if cache is None:
...
@@ -28,6 +28,7 @@ def graph_khop_sampler(
name=None,
):
"""
Graph Khop Sampler API.
This API is mainly used in Graph Learning domain, and the main purpose is to
@@ -50,38 +51,36 @@ def graph_khop_sampler(
sample_sizes (list|tuple): The number of neighbors and number of layers we want
to sample. The data type should be int, and the shape
should only have one dimension.
sorted_eids (Tensor, optional): The sorted edge ids, should not be None when `return_eids`
is True. The shape should be [num_edges, 1], and the data
type should be the same with `row`. Default is None.
return_eids (bool, optional): Whether to return the id of the sample edges. Default is False.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
- edge_src (Tensor), The src index of the output edges, also means the first column of
the edges. The shape is [num_sample_edges, 1] currently.
- edge_dst (Tensor), The dst index of the output edges, also means the second column
of the edges. The shape is [num_sample_edges, 1] currently.
- sample_index (Tensor), The original id of the input nodes and sampled neighbor nodes.
- reindex_nodes (Tensor), The reindex id of the input nodes.
- edge_eids (Tensor), Return the id of the sample edges if `return_eids` is True.
Examples:
.. code-block:: python

import paddle

row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7]
colptr = [0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13]
nodes = [0, 8, 1, 2]
sample_sizes = [2, 2]
row = paddle.to_tensor(row, dtype="int64")
colptr = paddle.to_tensor(colptr, dtype="int64")
nodes = paddle.to_tensor(nodes, dtype="int64")
edge_src, edge_dst, sample_index, reindex_nodes = paddle.incubate.graph_khop_sampler(row, colptr, nodes, sample_sizes, False)
"""
...
@@ -35,6 +35,7 @@ def graph_reindex(
name=None,
):
"""
Graph Reindex API.
This API is mainly used in Graph Learning domain, which should be used
@@ -42,11 +43,11 @@ def graph_reindex(
is to reindex the ids information of the input nodes, and return the
corresponding graph edges after reindex.
Notes:
The number in x should be unique, otherwise it would cause potential errors.
Besides, we also support multi-edge-types neighbors reindexing. If we have different
edge_type neighbors for x, we should concatenate all the neighbors and count of x.
We will reindex all the nodes from 0.
Take input nodes x = [0, 1, 2] as an example.
If we have neighbors = [8, 9, 0, 4, 7, 6, 7], and count = [2, 3, 2],
@@ -60,53 +61,52 @@ def graph_reindex(
should be the same with `x`.
count (Tensor): The neighbor count of the input nodes `x`. And the
data type should be int32.
value_buffer (Tensor, optional): Value buffer for hashtable. The data type should
be int32, and should be filled with -1. Default is None.
index_buffer (Tensor, optional): Index buffer for hashtable. The data type should
be int32, and should be filled with -1. Default is None.
flag_buffer_hashtable (bool, optional): Whether to use buffer for hashtable to speed up.
Default is False. Only useful for gpu version currently.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
- reindex_src (Tensor), The source node index of graph edges after reindex.
- reindex_dst (Tensor), The destination node index of graph edges after reindex.
- out_nodes (Tensor), The index of unique input nodes and neighbors before reindex,
where we put the input nodes `x` in the front, and put neighbor
nodes in the back.
Examples:
.. code-block:: python

import paddle

x = [0, 1, 2]
neighbors_e1 = [8, 9, 0, 4, 7, 6, 7]
count_e1 = [2, 3, 2]
x = paddle.to_tensor(x, dtype="int64")
neighbors_e1 = paddle.to_tensor(neighbors_e1, dtype="int64")
count_e1 = paddle.to_tensor(count_e1, dtype="int32")
reindex_src, reindex_dst, out_nodes = \
paddle.incubate.graph_reindex(x, neighbors_e1, count_e1)
# reindex_src: [3, 4, 0, 5, 6, 7, 6]
# reindex_dst: [0, 0, 1, 1, 1, 2, 2]
# out_nodes: [0, 1, 2, 8, 9, 4, 7, 6]

neighbors_e2 = [0, 2, 3, 5, 1]
count_e2 = [1, 3, 1]
neighbors_e2 = paddle.to_tensor(neighbors_e2, dtype="int64")
count_e2 = paddle.to_tensor(count_e2, dtype="int32")
neighbors = paddle.concat([neighbors_e1, neighbors_e2])
count = paddle.concat([count_e1, count_e2])
reindex_src, reindex_dst, out_nodes = \
paddle.incubate.graph_reindex(x, neighbors, count)
# reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1]
# reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2]
# out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5]
"""
if flag_buffer_hashtable:
...
@@ -325,6 +325,7 @@ def resnet_basic_block(
class ResNetBasicBlock(Layer):
r"""
ResNetBasicBlock is designed to optimize the performance of the basic unit of the SSD ResNet block.
If has_shortcut = True, it can calculate 3 Conv2D, 3 BatchNorm and 2 ReLU in one time.
If has_shortcut = False, it can calculate 2 Conv2D, 2 BatchNorm and 2 ReLU in one time. In this
@@ -362,14 +363,14 @@ class ResNetBasicBlock(Layer):
and variance are also used during train period. Default: False.
is_test (bool, optional): A flag indicating whether it is in
test phase or not. Default: False.
filter_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. Default: None.
scale_attr (ParamAttr, optional): The parameter attribute for Parameter `scale`
of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm will create ParamAttr
as param_attr, the name of scale can be set in ParamAttr. If the Initializer of the param_attr is not set,
the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr, optional): The parameter attribute for the bias of batch_norm.
If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
@@ -396,7 +397,6 @@ class ResNetBasicBlock(Layer):
Examples:
.. code-block:: python
# required: xpu
@@ -426,6 +426,7 @@ class ResNetBasicBlock(Layer):
out = resnet_basic_block.forward(x)
print(out.shape) # [2, 8, 16, 16]
"""
def __init__(
...
@@ -259,6 +259,7 @@ def stft(
name=None,
):
r"""
Short-time Fourier transform (STFT).
The STFT computes the discrete Fourier transforms (DFT) of short overlapping
@@ -271,9 +272,12 @@ def stft(
Where:
- :math:`t`: The :math:`t`-th input window.
- :math:`\omega`: Frequency :math:`0 \leq \omega < \text{n\_fft}` for `onesided=False`,
or :math:`0 \leq \omega < \lfloor \text{n\_fft} / 2 \rfloor + 1` for `onesided=True`.
- :math:`N`: Value of `n_fft`.
- :math:`H`: Value of `hop_length`.
Args:
@@ -300,9 +304,9 @@ def stft(
to set this property. For more information, please refer to :ref:`api_guide_Name`.
Returns:
The complex STFT output tensor with shape `[..., n_fft//2 + 1, num_frames]`
(real-valued input and `onesided` is `True`) or `[..., n_fft, num_frames]`
(`onesided` is `False`)
Examples:
.. code-block:: python
@@ -319,6 +323,7 @@ def stft(
x = paddle.randn([8, 48000], dtype=paddle.float64) + \
paddle.randn([8, 48000], dtype=paddle.float64)*1j # [8, 48000] complex128
y1 = stft(x, n_fft=512, center=False, onesided=False) # [8, 512, 372]
"""
check_variable_and_dtype(
x, 'x', ['float32', 'float64', 'complex64', 'complex128'], 'stft'
...
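For completeness, the real-valued counterpart of the complex example above, using the default one-sided output (the import path ``paddle.signal.stft`` is assumed to match the public API):

.. code-block:: python

import paddle
from paddle.signal import stft

x = paddle.randn([8, 48000], dtype=paddle.float64)  # real input, [8, 48000]
y = stft(x, n_fft=512)  # one-sided by default -> [8, 257, 376]
print(y.shape)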
@@ -20,6 +20,7 @@ __all__ = []
class ReLU(Layer):
"""
Sparse ReLU Activation, requiring x to be a SparseCooTensor or SparseCsrTensor.
.. math::
@@ -44,6 +45,7 @@ class ReLU(Layer):
relu = paddle.sparse.nn.ReLU()
out = relu(sparse_x)
# [0., 0., 1.]
"""
def __init__(self, name=None):
@@ -60,6 +62,7 @@ class ReLU(Layer):
class Softmax(Layer):
r"""
Sparse Softmax Activation, requiring x to be a SparseCooTensor or SparseCsrTensor.
Note:
@@ -129,6 +132,7 @@ class Softmax(Layer):
class ReLU6(Layer):
"""
Sparse ReLU6 Activation, requiring x to be a SparseCooTensor or SparseCsrTensor.
.. math::
@@ -152,6 +156,7 @@ class ReLU6(Layer):
sparse_x = dense_x.to_sparse_coo(1)
relu6 = paddle.sparse.nn.ReLU6()
out = relu6(sparse_x)
"""
def __init__(self, name=None):
@@ -168,6 +173,7 @@ class ReLU6(Layer):
class LeakyReLU(Layer):
r"""
Sparse Leaky ReLU Activation, requiring x to be a SparseCooTensor or SparseCsrTensor.
.. math::
@@ -199,6 +205,7 @@ class LeakyReLU(Layer):
sparse_x = dense_x.to_sparse_coo(1)
leaky_relu = paddle.sparse.nn.LeakyReLU(0.5)
out = leaky_relu(sparse_x)
"""
def __init__(self, negative_slope=0.01, name=None):
...