diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 1e4ca9237469ba54628dfa965294d780a02f7743..a1748c76b928569674ce1dd7457b6541d0a6eed2 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -626,6 +626,7 @@ def ifftn(x, s=None, axes=None, norm="backward", name=None): def rfftn(x, s=None, axes=None, norm="backward", name=None): """ + The N dimensional FFT for real input. This function computes the N-dimensional discrete Fourier Transform over @@ -659,9 +660,9 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None): three operations are shown below: - "backward": The factor of forward direction and backward direction are ``1`` - and ``1/n`` respectively; + and ``1/n`` respectively; - "forward": The factor of forward direction and backward direction are ``1/n`` - and ``1`` respectively; + and ``1`` respectively; - "ortho": The factor of forward direction and backward direction are both ``1/sqrt(n)``. Where ``n`` is the multiplication of each element in ``s`` . @@ -670,36 +671,35 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None): refer to :ref:`api_guide_Name` . Returns: - out(Tensor): complex tensor + out(Tensor), complex tensor Examples: + .. code-block:: python - .. code-block:: python - - import paddle + import paddle - # default, all axis will be used to exec fft - x = paddle.ones((2, 3, 4)) - print(paddle.fft.rfftn(x)) - # Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True, - # [[[(24+0j), 0j , 0j ], - # [0j , 0j , 0j ], - # [0j , 0j , 0j ]], - # - # [[0j , 0j , 0j ], - # [0j , 0j , 0j ], - # [0j , 0j , 0j ]]]) - - # use axes(2, 0) - print(paddle.fft.rfftn(x, axes=(2, 0))) - # Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True, - # [[[(8+0j), 0j , 0j ], - # [(8+0j), 0j , 0j ], - # [(8+0j), 0j , 0j ]], - # - # [[0j , 0j , 0j ], - # [0j , 0j , 0j ], - # [0j , 0j , 0j ]]]) + # default, all axes will be used to exec fft + x = paddle.ones((2, 3, 4)) + print(paddle.fft.rfftn(x)) + # Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True, + # [[[(24+0j), 0j , 0j ], + # [0j , 0j , 0j ], + # [0j , 0j , 0j ]], + # + # [[0j , 0j , 0j ], + # [0j , 0j , 0j ], + # [0j , 0j , 0j ]]]) + + # use axes(2, 0) + print(paddle.fft.rfftn(x, axes=(2, 0))) + # Tensor(shape=[2, 3, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True, + # [[[(8+0j), 0j , 0j ], + # [(8+0j), 0j , 0j ], + # [(8+0j), 0j , 0j ]], + # + # [[0j , 0j , 0j ], + # [0j , 0j , 0j ], + # [0j , 0j , 0j ]]]) """ return fftn_r2c(x, s, axes, norm, forward=True, onesided=True, name=name) diff --git a/python/paddle/fluid/contrib/sparsity/supported_layer_list.py b/python/paddle/fluid/contrib/sparsity/supported_layer_list.py index f55a877b4b7f364e476d06050ed7624a1f3143c6..b0b64f27eccc1ec5849f43ea1a57d584354e1527 100644 --- a/python/paddle/fluid/contrib/sparsity/supported_layer_list.py +++ b/python/paddle/fluid/contrib/sparsity/supported_layer_list.py @@ -82,15 +82,17 @@ supported_layers_and_prune_func_map = {} def add_supported_layer(layer, pruning_func=None): r""" + Add supported layers and their corresponding pruning functions. Args: name (string|Layer): The name or type of layer, needed to support. If layer is `Layer` then - it would be turn to string internally. ASP would use this name to match parameter's name and call - its the corresponding pruning function. + it would be turned to string internally. ASP would use this name to match parameter's name and call + its corresponding pruning function. pruning_func (function, optional): a function type which receives five arguments (weight_nparray, - m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight, - m, n, and func_name, please see `prune_model` for details. + m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight, + m, n, and func_name, please see `prune_model` for details.
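+
+ A minimal usage sketch follows. The pruning function here is a hypothetical stand-in (a real one should return the pruned weight nparray together with its mask, see `prune_model`), and it assumes this helper is exposed as `paddle.incubate.asp.add_supported_layer`:
+
+ Examples:
+     .. code-block:: python
+
+         import numpy as np
+         import paddle
+
+         # illustrative only: keep every weight and return an all-ones mask
+         def my_pruning(weight_nparray, m, n, func_name, param_name):
+             mask = np.ones_like(weight_nparray)
+             return weight_nparray * mask, mask
+
+         paddle.incubate.asp.add_supported_layer('linear', my_pruning)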
+ """ name = None if isinstance(layer, str): diff --git a/python/paddle/fluid/contrib/sparsity/utils.py b/python/paddle/fluid/contrib/sparsity/utils.py index b5be3887380aeac9c948aadda0f5ef30afcec0fa..b9a5c0a7b31dafddd657234890602d4aeb3dd186 100644 --- a/python/paddle/fluid/contrib/sparsity/utils.py +++ b/python/paddle/fluid/contrib/sparsity/utils.py @@ -92,20 +92,25 @@ class CheckMethod(Enum): def calculate_density(x): r""" + Return the density of the input tensor. Args: x (nparray): The input tensor. + Returns: - float: The density of :attr:`x`. + float, The density of :attr:`x`. + Examples: .. code-block:: python - import paddle - import numpy as np - x = np.array([[0, 1, 3, 0], + import paddle + import numpy as np + + x = np.array([[0, 1, 3, 0], [1, 1, 0, 1]]) - paddle.incubate.asp.calculate_density(x) # 0.625 + paddle.incubate.asp.calculate_density(x) # 0.625 + """ x_flattened = x.flatten() return float(np.nonzero(x_flattened)[0].size) / x_flattened.size diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 5e15519bd962788697c95e81e3f2510809e452bb..1593cc78e6a2c5714b7001679dfe3ae1a02ec99b 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -177,13 +177,14 @@ class Layer: def train(self): """ + Sets this Layer and all its sublayers to training mode. This only affects certain modules like `Dropout` and `BatchNorm`. Returns: None - Example:: + Examples: .. code-block:: python import paddle @@ -260,6 +261,7 @@ class Layer: def apply(self, fn): """ + Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``) as well as self. Typical use includes initializing the parameters of a model. @@ -267,7 +269,7 @@ class Layer: fn (function): a function to be applied to each sublayer Returns: - Layer: self + Layer, self Example:: .. code-block:: python @@ -287,6 +289,7 @@ class Layer: net.apply(init_weights) print(net.state_dict()) + """ for layer in self.children(): layer.apply(fn) @@ -296,10 +299,12 @@ class Layer: return self def full_name(self): - """Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__ + """ + + Full name for this layer, composed of name_scope + "/" + MyLayer.__class__.__name__ Returns: - str: full name of this layer. + str, full name of this layer. Example:: .. code-block:: python @@ -321,7 +326,9 @@ class Layer: return self._full_name def register_forward_post_hook(self, hook): - """Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed. + """ + + Register a forward post-hook for Layer. The hook will be called after the `forward` function has been computed. It should have the following form, `input` and `output` of the `hook` are `input` and `output` of the `Layer` respectively. User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.
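+
+ A sketch of the expected hook form (the name is illustrative; it mirrors the example below, where the hook doubles the output):
+
+     .. code-block:: python
+
+         def forward_post_hook(layer, input, output):
+             # return a modified output, or None to keep the original output
+             return output * 2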
@@ -332,7 +339,7 @@ class Layer: hook(function): a function registered as a forward post-hook Returns: - HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` . + HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` . Examples: .. code-block:: python @@ -364,13 +371,16 @@ class Layer: # hook change the linear's output to output * 2, so out0 is equal to out1 * 2. assert (out0.numpy() == (out1.numpy()) * 2).any() + """ hook_remove_helper = HookRemoveHelper(self._forward_post_hooks) self._forward_post_hooks[hook_remove_helper._hook_id] = hook return hook_remove_helper def register_forward_pre_hook(self, hook): - """Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed. + """ + + Register a forward pre-hook for Layer. The hook will be called before the `forward` function is computed. It should have the following form, `input` of the `hook` is `input` of the `Layer`, hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if @@ -383,7 +393,7 @@ class Layer: hook(function): a function registered as a forward pre-hook Returns: - HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` . + HookRemoveHelper, a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` . Examples: .. code-block:: python @@ -581,18 +591,20 @@ class Layer: ) def parameters(self, include_sublayers=True): - """Returns a list of all Parameters from current layer and its sub-layers. + """ + + Returns a list of all Parameters from current layer and its sub-layers. Returns: - list of Tensor : a list of Parameters. + list of Tensor, a list of Parameters. Examples: .. code-block:: python - import paddle + import paddle - linear = paddle.nn.Linear(1,1) - print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0 + linear = paddle.nn.Linear(1,1) + print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0 """ ret = [ @@ -604,7 +616,9 @@ class Layer: return ret def children(self): - """Returns an iterator over immediate children layers. + """ + + Returns an iterator over immediate children layers. Yields: Layer: a child layer @@ -654,13 +668,15 @@ class Layer: yield name, layer def sublayers(self, include_self=False): - """Returns a list of sub layers. + """ + + Returns a list of sub layers. Parameters: include_self(bool, optional): Whether to return self as sublayers. Default: False Returns: - list of Layer : a list of sub layers. + list of Layer, a list of sub layers. Examples: .. code-block:: python @@ -839,13 +855,14 @@ class Layer: def buffers(self, include_sublayers=True): """ + Returns a list of all buffers from current layer and its sub-layers. Parameters: include_sublayers(bool, optional): Whether to include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True Returns: - list of Tensor : a list of buffers. + list of Tensor, a list of buffers. Examples: .. code-block:: python @@ -1020,7 +1037,9 @@ class Layer: raise ValueError("Layer shouldn't implement backward") def add_sublayer(self, name, sublayer): - """Adds a sub Layer instance. + """ + + Adds a sub Layer instance. Added sublayer can be accessed by self.name Parameters: name(str): name of this sublayer.
sublayer(Layer): an instance of Layer. Returns: - Layer: the sublayer passed in. + Layer, the sublayer passed in. Examples: .. code-block:: python @@ -1055,6 +1074,7 @@ class Layer: model = MySequential(fc1, fc2) for prefix, layer in model.named_sublayers(): print(prefix, layer) + """ assert isinstance(sublayer, Layer) or sublayer is None @@ -1070,7 +1090,7 @@ class Layer: name(str): name of this sublayer. parameter(Parameter): an instance of Parameter. Returns: - Parameter: the parameter passed in. + Parameter, the parameter passed in. Examples: .. code-block:: python @@ -1503,6 +1523,7 @@ class Layer: use_hook=True, ): ''' + Get all parameters and buffers of current layer and its sub-layers. And set them into a dict Parameters: @@ -1511,7 +1532,7 @@ class Layer: use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True Returns: - dict: a dict contains all the parameters and persistable buffers. + dict, a dict containing all the parameters and persistable buffers. Examples: .. code-block:: python diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 4fc525003f71d25b3af98f7852b0cb36b38438bb..c5e0631ecd4dad9a01a3c43255674a49162b3e6e 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1352,12 +1352,13 @@ class ParameterMetaClass(VariableMetaClass): class Variable(metaclass=VariableMetaClass): """ - **Notes**: - **The constructor of Variable should not be invoked directly.** - **In Static Graph Mode: Please use** `Block.create_var` **to create a Static variable which has no data until being feed.** + Notes: + The constructor of Variable should not be invoked directly. + + In Static Graph Mode: Please use `Block.create_var` to create a Static variable which has no data until being fed. - **In Dygraph Mode: Please use** :ref:`api_fluid_dygraph_to_variable` **to create a dygraph variable with real data** + In Dygraph Mode: Please use :ref:`api_fluid_dygraph_to_variable` to create a dygraph variable with real data. In Fluid, every input and output of an OP is a variable. In most cases, variables are used for holding different kinds of data or training @@ -1513,12 +1514,13 @@ class Variable(metaclass=VariableMetaClass): def detach(self): """ + Returns a new Variable, detached from the current graph. It will share data with origin Variable and without tensor copy. In addition, the detached Variable doesn't provide gradient propagation. Returns: - ( :ref:`api_guide_Variable_en` | dtype is same as current Variable): The detached Variable. + ( :ref:`api_guide_Variable_en` | dtype is same as current Variable), The detached Variable. Examples: .. code-block:: python @@ -1532,6 +1534,7 @@ class Variable(metaclass=VariableMetaClass): # create a detached Variable y = x.detach() + """ assert ( @@ -2081,6 +2084,7 @@ class Variable(metaclass=VariableMetaClass): @property def T(self): """ + Permute current Variable with its dimensions reversed. If `n` is the dimensions of `x` , `x.T` is equivalent to `x.transpose([n-1, n-2, ..., 0])`. @@ -2099,6 +2103,7 @@ class Variable(metaclass=VariableMetaClass): x_T_np = exe.run(paddle.static.default_main_program(), fetch_list=[x_T])[0] print(x_T_np.shape) # (5, 3, 2) + """ if len(self.shape) == 1: return self @@ -2137,7 +2142,7 @@ class Variable(metaclass=VariableMetaClass): as ``out = assign(tensor)`` . Returns: - Variable: The cloned Variable. + Variable, The cloned Variable. Examples: ..
code-block:: python @@ -2167,6 +2172,7 @@ class Variable(metaclass=VariableMetaClass): def _set_error_clip(self, error_clip): """ + Set the error_clip. Args: @@ -2174,11 +2180,13 @@ class Variable(metaclass=VariableMetaClass): Returns: None + """ self.error_clip = error_clip def _set_info(self, key, value): """ + Set key-value information for this variable. Args: @@ -2187,6 +2195,7 @@ class Variable(metaclass=VariableMetaClass): Returns: None + """ if not hasattr(self, "_info"): self._info = {} @@ -2194,6 +2203,7 @@ class Variable(metaclass=VariableMetaClass): def _get_info(self, key): """ + Get the information of this variable corresponding to key. Args: @@ -2201,6 +2211,7 @@ class Variable(metaclass=VariableMetaClass): Returns: object + """ if hasattr(self, "_info") and key in self._info: return self._info[key] @@ -2208,7 +2219,9 @@ class Variable(metaclass=VariableMetaClass): def _slice_indices(self, slice, length): """ + Reference implementation for the slice.indices method. + """ # Compute step and length as integers. step = 1 if slice.step is None else slice.step @@ -2379,7 +2392,7 @@ class Variable(metaclass=VariableMetaClass): Default: None Returns: - Tensor: the value in given scope. + Tensor, the value in given scope. Examples: .. code-block:: python @@ -2434,6 +2447,7 @@ class Variable(metaclass=VariableMetaClass): def set_value(self, value, scope=None): ''' + Set the value to the tensor in given scope. Args: @@ -2473,6 +2487,7 @@ class Variable(metaclass=VariableMetaClass): if var.persistable: t_load = paddle.load(path+var.name+'.pdtensor') var.set_value(t_load) + ''' # The 'framework' is a low-level module, and 'executor' @@ -2543,10 +2558,11 @@ class Variable(metaclass=VariableMetaClass): def size(self): """ + Returns the number of elements for current Variable, which is an int64 Variable with shape [1] Returns: - Variable: the number of elements for current Variable + Variable, the number of elements for current Variable Examples: .. code-block:: python @@ -2560,6 +2576,7 @@ class Variable(metaclass=VariableMetaClass): # get the number of elements of the Variable y = x.size() + """ output = self.block.create_var( @@ -2574,23 +2591,27 @@ class Variable(metaclass=VariableMetaClass): def _set_attr(self, name, val): """ + Set the value of attribute by attribute's name. Args: name(str): the attribute name. val(int|str|list): the value of the attribute. + """ self._update_desc_attr(name, val) def _has_attr(self, name): """ + Whether this Variable has the attribute with the name `name` or not. Args: name(str): the attribute name. Returns: - bool: True if has this attribute. + bool, True if has this attribute. + """ return self.desc.has_attr(name) @@ -2620,7 +2641,7 @@ class Variable(metaclass=VariableMetaClass): name(str): the attribute name. Returns: - int|str|list: The attribute value. The return value + int|str|list, The attribute value. The return value can be any valid attribute type. """ return self.desc.attr(name) @@ -3193,14 +3214,16 @@ class Operator: def input(self, name): r""" + Get the input arguments according to the input parameter name. Args: name(str): The input parameter name. Returns: - list: return the list of argument names that associated with \ + list, return the list of argument names that are associated with \ the specific parameter name.
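+
+ Examples:
+     .. code-block:: python
+
+         # A short sketch; the operator type and its input parameter
+         # names depend on the network that was built, so the printed
+         # values are illustrative.
+         import paddle
+
+         paddle.enable_static()
+         x = paddle.static.data(name='x', shape=[None, 1], dtype='float32')
+         y = paddle.static.nn.fc(x, size=1)
+         op = paddle.static.default_main_program().global_block().ops[0]
+         print(op.input('X'))  # e.g. ['x']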
+ """ return self.desc.input(name) diff --git a/python/paddle/fluid/layers/metric_op.py b/python/paddle/fluid/layers/metric_op.py index 8a63b55089e8b9db040c9a02694c01ec1630856c..3179f5d568c4f8781a6989bc34bd3bfea0e5dbf8 100755 --- a/python/paddle/fluid/layers/metric_op.py +++ b/python/paddle/fluid/layers/metric_op.py @@ -37,22 +37,29 @@ __all__ = ['accuracy', 'auc'] def accuracy(input, label, k=1, correct=None, total=None): """ + accuracy layer. Refer to the https://en.wikipedia.org/wiki/Precision_and_recall This function computes the accuracy using the input and label. If the correct label occurs in top k predictions, then correct will increment by one. - Note: the dtype of accuracy is determined by input. the input and label dtype can be different. + + Note: + the dtype of accuracy is determined by input. the input and label dtype can be different. + Args: input(Tensor): The input of accuracy layer, which is the predictions of network. A Tensor with type float32,float64. The shape is ``[sample_number, class_dim]`` . label(Tensor): The label of dataset. Tensor with type int32,int64. The shape is ``[sample_number, 1]`` . - k(int): The top k predictions for each class will be checked. Data type is int64 or int32. - correct(Tensor): The correct predictions count. A Tensor with type int64 or int32. - total(Tensor): The total entries count. A tensor with type int64 or int32. + k(int, optional): The top k predictions for each class will be checked. Data type is int64 or int32. Default is 1. + correct(Tensor, optional): The correct predictions count. A Tensor with type int64 or int32. Default is None. + total(Tensor, optional): The total entries count. A tensor with type int64 or int32. Default is None. + Returns: - Tensor: The correct rate. A Tensor with type float32. + Tensor, The correct rate. A Tensor with type float32. + Examples: .. code-block:: python + import numpy as np import paddle import paddle.static as static @@ -72,6 +79,7 @@ def accuracy(input, label, k=1, correct=None, total=None): fetch_list=[result[0]]) print(output) #[array([0.], dtype=float32)] + """ if _non_static_mode(): if correct is None: diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 96ca8a459bd5002be46c01289e85c4c68bc4055b..1f74a79a91b7ceb644fc9d428e373d284634c910 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -9057,6 +9057,7 @@ def pow(x, factor=1.0, name=None): @deprecated(since="2.0.0", update_to="paddle.static.nn.prelu") def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): r""" + prelu activation. .. math:: @@ -9071,26 +9072,20 @@ def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): element: All elements do not share alpha. Each element has its own alpha. Parameters: - x (Tensor): The input Tensor or LoDTensor with data type float32. - mode (str): The mode for weight sharing. - - param_attr (ParamAttr|None, optional): The parameter attribute for the learnable \ - weight (alpha), it can be create by ParamAttr. None by default. \ - For detailed information, please refer to :ref:`api_fluid_ParamAttr`. - - name (str, optional): Name for the operation (optional, default is None). \ - For more information, please refer to :ref:`api_guide_Name`. - + param_attr (ParamAttr|None, optional): The parameter attribute for the learnable + weight (alpha), it can be create by ParamAttr. None by default. + For detailed information, please refer to :ref:`api_fluid_ParamAttr`. 
data_format(str, optional): Data format that specifies the layout of input. It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW". + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor: A tensor with the same shape and data type as x. + Tensor, A tensor with the same shape and data type as x. Examples: - .. code-block:: python import paddle diff --git a/python/paddle/incubate/nn/functional/fused_transformer.py b/python/paddle/incubate/nn/functional/fused_transformer.py index 0887cd56aefe42e166d61c67ac570f0fef6d20da..e6c8f33efb2b311371fc34017cf8a801cbf774b1 100644 --- a/python/paddle/incubate/nn/functional/fused_transformer.py +++ b/python/paddle/incubate/nn/functional/fused_transformer.py @@ -284,9 +284,11 @@ def fused_bias_dropout_residual_layer_norm( name=None, ): r""" + The fused_bias_dropout_residual_layer_norm operator. The pseudo code is as follows: .. code-block:: python + y = layer_norm(residual + dropout(bias + x)) Parameters: @@ -315,10 +317,9 @@ def fused_bias_dropout_residual_layer_norm( name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor: The output Tensor, the data type and shape is same as `x`. + Tensor, The output Tensor, the data type and shape are the same as `x`. Examples: - .. code-block:: python # required: gpu @@ -336,6 +337,7 @@ def fused_bias_dropout_residual_layer_norm( x, residual, bias) # [2, 4, 128] print(output.shape) + """ seed = None if mode not in ('downscale_in_infer', 'upscale_in_train'): diff --git a/python/paddle/incubate/nn/layer/fused_transformer.py b/python/paddle/incubate/nn/layer/fused_transformer.py index 5bf553f0eb77b65fb22f3050bf3a933ab92b0cce..72b074a68cb1548cce917862cf1c8aa16d3fde9f 100644 --- a/python/paddle/incubate/nn/layer/fused_transformer.py +++ b/python/paddle/incubate/nn/layer/fused_transformer.py @@ -705,6 +705,7 @@ class FusedFeedForward(Layer): class FusedTransformerEncoderLayer(Layer): """ + FusedTransformerEncoderLayer is composed of two sub-layers which are self (multi-head) attention and feedforward network. Before and after each sub-layer, pre-process and post-process would be applied on the input and output accordingly. If @@ -746,7 +747,6 @@ class FusedTransformerEncoderLayer(Layer): Examples: - .. code-block:: python # required: gpu @@ -759,6 +759,7 @@ class FusedTransformerEncoderLayer(Layer): attn_mask = paddle.rand((2, 2, 4, 4)) encoder_layer = FusedTransformerEncoderLayer(128, 2, 512) enc_output = encoder_layer(enc_input, attn_mask) # [2, 4, 128] + """ def __init__( @@ -835,7 +836,9 @@ class FusedTransformerEncoderLayer(Layer): def forward(self, src, src_mask=None, cache=None): """ + Applies a Transformer encoder layer on the input. + Parameters: src (Tensor): The input of Transformer encoder layer. It is a tensor with shape `[batch_size, sequence_length, d_model]`. @@ -851,17 +854,19 @@ class FusedTransformerEncoderLayer(Layer): `-INF` values and the others have 0 values. It can be None when nothing wanted or needed to be prevented attention to. Default None. cache (Tensor, optional): It is an instance of `MultiHeadAttention.Cache`. - See `TransformerEncoderLayer.gen_cache` for more details. It is + See :ref:`api_paddle_nn_TransformerEncoderLayer`.gen_cache for more details. It is only used for inference and should be None for training. Default None. + Returns: - Tensor|tuple: It is a tensor that has the same shape and data type \ + Tensor|tuple, It is a tensor that has the same shape and data type \ as `enc_input`, representing the output of Transformer encoder \ layer. Or a tuple if `cache` is not None, except for encoder \ layer output, the tuple includes the new cache which is same \ as input `cache` argument but `incremental_cache` has an \ incremental length. See `MultiHeadAttention.gen_cache` and \ `MultiHeadAttention.forward` for more details.
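+
+ Examples:
+     .. code-block:: python
+
+         # required: gpu
+         # A sketch mirroring the class-level example above; the shapes
+         # of enc_input and attn_mask follow that example.
+         import paddle
+         from paddle.incubate.nn import FusedTransformerEncoderLayer
+
+         enc_input = paddle.rand((2, 4, 128))
+         attn_mask = paddle.rand((2, 2, 4, 4))
+         encoder_layer = FusedTransformerEncoderLayer(128, 2, 512)
+         enc_output = encoder_layer.forward(enc_input, attn_mask)  # [2, 4, 128]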
+ Returns: - Tensor|tuple: It is a tensor that has the same shape and data type \ + Tensor|tuple, It is a tensor that has the same shape and data type \ as `enc_input`, representing the output of Transformer encoder \ layer. Or a tuple if `cache` is not None, except for encoder \ layer output, the tuple includes the new cache which is same \ as input `cache` argument but `incremental_cache` has an \ incremental length. See `MultiHeadAttention.gen_cache` and \ `MultiHeadAttention.forward` for more details. + """ src_mask = _convert_attention_mask(src_mask, src.dtype) if cache is None: diff --git a/python/paddle/incubate/operators/graph_khop_sampler.py b/python/paddle/incubate/operators/graph_khop_sampler.py index 821c4b418ed7ea3d55cd024b062392f7a53106f7..bbe8d6a5646d37fb1ef3d1180f947ebbcfb8f0d1 100644 --- a/python/paddle/incubate/operators/graph_khop_sampler.py +++ b/python/paddle/incubate/operators/graph_khop_sampler.py @@ -28,6 +28,7 @@ def graph_khop_sampler( name=None, ): """ + Graph Khop Sampler API. This API is mainly used in Graph Learning domain, and the main purpose is to @@ -50,38 +51,36 @@ def graph_khop_sampler( sample_sizes (list|tuple): The number of neighbors and number of layers we want to sample. The data type should be int, and the shape should only have one dimension. - sorted_eids (Tensor): The sorted edge ids, should not be None when `return_eids` + sorted_eids (Tensor, optional): The sorted edge ids, should not be None when `return_eids` is True. The shape should be [num_edges, 1], and the data - type should be the same with `row`. - return_eids (bool): Whether to return the id of the sample edges. Default is False. + type should be the same with `row`. Default is None. + return_eids (bool, optional): Whether to return the id of the sample edges. Default is False. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: - edge_src (Tensor): The src index of the output edges, also means the first column of - the edges. The shape is [num_sample_edges, 1] currently. - edge_dst (Tensor): The dst index of the output edges, also means the second column - of the edges. The shape is [num_sample_edges, 1] currently. - sample_index (Tensor): The original id of the input nodes and sampled neighbor nodes. - reindex_nodes (Tensor): The reindex id of the input nodes. - edge_eids (Tensor): Return the id of the sample edges if `return_eids` is True. + - edge_src (Tensor), The src index of the output edges, also means the first column of + the edges. The shape is [num_sample_edges, 1] currently. + - edge_dst (Tensor), The dst index of the output edges, also means the second column + of the edges. The shape is [num_sample_edges, 1] currently. + - sample_index (Tensor), The original id of the input nodes and sampled neighbor nodes. + - reindex_nodes (Tensor), The reindex id of the input nodes. + - edge_eids (Tensor), Return the id of the sample edges if `return_eids` is True. Examples: - .. 
code-block:: python - import paddle + import paddle - row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7] - colptr = [0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13] - nodes = [0, 8, 1, 2] - sample_sizes = [2, 2] - row = paddle.to_tensor(row, dtype="int64") - colptr = paddle.to_tensor(colptr, dtype="int64") - nodes = paddle.to_tensor(nodes, dtype="int64") + row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7] + colptr = [0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13] + nodes = [0, 8, 1, 2] + sample_sizes = [2, 2] + row = paddle.to_tensor(row, dtype="int64") + colptr = paddle.to_tensor(colptr, dtype="int64") + nodes = paddle.to_tensor(nodes, dtype="int64") - edge_src, edge_dst, sample_index, reindex_nodes = \ - paddle.incubate.graph_khop_sampler(row, colptr, nodes, sample_sizes, False) + edge_src, edge_dst, sample_index, reindex_nodes = paddle.incubate.graph_khop_sampler(row, colptr, nodes, sample_sizes, False) """ diff --git a/python/paddle/incubate/operators/graph_reindex.py b/python/paddle/incubate/operators/graph_reindex.py index d721c9a002e1895b7650b2e7bc343dab2a6c2a69..0ac5f0246f26ce8146527b0843df309de892ec6a 100644 --- a/python/paddle/incubate/operators/graph_reindex.py +++ b/python/paddle/incubate/operators/graph_reindex.py @@ -35,6 +35,7 @@ def graph_reindex( name=None, ): """ + Graph Reindex API. This API is mainly used in Graph Learning domain, which should be used @@ -42,11 +43,11 @@ def graph_reindex( is to reindex the ids information of the input nodes, and return the corresponding graph edges after reindex. - **Notes**: + Notes: The number in x should be unique, otherwise it would cause potential errors. - Besides, we also support multi-edge-types neighbors reindexing. If we have different - edge_type neighbors for x, we should concatenate all the neighbors and count of x. - We will reindex all the nodes from 0. + Besides, we also support multi-edge-types neighbors reindexing. If we have different + edge_type neighbors for x, we should concatenate all the neighbors and count of x. + We will reindex all the nodes from 0. Take input nodes x = [0, 1, 2] as an example. If we have neighbors = [8, 9, 0, 4, 7, 6, 7], and count = [2, 3, 2], @@ -60,53 +61,52 @@ def graph_reindex( should be the same as `x`. count (Tensor): The neighbor count of the input nodes `x`. And the data type should be int32. - value_buffer (Tensor|None): Value buffer for hashtable. The data type should - be int32, and should be filled with -1. - index_buffer (Tensor|None): Index buffer for hashtable. The data type should - be int32, and should be filled with -1. - flag_buffer_hashtable (bool): Whether to use buffer for hashtable to speed up. + value_buffer (Tensor, optional): Value buffer for hashtable. The data type should + be int32, and should be filled with -1. Default is None. + index_buffer (Tensor, optional): Index buffer for hashtable. The data type should + be int32, and should be filled with -1. Default is None. + flag_buffer_hashtable (bool, optional): Whether to use buffer for hashtable to speed up. Default is False. Only useful for gpu version currently. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: - reindex_src (Tensor): The source node index of graph edges after reindex. - reindex_dst (Tensor): The destination node index of graph edges after reindex. - out_nodes (Tensor): The index of unique input nodes and neighbors before reindex, - where we put the input nodes `x` in the front, and put neighbor - nodes in the back.
+ - reindex_src (Tensor), The source node index of graph edges after reindex. + - reindex_dst (Tensor), The destination node index of graph edges after reindex. + - out_nodes (Tensor), The index of unique input nodes and neighbors before reindex, + where we put the input nodes `x` in the front, and put neighbor + nodes in the back. Examples: - .. code-block:: python - import paddle - - x = [0, 1, 2] - neighbors_e1 = [8, 9, 0, 4, 7, 6, 7] - count_e1 = [2, 3, 2] - x = paddle.to_tensor(x, dtype="int64") - neighbors_e1 = paddle.to_tensor(neighbors_e1, dtype="int64") - count_e1 = paddle.to_tensor(count_e1, dtype="int32") - - reindex_src, reindex_dst, out_nodes = \ - paddle.incubate.graph_reindex(x, neighbors_e1, count_e1) - # reindex_src: [3, 4, 0, 5, 6, 7, 6] - # reindex_dst: [0, 0, 1, 1, 1, 2, 2] - # out_nodes: [0, 1, 2, 8, 9, 4, 7, 6] - - neighbors_e2 = [0, 2, 3, 5, 1] - count_e2 = [1, 3, 1] - neighbors_e2 = paddle.to_tensor(neighbors_e2, dtype="int64") - count_e2 = paddle.to_tensor(count_e2, dtype="int32") - - neighbors = paddle.concat([neighbors_e1, neighbors_e2]) - count = paddle.concat([count_e1, count_e2]) - reindex_src, reindex_dst, out_nodes = \ - paddle.incubate.graph_reindex(x, neighbors, count) - # reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1] - # reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2] - # out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5] + import paddle + + x = [0, 1, 2] + neighbors_e1 = [8, 9, 0, 4, 7, 6, 7] + count_e1 = [2, 3, 2] + x = paddle.to_tensor(x, dtype="int64") + neighbors_e1 = paddle.to_tensor(neighbors_e1, dtype="int64") + count_e1 = paddle.to_tensor(count_e1, dtype="int32") + + reindex_src, reindex_dst, out_nodes = \ + paddle.incubate.graph_reindex(x, neighbors_e1, count_e1) + # reindex_src: [3, 4, 0, 5, 6, 7, 6] + # reindex_dst: [0, 0, 1, 1, 1, 2, 2] + # out_nodes: [0, 1, 2, 8, 9, 4, 7, 6] + + neighbors_e2 = [0, 2, 3, 5, 1] + count_e2 = [1, 3, 1] + neighbors_e2 = paddle.to_tensor(neighbors_e2, dtype="int64") + count_e2 = paddle.to_tensor(count_e2, dtype="int32") + + neighbors = paddle.concat([neighbors_e1, neighbors_e2]) + count = paddle.concat([count_e1, count_e2]) + reindex_src, reindex_dst, out_nodes = \ + paddle.incubate.graph_reindex(x, neighbors, count) + # reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1] + # reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2] + # out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5] """ if flag_buffer_hashtable: diff --git a/python/paddle/incubate/xpu/resnet_block.py b/python/paddle/incubate/xpu/resnet_block.py index 726a1676da125ddabe8bba526296cc0be2ee0f9d..a02dcffeff8979294a72648b66d89059a3493834 100644 --- a/python/paddle/incubate/xpu/resnet_block.py +++ b/python/paddle/incubate/xpu/resnet_block.py @@ -325,6 +325,7 @@ def resnet_basic_block( class ResNetBasicBlock(Layer): r""" + ResNetBasicBlock is designed to optimize the performance of the basic unit of SSD ResNet block. If has_shortcut = True, it can calculate 3 Conv2D, 3 BatchNorm and 2 ReLU in one time. If has_shortcut = False, it can calculate 2 Conv2D, 2 BatchNorm and 2 ReLU in one time. In this @@ -362,14 +363,14 @@ class ResNetBasicBlock(Layer): and variance are also used during train period. Default: False. is_test (bool, optional): A flag indicating whether it is in test phase or not. Default: False. - filter_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + filter_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights of conv2d.
If it is set to None or one attribute of ParamAttr, conv2d will create ParamAttr as param_attr. Default: None. - scale_attr (ParamAttr|None): The parameter attribute for Parameter `scale` + scale_attr (ParamAttr, optional): The parameter attribute for Parameter `scale` of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm will create ParamAttr as param_attr, the name of scale can be set in ParamAttr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias of batch_norm. + bias_attr (ParamAttr, optional): The parameter attribute for the bias of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. If the Initializer of the bias_attr is not set, the bias is initialized to zero. @@ -396,7 +397,6 @@ class ResNetBasicBlock(Layer): Examples: - .. code-block:: python # required: xpu @@ -426,6 +426,7 @@ class ResNetBasicBlock(Layer): out = resnet_basic_block.forward(x) print(out.shape) # [2, 8, 16, 16] + """ def __init__( diff --git a/python/paddle/signal.py b/python/paddle/signal.py index 82d46b81967634fe4e30ac4f71b6316ecb4c227b..5b6879c2855bc7c66d37ae5bcaf6b5db7530aa63 100644 --- a/python/paddle/signal.py +++ b/python/paddle/signal.py @@ -259,6 +259,7 @@ def stft( name=None, ): r""" + Short-time Fourier transform (STFT). The STFT computes the discrete Fourier transforms (DFT) of short overlapping @@ -271,9 +272,12 @@ def stft( Where: - :math:`t`: The :math:`t`-th input window. + - :math:`\omega`: Frequency :math:`0 \leq \omega < \text{n\_fft}` for `onesided=False`, - or :math:`0 \leq \omega < \lfloor \text{n\_fft} / 2 \rfloor + 1` for `onesided=True`. + or :math:`0 \leq \omega < \lfloor \text{n\_fft} / 2 \rfloor + 1` for `onesided=True`. + - :math:`N`: Value of `n_fft`. + - :math:`H`: Value of `hop_length`. Args: @@ -300,9 +304,9 @@ def stft( to set this property. For more information, please refer to :ref:`api_guide_Name`. Returns: - The complex STFT output tensor with shape `[..., n_fft//2 + 1, num_frames]`( - real-valued input and `onesided` is `True`) or `[..., n_fft, num_frames]`( - `onesided` is `False`) + The complex STFT output tensor with shape `[..., n_fft//2 + 1, num_frames]` + (real-valued input and `onesided` is `True`) or `[..., n_fft, num_frames]` + (`onesided` is `False`) Examples: .. code-block:: python @@ -319,6 +323,7 @@ def stft( x = paddle.randn([8, 48000], dtype=paddle.float64) + \ paddle.randn([8, 48000], dtype=paddle.float64)*1j # [8, 48000] complex128 y1 = stft(x, n_fft=512, center=False, onesided=False) # [8, 512, 372] + """ check_variable_and_dtype( x, 'x', ['float32', 'float64', 'complex64', 'complex128'], 'stft' ) diff --git a/python/paddle/sparse/nn/layer/activation.py b/python/paddle/sparse/nn/layer/activation.py index 91d5c198189dd9a490a62261cf2de8700f0d02ad..f87901123a5c0b6da85879a37c005e33c4287208 100644 --- a/python/paddle/sparse/nn/layer/activation.py +++ b/python/paddle/sparse/nn/layer/activation.py @@ -20,6 +20,7 @@ __all__ = [] class ReLU(Layer): """ + Sparse ReLU Activation, requiring x to be a SparseCooTensor or SparseCsrTensor. .. math:: @@ -44,6 +45,7 @@ relu = paddle.sparse.nn.ReLU() out = relu(sparse_x) # [0., 0., 1.] + """ def __init__(self, name=None): @@ -60,6 +62,7 @@ class Softmax(Layer): r""" + Sparse Softmax Activation, requiring x to be a SparseCooTensor or SparseCsrTensor.
Note: @@ -129,6 +132,7 @@ class Softmax(Layer): class ReLU6(Layer): """ + Sparse ReLU6 Activation, requiring x to be a SparseCooTensor or SparseCsrTensor. .. math:: @@ -152,6 +156,7 @@ class ReLU6(Layer): sparse_x = dense_x.to_sparse_coo(1) relu6 = paddle.sparse.nn.ReLU6() out = relu6(sparse_x) + """ def __init__(self, name=None): @@ -168,6 +173,7 @@ class ReLU6(Layer): class LeakyReLU(Layer): r""" + Sparse Leaky ReLU Activation, requiring x to be a SparseCooTensor or SparseCsrTensor. .. math:: @@ -199,6 +205,7 @@ class LeakyReLU(Layer): sparse_x = dense_x.to_sparse_coo(1) leaky_relu = paddle.sparse.nn.LeakyReLU(0.5) out = leaky_relu(sparse_x) + """ def __init__(self, negative_slope=0.01, name=None):