diff --git a/python/paddle/fft.py b/python/paddle/fft.py
index 4b6a93edc447b9a4cdc758135dec53eb2bbdba44..ec0ab7776d1114a210234ab60a58d48dab82b986 100644
--- a/python/paddle/fft.py
+++ b/python/paddle/fft.py
@@ -166,7 +166,7 @@ def fft(x, n=None, axis=-1, norm="backward", name=None):
             by `axis` is used.
         axis (int, optional): Axis used to calculate FFT. If not specified, the last axis 
             is used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward", meaning no normalization on
             the forward transforms and scaling by ``1/n`` on the `ifft`. "forward" instead applies 
@@ -235,7 +235,7 @@ def ifft(x, n=None, axis=-1, norm="backward", name=None):
             by `axis` is used.
         axis (int, optional): Axis used to calculate FFT. If not specified, the last axis 
             is used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward", meaning no normalization on
             the forward transforms and scaling by ``1/n`` on the `ifft`. "forward" instead applies 
@@ -303,6 +303,12 @@ def rfft(x, n=None, axis=-1, norm="backward", name=None):
             the forward/backward  pair of transforms is scaled and with what 
             normalization factor. Include {"backward", "ortho", "forward"}, 
             default value is "backward".
+            
+                - "backward": The scaling factors of the forward and backward transforms are ``1`` and ``1/n`` respectively;
+                - "forward": The scaling factors of the forward and backward transforms are ``1/n`` and ``1`` respectively;
+                - "ortho": The scaling factors of the forward and backward transforms are both ``1/sqrt(n)``.
+
+            Where ``n`` is the number of points of the transform along ``axis``.
         name(str, optional): The default value is None.  Normally there is no 
             need for user to set this property. For more information, please 
             refer to :ref:`api_guide_Name` . 
@@ -310,11 +316,10 @@ def rfft(x, n=None, axis=-1, norm="backward", name=None):
     Returns:
         out(Tensor) : complex tensor
 
-    Raises:
-
-
     Examples:
+    
     .. code-block:: python
+    
         import paddle
 
         x = paddle.to_tensor([0.0, 1.0, 0.0, 0.0])
@@ -348,7 +353,7 @@ def irfft(x, n=None, axis=-1, norm="backward", name=None):
             along the ` axis'.
         axis (int, optional): Axis used to calculate FFT. If not specified, the last axis 
             is used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward".
         name (str, optional): The default value is None.  Normally there is no need for user to set 
@@ -365,15 +370,13 @@ def irfft(x, n=None, axis=-1, norm="backward", name=None):
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = np.array([1, -1j, -1])
-            xp = paddle.to_tensor(x)
-            irfft_xp = paddle.fft.irfft(xp).numpy()
-            print(irfft_xp)
-            #  [0. 1. 0. 0.]
-
+            x = paddle.to_tensor([1, -1j, -1])
+            irfft_x = paddle.fft.irfft(x)
+            print(irfft_x)
+            # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [0., 1., 0., 0.])
     """
     return fft_c2r(x, n, axis, norm, forward=False, name=name)
 
@@ -392,7 +395,7 @@ def hfft(x, n=None, axis=-1, norm="backward", name=None):
             along the ` axis'.
         axis (int,optional): Axis used to calculate FFT. If not specified, the last axis 
             is used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward".
         name (str, optional): The default value is None.  Normally there is no need for user to set 
@@ -409,14 +412,13 @@ def hfft(x, n=None, axis=-1, norm="backward", name=None):
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = np.array([1, -1j, -1])
-            xp = paddle.to_tensor(x)
-            hfft_xp = paddle.fft.hfft(xp).numpy()
-            print(hfft_xp)
-            #  [0. 0. 0. 4.]
+            x = paddle.to_tensor([1, -1j, -1])
+            hfft_x = paddle.fft.hfft(x)
+            print(hfft_x)
+            # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [0., 0., 0., 4.])
     """
 
     return fft_c2r(x, n, axis, norm, forward=True, name=name)
@@ -456,7 +458,9 @@ def ihfft(x, n=None, axis=-1, norm="backward", name=None):
         out(Tensor) : complex tensor.
 
     Examples:
+    
     .. code-block:: python
+    
         import paddle 
 
         spectrum = paddle.to_tensor([10.0, -5.0, 0.0, -1.0, 0.0, -5.0])
@@ -490,7 +494,7 @@ def fftn(x, s=None, axes=None, norm="backward", name=None):
             by `axes` is used.
         axes (sequence of ints, optional): Axes used to calculate FFT. If not given, the last ``len(s)``
             axes are used, or all axes if `s` is also not specified.      
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward", meaning no normalization on
             the forward transforms and scaling by ``1/n`` on the `ifft`. "forward" instead applies 
@@ -570,7 +574,7 @@ def ifftn(x, s=None, axes=None, norm="backward", name=None):
             by `axes` is used.
         axes (sequence of ints, optional): Axes used to calculate FFT. If not given, the last ``len(s)``
             axes are used, or all axes if `s` is also not specified.      
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward", meaning no normalization on
             the forward transforms and scaling by ``1/n`` on the `ifft`. "forward" instead applies 
@@ -587,18 +591,21 @@ def ifftn(x, s=None, axes=None, norm="backward", name=None):
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = np.eye(3)
-            xp = paddle.to_tensor(x)
-            ifftn_xp = paddle.fft.ifftn(xp, axes=(1,)).numpy()
-            print(ifftn_xp)
-
-            #   [[ 0.33333333+0.j          0.33333333+0.j          0.33333333-0.j        ]
-            #   [ 0.33333333+0.j         -0.16666667+0.28867513j -0.16666667-0.28867513j]
-            #   [ 0.33333333+0.j         -0.16666667-0.28867513j -0.16666667+0.28867513j]]
-
+            x = paddle.eye(3)
+            ifftn_x = paddle.fft.ifftn(x, axes=(1,))
+            print(ifftn_x)
+            # Tensor(shape=[3, 3], dtype=complex64, place=Place(cpu), stop_gradient=True,
+            #        [[ (0.3333333432674408+0j)                  ,
+            #           (0.3333333432674408-0j)                  ,
+            #           (0.3333333432674408+0j)                  ],
+            #         [ (0.3333333432674408+0j)                  ,
+            #          (-0.1666666716337204+0.28867512941360474j),
+            #          (-0.1666666716337204-0.28867512941360474j)],
+            #         [ (0.3333333432674408+0j)                  ,
+            #          (-0.1666666716337204-0.28867512941360474j),
+            #          (-0.1666666716337204+0.28867512941360474j)]])
     """
     if is_integer(x) or is_floating_point(x):
         return fftn_r2c(x,
@@ -630,20 +637,29 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None):
 
     Args:
         x(Tensor) : Input tensor, taken to be real.
-        s(Sequence[int]) : Shape to use from the exec fft. The final element of 
+        s(Sequence[int], optional) : Shape to use from the exec fft. The final element of 
             `s` corresponds to `n` for ``rfft(x, n)``, while for the remaining 
             axes, it corresponds to `n` for ``fft(x, n)``. Along any axis, if 
             the given shape is smaller than that of the input, the input is 
             cropped.  If it is larger, the input is padded with zeros. if `s` is 
             not given, the shape of the input along the axes specified by `axes` 
             is used.
-        axes(Sequence[int]) : Axes over which to compute the FFT.  If not given, 
+        axes(Sequence[int], optional) : Axes over which to compute the FFT.  If not given, 
             the last ``len(s)`` axes are used, or all axes if `s` is also not 
             specified.
         norm(str, optional) : Normalization mode, indicates which direction of 
             the forward/backward pair of transforms is scaled and with what 
             normalization factor. Include {"backward", "ortho", "forward"}, 
-            default value is "backward".
+            default value is "backward". The details of the
+            three modes are shown below:
+
+                - "backward": The scaling factors of the forward and backward transforms are ``1``
+                  and ``1/n`` respectively;
+                - "forward": The scaling factors of the forward and backward transforms are ``1/n``
+                  and ``1`` respectively;
+                - "ortho": The scaling factors of the forward and backward transforms are both ``1/sqrt(n)``.
+
+            Where ``n`` is the product of the elements of ``s``.
         name(str, optional): The default value is None.  Normally there is no 
             need for user to set this property. For more information, please 
             refer to :ref:`api_guide_Name` . 
@@ -651,12 +667,10 @@ def rfftn(x, s=None, axes=None, norm="backward", name=None):
     Returns:
         out(Tensor): complex tensor
 
-
-    Raises:
-        ValueError: If `s` and `axes` have different length.
-
     Examples:
+    
     .. code-block:: python
+    
         import paddle
 
         # default, all axis will be used to exec fft
@@ -694,7 +708,7 @@ def irfftn(x, s=None, axes=None, norm="backward", name=None):
     Fourier Transform for real input over any number of axes in an
     M-D array by means of the Fast Fourier Transform (FFT). In
     other words, ``irfftn(rfftn(x), x.shape) == x`` to within numerical
-    accuracy. (The ``a.shape`` is necessary like ``len(a)`` is for `irfft`,
+    accuracy. (The ``x.shape`` is necessary like ``len(x)`` is for `irfft`,
     and for the same reason.)
 
     The input should be ordered in the same way as is returned by `rfftn`,
@@ -704,19 +718,26 @@ def irfftn(x, s=None, axes=None, norm="backward", name=None):
     Args:
         x (Tensor): The input data. It's a Tensor type.
         s (sequence of ints, optional): The length of the output transform axis. 
-            (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.). `s` is also the
-            number of input points used along this axis, except for the last axis,
-            where ``s[-1]//2+1`` points of the input are used. Along any axis, if 
-            the shape indicated by `s` is smaller than that of the input, the input 
-            is cropped. If it is larger, the input is padded with zeros. 
-            If `s` is not given, the shape of the input along the axes specified by axes 
-            is used. Except for the last axis which is taken to be ``2*(k-1)`` where 
-            ``k`` is the length of the input along that axis.
+            (``s[0]`` refers to axis 0, ``s[1]`` to axis 1, etc.). 
+            
+            - `s` is also the number of input points used along this axis, except for the last axis, where ``s[-1]//2+1`` points of the input are used. 
+            - Along any axis, if the shape indicated by `s` is smaller than that of the input, the input is cropped. If it is larger, the input is padded with zeros. 
+            - If `s` is not given, the shape of the input along the axes specified by `axes` is used, except for the last axis, which is taken to be ``2*(k-1)``,
+              where ``k`` is the length of the input along that axis.
+
+            
         axes (sequence of ints, optional): Axes over which to compute the inverse FFT. If not given, the last
             `len(s)` axes are used, or all axes if `s` is also not specified.      
         norm (str): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
-            of "forward" or "backward" or "ortho". Default is "backward".
+            of "forward" or "backward" or "ortho". Default is "backward". The details of the
+            three modes are shown below:
+
+                - "backward": The scaling factors of the forward and backward transforms are ``1`` and ``1/n`` respectively;
+                - "forward": The scaling factors of the forward and backward transforms are ``1/n`` and ``1`` respectively;
+                - "ortho": The scaling factors of the forward and backward transforms are both ``1/sqrt(n)``.
+
+            Where ``n`` is the product of the elements of ``s``.
         name (str, optional): The default value is None.  Normally there is no need for user to set 
             this property. For more information, please refer to :ref:`api_guide_Name`. 
     
@@ -733,14 +754,17 @@ def irfftn(x, s=None, axes=None, norm="backward", name=None):
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = (np.array([2, 2, 3]) + 1j * np.array([2, 2, 3])).astype(np.complex128)
-            xp = paddle.to_tensor(x)
-            irfftn_xp = paddle.fft.irfftn(xp).numpy()
-            print(irfftn_xp)
-            #  [ 2.25 -1.25  0.25  0.75]
+            x = paddle.to_tensor([2.+2.j, 2.+2.j, 3.+3.j]).astype(paddle.complex128)
+            print(x)
+            irfftn_x = paddle.fft.irfftn(x)
+            print(irfftn_x)
+            
+            # Tensor(shape=[3], dtype=complex128, place=Place(cpu), stop_gradient=True,
+            #        [(2+2j), (2+2j), (3+3j)])
+            # Tensor(shape=[4], dtype=float64, place=Place(cpu), stop_gradient=True,
+            #        [ 2.25000000, -1.25000000,  0.25000000,  0.75000000])
     
     """
     return fftn_c2r(x, s, axes, norm, forward=False, name=name)
@@ -770,7 +794,7 @@ def hfftn(x, s=None, axes=None, norm="backward", name=None):
             ``k`` is the length of the input along that axis.
         axes (sequence of ints, optional): Axes over which to compute the inverse FFT. If not given, the last
             `len(s)` axes are used, or all axes if `s` is also not specified.      
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward".
         name (str, optional): The default value is None.  Normally there is no need for user to set 
@@ -784,16 +808,13 @@ def hfftn(x, s=None, axes=None, norm="backward", name=None):
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = (np.array([2, 2, 3]) + 1j * np.array([2, 2, 3])).astype(np.complex128)
-            xp = paddle.to_tensor(x)
-            hfftn_xp = paddle.fft.hfftn(xp).numpy()
-            print(hfftn_xp)
-            #  [ 9.  3.  1. -5.]
-
-
+            x = paddle.to_tensor([(2+2j), (2+2j), (3+3j)])
+            hfftn_x = paddle.fft.hfftn(x)
+            print(hfftn_x)
+            # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [ 9.,  3.,  1., -5.])
     """
     return fftn_c2r(x, s, axes, norm, forward=True, name=name)
 
@@ -814,7 +835,7 @@ def ihfftn(x, s=None, axes=None, norm="backward", name=None):
             of the input, the input is cropped. If it is larger, the input is 
             padded with zeros. if `s` is not given, the shape of the input 
             along the axes specified by `axes` is used.
-        axis(Sequence[int], optional) : Axis over which to compute the inverse FFT. If not
+        axes(Sequence[int], optional) : Axes over which to compute the inverse FFT. If not
             given, the last axis is used.
         norm(str, optional) : Normalization mode, indicates which direction of 
             the forward/backward pair of transforms is scaled and with what 
@@ -828,7 +849,9 @@ def ihfftn(x, s=None, axes=None, norm="backward", name=None):
         out(Tensor) : complex tensor.
 
     Examples:
+    
     .. code-block:: python
+    
         import paddle 
 
         spectrum = paddle.to_tensor([10.0, -5.0, 0.0, -1.0, 0.0, -5.0])
@@ -838,7 +861,6 @@ def ihfftn(x, s=None, axes=None, norm="backward", name=None):
         print(paddle.fft.ihfft(spectrum))
         #  Tensor(shape = [4], dtype = complex64, place = CUDAPlace(0), stop_gradient = True,
         #         [(-0.1666666716337204+0j),  (1-1.9868215517249155e-08j), (2.3333334922790527-1.9868215517249155e-08j),  (3.5+0j)])
-
     """
     return fftn_r2c(x, s, axes, norm, forward=False, onesided=True, name=name)
 
@@ -863,7 +885,7 @@ def fft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
             by `axes` is used. Default is None.
         axes (sequence of ints, optional):  Axes over which to compute the FFT. It should be a 
             sequence of 2 integers. If not specified, the last two axes are used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward".
         name (str, optional): The default value is None.  Normally there is no need for user to set 
@@ -872,11 +894,6 @@ def fft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
     Returns:
         Complex tensor. The truncated or zero-padded input, transformed along the axes indicated by `axes`, 
         or the last two axes if `axes` is not given.
-    
-    Raises:
-        ValueError: if `s` not be a sequence of 2 integers or None.
-        ValueError: if `axes` not be a sequence of 2 integers or None.
-        ValueError: If the input dimension is smaller than 2.
 
     Examples:
 
@@ -934,7 +951,7 @@ def ifft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
             by `axes` is used. Default is None.
         axes (sequence of ints, optional):  Axes over which to compute the FFT. It should be a 
             sequence of 2 integers. If not specified, the last two axes are used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
             of "forward" or "backward" or "ortho". Default is "backward".
         name (str, optional): The default value is None.  Normally there is no need for user to set 
@@ -944,11 +961,6 @@ def ifft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
         Complex tensor. The truncated or zero-padded input, transformed along the axes indicated by `axes`, 
         or the last two axes if `axes` is not given.
 
-    Raises:
-        ValueError: if `s` not be a sequence of 2 integers or None.
-        ValueError: if `axes` not be a sequence of 2 integers or None.
-        ValueError: If the input dimension is smaller than 2.
-
     Examples:
 
         .. code-block:: python
@@ -986,12 +998,19 @@ def rfft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
 
     Args:
         x(Tensor): Input tensor, taken to be real.
-        s(Sequence[int]) : Shape of the FFT.
+        s(Sequence[int], optional) : Shape of the FFT.
         axes(Sequence[int], optional): Axes over which to compute the FFT.
         norm(str, optional) : {"backward", "ortho", "forward"}, 
             default is "backward". Indicates which direction of the 
             forward/backward pair of transforms is scaled and with what 
-            normalization factor.
+            normalization factor. The details of the
+            three modes are shown below:
+
+                - "backward": The scaling factors of the forward and backward transforms are ``1`` and ``1/n`` respectively;
+                - "forward": The scaling factors of the forward and backward transforms are ``1/n`` and ``1`` respectively;
+                - "ortho": The scaling factors of the forward and backward transforms are both ``1/sqrt(n)``.
+
+            Where ``n`` is the product of the elements of ``s``.
         name(str, optional): The default value is None.  Normally there is no 
             need for user to set this property. For more information, please 
             refer to :ref:`api_guide_Name` . 
@@ -999,12 +1018,10 @@ def rfft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
     Returns: 
         out(Tensor): The result of the real 2-D FFT.
 
-    Raises:
-
-
     Examples:
 
     .. code-block:: python
+    
         import paddle
         import numpy as np
 
@@ -1040,34 +1057,34 @@ def irfft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
         s (sequence of ints, optional): Shape of the real output to the inverse FFT. Default is None.
         axes (sequence of ints, optional): The axes over which to compute the inverse FFT. Axes 
             must be two-dimensional. If not specified, the last two axes are used by default.       
-        norm (str): Indicates which direction to scale the `forward` or `backward` transform
+        norm (str, optional): Indicates which direction to scale the `forward` or `backward` transform
             pair and what normalization factor to use. The parameter value must be one 
-            of "forward" or "backward" or "ortho". Default is "backward".
+            of "forward" or "backward" or "ortho". Default is "backward". The details of the
+            three modes are shown below:
+
+                - "backward": The scaling factors of the forward and backward transforms are ``1`` and ``1/n`` respectively;
+                - "forward": The scaling factors of the forward and backward transforms are ``1/n`` and ``1`` respectively;
+                - "ortho": The scaling factors of the forward and backward transforms are both ``1/sqrt(n)``.
+
+            Where ``n`` is the product of the elements of ``s``.
         name (str, optional): The default value is None.  Normally there is no need for user to set 
             this property. For more information, please refer to :ref:`api_guide_Name` . 
     
     Returns:
         Real tensor. The result of the inverse real 2-D FFT.
-
-    Raises:
-        ValueError: if `s` not be a sequence of 2 integers or None.
-        ValueError: if `axes` not be a sequence of 2 integers or None.
-        ValueError: If the input dimension is smaller than 2.
     
     Examples:
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = (np.array([[3,2,3],[2, 2, 3]]) + 1j * np.array([[3,2,3],[2, 2, 3]])).astype(np.complex128)
-            xp = paddle.to_tensor(x)
-            irfft2_xp = paddle.fft.irfft2(xp).numpy()
-            print(irfft2_xp)
-            #  [[ 2.375 -1.125  0.375  0.875]
-            #   [ 0.125  0.125  0.125  0.125]]
-
+            x = paddle.to_tensor([[3.+3.j, 2.+2.j, 3.+3.j], [2.+2.j, 2.+2.j, 3.+3.j]])
+            irfft2_x = paddle.fft.irfft2(x)
+            print(irfft2_x)
+            # Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[ 2.37500000, -1.12500000,  0.37500000,  0.87500000],
+            #         [ 0.12500000,  0.12500000,  0.12500000,  0.12500000]])
     """
     _check_at_least_ndim(x, 2)
     if s is not None:
@@ -1101,26 +1118,18 @@ def hfft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
     Returns:
         Real tensor. The real result of the 2-D Hermitian complex real FFT.
     
-    Raises:
-        ValueError: if `s` not be a sequence of 2 integers or None.
-        ValueError: if `axes` not be a sequence of 2 integers or None.
-        ValueError: If the input dimension is smaller than 2.
-    
     Examples:
 
         .. code-block:: python
 
-            import numpy as np
             import paddle
 
-            x = (np.array([[3,2,3],[2, 2, 3]]) + 1j * np.array([[3,2,3],[2, 2, 3]])).astype(np.complex128)
-            xp = paddle.to_tensor(x)
-            hfft2_xp = paddle.fft.hfft2(xp).numpy()
-            print(hfft2_xp)
-            #  [[19.  7.  3. -9.]
-            #   [ 1.  1.  1.  1.]]
-
-
+            x = paddle.to_tensor([[3.+3.j, 2.+2.j, 3.+3.j], [2.+2.j, 2.+2.j, 3.+3.j]])
+            hfft2_x = paddle.fft.hfft2(x)
+            print(hfft2_x)
+            # Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[19.,  7.,  3., -9.],
+            #         [ 1.,  1.,  1.,  1.]])
     """
     _check_at_least_ndim(x, 2)
     if s is not None:
@@ -1144,12 +1153,12 @@ def ihfft2(x, s=None, axes=(-2, -1), norm="backward", name=None):
     For more details see `ihfftn`.
 
     Args:
-        x(Tensor): Input tensor
+        x(Tensor): Input tensor.
         s(Sequence[int], optional): Shape of the real input to the inverse FFT.
         axes(Sequance[int], optional): The axes over which to compute the 
             inverse fft. Default is the last two axes.
         norm(str, optional): {"backward", "ortho", "forward"}. Default is 
-        "backward".
+            "backward".
         name(str, optional): The default value is None.  Normally there is no 
             need for user to set this property. For more information, please 
             refer to :ref:`api_guide_Name` . 
@@ -1254,6 +1263,8 @@ def rfftfreq(n, d=1.0, dtype=None, name=None):
     Args:
         n (int): Dimension inputed.
         d (scalar, optional): Sample spacing (inverse of the sampling rate). Defaults is 1.
+        dtype (str, optional): The data type of the result. Defaults to the data type
+            returned by ``paddle.get_default_dtype()``.
         name (str, optional): The default value is None.  Normally there is no need for user to set 
             this property. For more information, please refer to :ref:`api_guide_Name`.
 
diff --git a/python/paddle/fluid/dataloader/dataset.py b/python/paddle/fluid/dataloader/dataset.py
index 1c4728b4223626b63bfea8e659fdcf27437e8da0..bd3bb87a79fdb1e94febda24feaacf84ed61e75a 100755
--- a/python/paddle/fluid/dataloader/dataset.py
+++ b/python/paddle/fluid/dataloader/dataset.py
@@ -411,7 +411,7 @@ class Subset(Dataset):
         indices (sequence): Indices in the whole set selected for subset.
 
     Returns:
-        Dataset: A Dataset which is the subset of the original dataset.
+        Dataset: A Dataset which is a subset of the original dataset.
     
     Examples:
 
diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py
index d52a4b225ca7562d0be02c4faada873947057a68..239224101f2b405b7a463e3afa088a6b653480e2 100644
--- a/python/paddle/nn/functional/common.py
+++ b/python/paddle/nn/functional/common.py
@@ -176,7 +176,7 @@ def interpolate(x,
                 name=None):
     """
 
-    This op resizes a batch of images.
+    This API resizes a batch of images.
     The input must be a 3-D Tensor of the shape (num_batches, channels, in_w)
     or 4-D (num_batches, channels, in_h, in_w), or a 5-D Tensor of the shape
     (num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels),
@@ -341,46 +341,28 @@ def interpolate(x,
         A 3-D Tensor of the shape (num_batches, channels, out_w) or (num_batches, out_w, channels),
         A 4-D Tensor of the shape (num_batches, channels, out_h, out_w) or (num_batches, out_h, out_w, channels),
         or 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels).
-    Raises:
-        TypeError: size should be a list or tuple or Tensor.
-        ValueError: The 'mode' of image_resize can only be 'linear', 'bilinear',
-                    'trilinear', 'bicubic', 'area' or 'nearest' currently.
-        ValueError: 'linear' only support 3-D tensor.
-        ValueError: 'bilinear' and 'bicubic' only support 4-D tensor.
-        ValueError: 'nearest' only support 4-D or 5-D tensor.
-        ValueError: 'trilinear' only support 5-D tensor.
-        ValueError: One of size and scale_factor must not be None.
-        ValueError: size length should be 1 for input 3-D tensor.
-        ValueError: size length should be 2 for input 4-D tensor.
-        ValueError: size length should be 3 for input 5-D tensor.
-        ValueError: scale_factor should be greater than zero.
-        TypeError: align_corners should be a bool value
-        ValueError: align_mode can only be '0' or '1'
-        ValueError: data_format can only be 'NCW', 'NWC', 'NCHW', 'NHWC', 'NCDHW' or 'NDHWC'.
+
 
     Examples:
         .. code-block:: python
 
-	        import paddle
-	        import numpy as np
-            import paddle.nn.functional as F
-            
-            # given out size
-            input_data = np.random.rand(2,3,6,10).astype("float32")
-            x = paddle.to_tensor(input_data)
-            output_1 = F.interpolate(x=x, size=[12,12])
-    	    print(output_1.shape)
-	        # [2L, 3L, 12L, 12L]
-            
-            # given scale
-            output_2 = F.interpolate(x=x, scale_factor=[2,1])
-            print(output_2.shape)
-            # [2L, 3L, 12L, 10L]
-            
-            # bilinear interp
-            output_3 = F.interpolate(x=x, scale_factor=[2,1], mode="bilinear")
-            print(output_2.shape)
-            # [2L, 3L, 12L, 10L]
+            import paddle
+            import paddle.nn.functional as F
+
+            input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
+            output_1 = F.interpolate(x=input_data, size=[12,12])
+            print(output_1.shape)
+            # [2, 3, 12, 12]
+
+            # given scale
+            output_2 = F.interpolate(x=input_data, scale_factor=[2,1])
+            print(output_2.shape)
+            # [2, 3, 12, 10]
+
+            # bilinear interp
+            output_3 = F.interpolate(x=input_data, scale_factor=[2,1], mode="bilinear")
+            print(output_3.shape)
+            # [2, 3, 12, 10]
     """
     data_format = data_format.upper()
     resample = mode.upper()
@@ -668,7 +650,7 @@ def upsample(x,
              data_format='NCHW',
              name=None):
     """
-    This op resizes a batch of images.
+    This API resizes a batch of images.
 
     The input must be a 3-D Tensor of the shape (num_batches, channels, in_w)
     or 4-D (num_batches, channels, in_h, in_w), or a 5-D Tensor of the shape
@@ -716,6 +698,7 @@ def upsample(x,
 
     Example:
     .. code-block:: text
+    
         For scale_factor:
             if align_corners = True && out_size > 1 :
               scale_factor = (in_size-1.0)/(out_size-1.0)
@@ -801,23 +784,23 @@ def upsample(x,
     Parameters:
         x (Tensor): 3-D, 4-D or 5-D Tensor, its data type is float32, float64, or uint8,
                           its data format is specified by :attr:`data_format`.
-        size (list|tuple|Tensor|None): Output shape of image resize
+        size (list|tuple|Tensor|None, optional): Output shape of image resize
              layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) 
              when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. 
              Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1].
              If a Tensor , its dimensions size should be a 1.
-        scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At
+        scale_factor (float|Tensor|list|tuple|None, optional): The multiplier for the input height or width. At
              least one of :attr:`size` or :attr:`scale_factor` must be set.
              And :attr:`size` has a higher priority than :attr:`scale_factor`.Has to match input size if 
              it is either a list or a tuple or a Tensor.
              Default: None.
-        mode (str): The resample method. It supports 'linear', 'nearest', 'bilinear',
+        mode (str, optional): The resample method. It supports 'linear', 'nearest', 'bilinear',
                        'bicubic' and 'trilinear' currently. Default: 'nearest'
-        align_corners(bool) :  An optional bool, If True, the centers of the 4 corner pixels of the
+        align_corners(bool, optional) :  An optional bool. If True, the centers of the 4 corner pixels of the
                                input and output tensors are aligned, preserving the values at the
                                corner pixels.
                                Default: False
-        align_mode(int)  :  An optional for linear/bilinear/trilinear interpolation. Refer to the formula in the example above,
+        align_mode(int, optional)  :  An option for linear/bilinear/trilinear interpolation. Refer to the formula in the example above,
                             it can be \'0\' for src_idx = scale_factor*(dst_indx+0.5)-0.5 , can be \'1\' for
                             src_idx = scale_factor*dst_index.
         data_format (str, optional): Specify the data format of the input, and the data format of the output
@@ -832,32 +815,19 @@ def upsample(x,
         A 3-D Tensor of the shape (num_batches, channels, out_w) or (num_batches, out_w, channels),
         A 4-D Tensor of the shape (num_batches, channels, out_h, out_w) or (num_batches, out_h, out_w, channels),
         or 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels).
-    Raises:
-        TypeError: size should be a list or tuple or Tensor.
-        ValueError: The 'mode' of image_resize can only be 'linear', 'bilinear',
-                    'trilinear', 'bicubic', or 'nearest' currently.
-        ValueError: 'linear' only support 3-D tensor.
-        ValueError: 'bilinear', 'bicubic' and 'nearest' only support 4-D tensor.
-        ValueError: 'trilinear' only support 5-D tensor.
-        ValueError: One of size and scale_factor must not be None.
-        ValueError: size length should be 1 for input 3-D tensor.
-        ValueError: size length should be 2 for input 4-D tensor.
-        ValueError: size length should be 3 for input 5-D tensor.
-        ValueError: scale_factor should be greater than zero.
-        TypeError: align_corners should be a bool value
-        ValueError: align_mode can only be '0' or '1'
-        ValueError: data_format can only be 'NCW', 'NWC', 'NCHW', 'NHWC', 'NCDHW' or 'NDHWC'.
+
         Examples:
         .. code-block:: python
-            import paddle
-            import numpy as np
-            import paddle.nn.functional as F
+
+            import paddle
+            import paddle.nn as nn
 
-            input_data = np.random.rand(2,3,6,10).astype("float32")
-            input = paddle.to_tensor(input_data)
-            output = F.upsample(x=input, size=[12,12])
-            print(output.shape)
-            # [2L, 3L, 12L, 12L]
+            input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
+            upsample_out = nn.Upsample(size=[12,12])
+
+            output = upsample_out(x=input_data)
+            print(output.shape)
+            # [2, 3, 12, 12]
 
     """
     return interpolate(x, size, scale_factor, mode, align_corners, align_mode,
@@ -884,17 +854,17 @@ def bilinear(x1, x2, weight, bias=None, name=None):
     Examples:
        .. code-block:: python
 
-        import paddle
-        import numpy
-        import paddle.nn.functional as F
-
-        x1 = numpy.random.random((5, 5)).astype('float32')
-        x2 = numpy.random.random((5, 4)).astype('float32')
-        w = numpy.random.random((1000, 5, 4)).astype('float32')
-        b = numpy.random.random((1, 1000)).astype('float32')
+            import paddle
+            import paddle.nn.functional as F
 
-        result = F.bilinear(paddle.to_tensor(x1), paddle.to_tensor(x2), paddle.to_tensor(w), paddle.to_tensor(b))           # result shape [5, 1000]
+            x1 = paddle.randn((5, 5)).astype(paddle.float32)
+            x2 = paddle.randn((5, 4)).astype(paddle.float32)
+            w = paddle.randn((1000, 5, 4)).astype(paddle.float32)
+            b = paddle.randn((1, 1000)).astype(paddle.float32)
 
+            result = F.bilinear(x1, x2, w, b)
+            print(result.shape)
+            # [5, 1000]
     """
 
     if in_dygraph_mode():
@@ -933,10 +903,10 @@ def dropout(x,
 
     Args:
         x (Tensor): The input tensor. The data type is float32 or float64.
-        p (float|int): Probability of setting units to zero. Default 0.5.
-        axis (int|list|tuple): The axis along which the dropout is performed. Default None.
-        training (bool): A flag indicating whether it is in train phrase or not. Default True.
-        mode(str): ['upscale_in_train'(default) | 'downscale_in_infer'].
+        p (float|int, optional): Probability of setting units to zero. Default 0.5.
+        axis (int|list|tuple, optional): The axis along which the dropout is performed. Default None.
+        training (bool, optional): A flag indicating whether it is in the training phase or not. Default True.
+        mode(str, optional): ['upscale_in_train'(default) | 'downscale_in_infer'].
 
                            1. upscale_in_train(default), upscale the output at training time
 
@@ -1036,22 +1006,38 @@ def dropout(x,
 
         .. code-block:: python
 
-            import paddle
-            import numpy as np
-
-            x = np.array([[1,2,3], [4,5,6]]).astype('float32')
-            x = paddle.to_tensor(x)
-            y_train = paddle.nn.functional.dropout(x, 0.5)
-            y_test = paddle.nn.functional.dropout(x, 0.5, training=False) 
-            y_0 = paddle.nn.functional.dropout(x, axis=0)
-            y_1 = paddle.nn.functional.dropout(x, axis=1)
-            y_01 = paddle.nn.functional.dropout(x, axis=[0,1])
-            print(x)
-            print(y_train)
-            print(y_test)
-            print(y_0)
-            print(y_1)
-            print(y_01)
+            import paddle
+
+            x = paddle.to_tensor([[1,2,3], [4,5,6]]).astype(paddle.float32)
+            y_train = paddle.nn.functional.dropout(x, 0.5)
+            y_test = paddle.nn.functional.dropout(x, 0.5, training=False)
+            y_0 = paddle.nn.functional.dropout(x, axis=0)
+            y_1 = paddle.nn.functional.dropout(x, axis=1)
+            y_01 = paddle.nn.functional.dropout(x, axis=[0,1])
+            print(x)
+            # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[1., 2., 3.],
+            #         [4., 5., 6.]])
+            print(y_train)
+            # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[2. , 0. , 6. ],
+            #         [8. , 0. , 12.]])
+            print(y_test)
+            # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[1., 2., 3.],
+            #         [4., 5., 6.]])
+            print(y_0)
+            # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[0. , 0. , 0. ],
+            #         [8. , 10., 12.]])
+            print(y_1)
+            # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[2. , 0. , 6. ],
+            #         [8. , 0. , 12.]])
+            print(y_01)
+            # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[0. , 0. , 0. ],
+            #         [8. , 0. , 12.]])
 
     """
     if not isinstance(p, (float, int, Variable)):
@@ -1199,17 +1185,16 @@ def dropout2d(x, p=0.5, training=True, data_format='NCHW', name=None):
         .. code-block:: python
 
             import paddle
-            import numpy as np
 
-            x = np.random.random(size=(2, 3, 4, 5)).astype('float32')
-            x = paddle.to_tensor(x)
+            x = paddle.randn(shape=(2, 3, 4, 5)).astype(paddle.float32)
             y_train = paddle.nn.functional.dropout2d(x)  #train
             y_test = paddle.nn.functional.dropout2d(x, training=False) #test
             for i in range(2):
                 for j in range(3):
-                    print(x.numpy()[i,j,:,:])
-                    print(y_train.numpy()[i,j,:,:]) # may all 0
-                    print(y_test.numpy()[i,j,:,:])
+                    print(x[i,j,:,:])
+                    print(y_train[i,j,:,:]) # may all 0
+                    print(y_test[i,j,:,:])
+
     """
     input_shape = x.shape
     if len(input_shape) != 4:
@@ -1252,16 +1237,15 @@ def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import numpy as np
+            import paddle
+
+            x = paddle.randn(shape=(2, 3, 4, 5, 6)).astype(paddle.float32)
+            y_train = paddle.nn.functional.dropout3d(x)  #train
+            y_test = paddle.nn.functional.dropout3d(x, training=False) #test
+            print(x[0,0,:,:,:])
+            print(y_train[0,0,:,:,:]) # may all 0
+            print(y_test[0,0,:,:,:])
 
-            x = np.random.random(size=(2, 3, 4, 5, 6)).astype('float32')
-            x = paddle.to_tensor(x)
-            y_train = paddle.nn.functional.dropout3d(x)  #train
-            y_test = paddle.nn.functional.dropout3d(x, training=False) #test
-            print(x.numpy()[0,0,:,:,:])
-            print(y_train.numpy()[0,0,:,:,:]) # may all 0
-            print(y_test.numpy()[0,0,:,:,:])
     """
 
     input_shape = x.shape
@@ -1301,17 +1285,19 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import numpy as np
-
-            x = np.array([[-1, 1], [-1, 1]]).astype('float32')
-            x = paddle.to_tensor(x)
-            y_train = paddle.nn.functional.alpha_dropout(x, 0.5)
-            y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False)
-            print(x)
-            print(y_train)
-            # [[-0.10721093, 1.6655989 ], [-0.7791938, -0.7791938]] (randomly)
-            print(y_test)
+            import paddle
+
+            x = paddle.to_tensor([[-1, 1], [-1, 1]]).astype(paddle.float32)
+            y_train = paddle.nn.functional.alpha_dropout(x, 0.5)
+            y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False)
+            print(y_train)
+            # Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[-0.10721093, -0.77919382],
+            #         [-0.10721093,  1.66559887]]) (randomly)
+            print(y_test)
+            # Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [[-1.,  1.],
+            #         [-1.,  1.]])
     """
     if not isinstance(p, (float, int)):
         raise TypeError("p argument should be a float or int")
@@ -2058,7 +2044,7 @@ def fold(x,
          name=None):
     r"""
     
-    This Op is used to combines an array of sliding local blocks into a large containing
+    Combines an array of sliding local blocks into a large containing
     tensor. also known as col2im when operated on batched 2D image tensor. Fold calculates each 
     combined value in the resulting large tensor by summing all values from all containing blocks. 
 
@@ -2067,9 +2053,10 @@ def fold(x,
     can be calculated as following.
 
     .. math::
-        H_out &= output_size[0]
-        W_out &= output_size[1]
-        C_out &= C_in / kernel\_sizes[0] / kernel\_sizes[1]
+    
+        H_{out} &= output\_size[0] \\
+        W_{out} &= output\_size[1] \\
+        C_{out} &= \frac{C_{in}}{kernel\_sizes[0]\times kernel\_sizes[1]} \\
 
     Parameters:
         x(Tensor):                3-D Tensor, input tensor of format [N, C, L],
@@ -2078,17 +2065,17 @@ def fold(x,
                                   or an interger o treated as [o, o].
         kernel_sizes(int|list|tuple):   The size of convolution kernel, should be [k_h, k_w]
                                   or an integer k treated as [k, k].
-        strides(int|list|tuple):        The strides, should be [stride_h, stride_w]
+        strides(int|list|tuple, optional):        The strides, should be [stride_h, stride_w]
                                   or an integer stride treated as [sride, stride].
                                   For default, strides will be [1, 1].
-        paddings(int|list|tuple):       The paddings of each dimension, should be
+        paddings(int|list|tuple, optional):       The paddings of each dimension, should be
                                   [padding_top, padding_left, padding_bottom, padding_right]
                                   or [padding_h, padding_w] or an integer padding.
                                   If [padding_h, padding_w] was given, it will expanded to
                                   [padding_h, padding_w, padding_h, padding_w]. If an integer
                                   padding was given, [padding, padding, padding, padding] will
                                   be used. For default, paddings will be [0, 0, 0, 0]
-        dilations(int|list|tuple):      the dilations of convolution kernel, should be
+        dilations(int|list|tuple, optional):      the dilations of convolution kernel, should be
                                   [dilation_h, dilation_w], or an integer dilation treated as
                                   [dilation, dilation]. For default, it will be [1, 1].
         name(str, optional): The default value is None.
diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py
index 01a5f991f420e0e05be09a434486cc5518f062ec..9eeae1c696fd856ad6ffc774633c2d2e989ab9e8 100644
--- a/python/paddle/nn/functional/input.py
+++ b/python/paddle/nn/functional/input.py
@@ -118,17 +118,17 @@ def one_hot(x, num_classes, name=None):
 
 def embedding(x, weight, padding_idx=None, sparse=False, name=None):
     r"""
-    The operator is used to lookup embeddings vector of ids provided by :attr:`x` .
+    Looks up the embedding vectors of the ids provided by :attr:`x` .
 
     The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
     with embedding size.
 
-    **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < weight.shape[0]` ,
-    otherwise the program will throw an exception and exit.
+    Note:
+        The id in :attr:`x` must satisfy :math:`0 <= id < weight.shape[0]` ,
+        otherwise the program will throw an exception and exit.
 
     .. code-block:: text
-
-        Case 1:
+    
             x is a Tensor.
                 padding_idx = -1
                 x.data = [[1, 3], [2, 4], [4, 127]]
@@ -151,17 +151,17 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
             satisfy :math:`0<= id < weight.shape[0]` .
         weight (Tensor): The weight. A Tensor with shape of lookup table parameter. It should have two elements which
             indicates the size of the dictionary of embeddings and the size of each embedding vector respectively.
-        sparse(bool): The flag indicating whether to use sparse update. This parameter only
+        sparse(bool, optional): The flag indicating whether to use sparse update. This parameter only
             affects the performance of the backwards gradient update. It is recommended to set
             True because sparse update is faster. But some optimizers does not support sparse update,
             such as :ref:`api_paddle_optimizer_adadelta_Adadelta` , :ref:`api_paddle_optimizer_adamax_Adamax` , :ref:`api_paddle_optimizer_lamb_Lamb`.
             In these cases, sparse must be False. Default: False.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
+        padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
             If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
             to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup
             encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
             If set None, it makes no effect to output. Default: None.
-        name(str|None): For detailed information, please refer
+        name(str|None, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.
 
@@ -171,13 +171,12 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
     Examples:
 
         .. code-block:: python
-
-            import numpy as np
+        
             import paddle
             import paddle.nn as nn
 
-            x0 = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
-            w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
+            x0 = paddle.arange(3, 6).reshape((3, 1)).astype(paddle.int64)
+            w0 = paddle.full(shape=(10, 3), fill_value=2).astype(paddle.float32)
 
             # x.data = [[3], [4], [5]]
             # x.shape = [3, 1]
diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py
index 21e2aafe916f0b1ccf667d984f4924c410b6fe05..59e4f7e81b9d45cd778a815b6be2de31dc40a40a 100755
--- a/python/paddle/nn/functional/pooling.py
+++ b/python/paddle/nn/functional/pooling.py
@@ -192,23 +192,16 @@ def avg_pool1d(x,
     Returns:
         Tensor: The output tensor of pooling result. The data type is same as input tensor.
 
-    Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ValueError: If `padding` is a list or tuple but its length is greater than 1.
-        ShapeError: If the input is not a 3-D tensor.
-        ShapeError: If the output's shape calculated is not greater than 0.
-
     Examples:
         .. code-block:: python
           
             import paddle
-            import paddle.nn.functional as F
-            import numpy as np
+            import paddle.nn as nn
 
-            data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-            out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0)
-            # out shape: [1, 3, 16]
+            data = paddle.uniform([1, 3, 32], paddle.float32)
+            AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
+            pool_out = AvgPool1D(data)
+            # pool_out shape: [1, 3, 16]
     """
     """NCL to NCHW"""
     data_format = "NCHW"
@@ -318,20 +311,14 @@ def avg_pool2d(x,
     Returns:
         Tensor: The output tensor of pooling result. The data type is same as input tensor.
     
-    Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ShapeError: If the output's shape calculated is not greater than 0.
-    
     Examples:
         .. code-block:: python
           
             import paddle
             import paddle.nn.functional as F
-            import numpy as np
             
             # avg pool2d
-            x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
+            x = paddle.uniform([1, 3, 32, 32], paddle.float32)
             out = F.avg_pool2d(x,
                             kernel_size=2,
                             stride=2, padding=0)
@@ -446,19 +433,13 @@ def avg_pool3d(x,
     
     Returns:
         Tensor: The output tensor of pooling result. The data type is same as input tensor.
-    
-    Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ShapeError: If the output's shape calculated is not greater than 0.
-    
+
     Examples:
         .. code-block:: python
           
           import paddle
-          import numpy as np
 
-          x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
+          x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32)
           # avg pool3d
           out = paddle.nn.functional.avg_pool3d(
                                             x,
@@ -581,9 +562,8 @@ def max_pool1d(x,
 
           import paddle
           import paddle.nn.functional as F
-          import numpy as np
 
-          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
+          data = paddle.uniform([1, 3, 32], paddle.float32)
           pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
           # pool_out shape: [1, 3, 16]
           pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
@@ -1350,8 +1330,10 @@ def adaptive_avg_pool1d(x, output_size, name=None):
         x (Tensor): The input Tensor of pooling, which is a 3-D tensor with shape :math:`[N, C, L]`, where :math:`N` is batch size, :math:`C` is the number of channels and :math:`L` is the length of the feature. The data type is float32 or float64.
         output_size (int): The target output size. Its data type must be int.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+    
     Returns:
         Tensor: The result of 1D adaptive average pooling. Its data type is same as input.
+    
     Examples:
         .. code-block:: python
 
@@ -1409,8 +1391,16 @@ def adaptive_avg_pool1d(x, output_size, name=None):
 
 def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
     """
-    This API implements adaptive average pooling 2d operation.
-    See more details in :ref:`api_nn_pooling_AdaptiveAvgPool2d` .
+    Applies 2D adaptive avg pooling on input tensor. The h and w dimensions
+    of the output tensor are determined by the parameter output_size.
+    For avg adaptive pool2d:
+
+    ..  math::
+        hstart &= floor(i * H_{in} / H_{out}) \\\\
+        hend &= ceil((i + 1) * H_{in} / H_{out}) \\\\
+        wstart &= floor(j * W_{in} / W_{out}) \\\\
+        wend &= ceil((j + 1) * W_{in} / W_{out}) \\\\
+        Output(i ,j) &= \\frac{\\sum Input[hstart:hend, wstart:wend]}{(hend - hstart) * (wend - wstart)}
 
     Args:
         x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor.
@@ -1426,8 +1416,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
                              None by default.
     Returns:
         Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor.
-    Raises:
-        ValueError: If `data_format` is not "NCHW" or "NHWC".
+
     Examples:
         .. code-block:: python
 
@@ -1515,8 +1504,19 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
 
 def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
     """
-    This API implements adaptive average pooling 3d operation.
-    See more details in :ref:`api_nn_pooling_AdaptiveAvgPool3d` .
+    This operation applies 3D adaptive avg pooling on input tensor. The d, h and w dimensions
+    of the output tensor are determined by the parameter output_size.
+    For avg adaptive pool3d:
+
+    ..  math::
+        dstart &= floor(i * D_{in} / D_{out}) \\\\
+        dend &= ceil((i + 1) * D_{in} / D_{out}) \\\\
+        hstart &= floor(j * H_{in} / H_{out}) \\\\
+        hend &= ceil((j + 1) * H_{in} / H_{out}) \\\\
+        wstart &= floor(k * W_{in} / W_{out}) \\\\
+        wend &= ceil((k + 1) * W_{in} / W_{out}) \\\\
+        Output(i ,j, k) &= \\frac{\\sum Input[dstart:dend, hstart:hend, wstart:wend]}
+            {(dend - dstart) * (hend - hstart) * (wend - wstart)}
 
     Args:
         x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor.
@@ -1532,8 +1532,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
                              None by default.
     Returns:
         Tensor: The output tensor of avg adaptive pool3d result. The data type is same as input tensor.
-    Raises:
-        ValueError: If `data_format` is not "NCDHW" or "NDHWC".
+
     Examples:
         .. code-block:: python
 
@@ -1556,12 +1555,10 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
             #                 output[:, :, i, j, k] =
             #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
             import paddle
-            import numpy as np
-            input_data = np.random.rand(2, 3, 8, 32, 32)
-            x = paddle.to_tensor(input_data)
-            # x.shape is [2, 3, 8, 32, 32]
+
+            input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
             out = paddle.nn.functional.adaptive_avg_pool3d(
-                            x = x,
+                            x = input_data,
                             output_size=[3, 3, 3])
             # out.shape is [2, 3, 3, 3, 3]
     """
@@ -1654,9 +1651,8 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
               #
               import paddle
               import paddle.nn.functional as F
-              import numpy as np
 
-              data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
+              data = paddle.uniform([1, 3, 32], paddle.float32)
               pool_out = F.adaptive_max_pool1d(data, output_size=16)
               # pool_out shape: [1, 3, 16])
               pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True)
@@ -1740,13 +1736,10 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
               #             output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
               #
               import paddle
-              import numpy as np
 
-              input_data = np.random.rand(2, 3, 32, 32)
-              x = paddle.to_tensor(input_data)
-              # x.shape is [2, 3, 32, 32]
+              input_data = paddle.randn(shape=(2, 3, 32, 32))
               out = paddle.nn.functional.adaptive_max_pool2d(
-                            x = x,
+                            x = input_data,
                             output_size=[3, 3])
               # out.shape is [2, 3, 3, 3]
     """
@@ -1833,13 +1826,10 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
               #             output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
               #
               import paddle
-              import numpy as np
 
-              input_data = np.random.rand(2, 3, 8, 32, 32)
-              x = paddle.to_tensor(input_data)
-              # x.shape is [2, 3, 8, 32, 32]
+              input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
               out = paddle.nn.functional.adaptive_max_pool3d(
-                            x = x,
+                            x = input_data,
                             output_size=[3, 3, 3])
               # out.shape is [2, 3, 3, 3, 3]
     """
diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py
index c03864a19d58cc33b7568ce9b252d392d517039a..68cc0cedb8f27ef5839fa1883f6c21716956432a 100644
--- a/python/paddle/nn/layer/activation.py
+++ b/python/paddle/nn/layer/activation.py
@@ -140,11 +140,10 @@ class GELU(Layer):
 
     Examples:
         .. code-block:: python
-
+        
             import paddle
-            import numpy as np
 
-            x = paddle.to_tensor(np.array([[-1, 0.5],[1, 1.5]]))
+            x = paddle.to_tensor([[-1, 0.5],[1, 1.5]])
 
             m = paddle.nn.GELU()
             out = m(x) # [-0.158655 0.345731 0.841345 1.39979]
diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py
index 05230c19c489b28b9b9ea04afac26d0a170c67b6..ee5641f5d1257e6e63994bfca75a32251731969c 100644
--- a/python/paddle/nn/layer/common.py
+++ b/python/paddle/nn/layer/common.py
@@ -1279,9 +1279,8 @@ class CosineSimilarity(Layer):
 
 class Embedding(Layer):
     r"""
-    **Embedding Layer**
-
-    This interface is used to construct a callable object of the ``Embedding`` class.
+    
+    Embedding Layer, used to construct a callable object of the ``Embedding`` class.
     For specific usage, refer to code examples. It implements the function of the Embedding Layer.
     This layer is used to lookup embeddings vector of ids provided by :attr:`x` .
     It automatically constructs a 2D embedding matrix based on the
@@ -1290,8 +1289,9 @@ class Embedding(Layer):
     The shape of output Tensor is generated by appending an emb_size dimension to the
     last dimension of the input Tensor shape.
 
-    **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < num_embeddings` ,
-    otherwise the program will throw an exception and exit.
+    Note:
+        The id in :attr:`x` must satisfy :math:`0 <= id < num_embeddings` ,
+        otherwise the program will throw an exception and exit.
 
     .. code-block:: text
 
@@ -1318,23 +1318,23 @@ class Embedding(Layer):
         num_embeddings (int): Just one element which indicate the size
             of the dictionary of embeddings.
         embedding_dim (int):  Just one element which indicate the size of each embedding vector respectively.
-        padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
+        padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
             If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
             to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
             encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
             If set None, it makes no effect to output. Default: None.
-        sparse(bool): The flag indicating whether to use sparse update. This parameter only
+        sparse(bool, optional): The flag indicating whether to use sparse update. This parameter only
             affects the performance of the backwards gradient update. It is recommended to set
             True because sparse update is faster. But some optimizer does not support sparse update,
             such as :ref:`api_paddle_optimizer_adadelta_Adadelta` , :ref:`api_paddle_optimizer_adamax_Adamax` , :ref:`api_paddle_optimizer_lamb_Lamb`.
             In these case, sparse must be False. Default: False.
-        weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
+        weight_attr(ParamAttr, optional): To specify the weight parameter property. Default: None, which means the
             default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition,
             user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
             The local word vector needs to be transformed into numpy format, and the shape of local word
             vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
             is used to load custom or pre-trained word vectors. See code example for details.
-        name(str|None): For detailed information, please refer
+        name(str|None, optional): For detailed information, please refer
                to :ref:`api_guide_Name`. Usually name is no need to set and
                None by default.
 
@@ -1514,7 +1514,7 @@ class Unfold(Layer):
 class Fold(Layer):
     r"""
 
-    This Op is used to combines an array of sliding local blocks into a large containing
+    Combines an array of sliding local blocks into a large containing
     tensor. also known as col2im when operated on batched 2D image tensor. Fold calculates each 
     combined value in the resulting large tensor by summing all values from all containing blocks. 
 
@@ -1523,26 +1523,27 @@ class Fold(Layer):
     can be calculated as following.
 
     .. math::
-        H_out &= output_size[0]
-        W_out &= output_size[1]
-        C_out &= C_in / kernel\_sizes[0] / kernel\_sizes[1]
+
+        H_{out} &= output\_sizes[0] \\
+        W_{out} &= output\_sizes[1] \\
+        C_{out} &= \frac{C_{in}}{kernel\_sizes[0]\times kernel\_sizes[1]}
 
     Parameters:
         output_sizes(list):       The size of output size, should be [output_size_h, output_size_w]
                                   or an interger o treated as [o, o].
         kernel_sizes(int|list|tuple):   The size of convolution kernel, should be [k_h, k_w]
                                   or an integer k treated as [k, k].
-        strides(int|list|tuple):        The strides, should be [stride_h, stride_w]
+        strides(int|list|tuple, optional):        The strides, should be [stride_h, stride_w]
                                   or an integer stride treated as [sride, stride].
                                   For default, strides will be [1, 1].
-        paddings(int|list|tuple):       The paddings of each dimension, should be
+        paddings(int|list|tuple, optional):       The paddings of each dimension, should be
                                   [padding_top, padding_left, padding_bottom, padding_right]
                                   or [padding_h, padding_w] or an integer padding.
                                   If [padding_h, padding_w] was given, it will expanded to
                                   [padding_h, padding_w, padding_h, padding_w]. If an integer
                                   padding was given, [padding, padding, padding, padding] will
                                   be used. For default, paddings will be [0, 0, 0, 0]
-        dilations(int|list|tuple):      the dilations of convolution kernel, should be
+        dilations(int|list|tuple, optional):      the dilations of convolution kernel, should be
                                   [dilation_h, dilation_w], or an integer dilation treated as
                                   [dilation, dilation]. For default, it will be [1, 1].
         name(str, optional): The default value is None.
diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py
index 46ae56a463ea3a87cdea8aeb33be34b72db94389..ccba13316a17b516dbc2283720ea2490cec5a3f1 100755
--- a/python/paddle/nn/layer/pooling.py
+++ b/python/paddle/nn/layer/pooling.py
@@ -53,22 +53,15 @@ class AvgPool1D(Layer):
         name(str, optional): For eed to detailed information, please refer to :ref:`api_guide_Name`.
             Usually name is no nset and None by default.
 
-    Returns:
-        A callable object of AvgPool1D.
-
-    Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ValueError: If `padding` is a list or tuple but its length greater than 1.
-        ShapeError: If the input is not a 3-D tensor.
-        ShapeError: If the output's shape calculated is not greater than 0.
-
     Shape:
         - x(Tensor): The input tensor of avg pool1d operator, which is a 3-D tensor.
           The data type can be float32, float64.
         - output(Tensor): The output tensor of avg pool1d  operator, which is a 3-D tensor.
           The data type is same as input x.
 
+    Returns:
+        A callable object of AvgPool1D.
+
     Examples:
 
         .. code-block:: python
@@ -164,10 +157,7 @@ class AvgPool2D(Layer):
 
     Returns:
         A callable object of AvgPool2D.
-    Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ShapeError: If the output's shape calculated is not greater than 0.
+
     Examples:
         .. code-block:: python
 
@@ -255,10 +245,6 @@ class AvgPool3D(Layer):
 
     Returns:
         A callable object of AvgPool3D.
-    Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ShapeError: If the output's shape calculated is not greater than 0.
 
     Shape:
         - x(Tensor): The input tensor of avg pool3d operator, which is a 5-D tensor.