Unverified commit 0a15b0db authored by Y yuchen202, committed by GitHub

[xdoctest] reformat example code with google style in No.36-43 (#56440)

Parent 71e28b12
......@@ -49,12 +49,15 @@ def celu(x, alpha=1.0, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([[-1., 6.], [1., 15.6]])
out = F.celu(x, alpha=0.2)
# [[-0.19865242, 6. ],
# [ 1. , 15.60000038]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> x = paddle.to_tensor([[-1., 6.], [1., 15.6]])
>>> out = F.celu(x, alpha=0.2)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.19865242, 6. ],
[ 1. , 15.60000038]])
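            >>> # Hedged worked check (illustrative addition, not in the original example):
            >>> # celu(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1)), so for
            >>> # x = -1 and alpha = 0.2 it is 0.2 * (exp(-5) - 1) ≈ -0.19865242.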
"""
if alpha == 0:
raise ZeroDivisionError("alpha cannot be 0 for celu")
......@@ -100,13 +103,15 @@ def elu(x, alpha=1.0, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([[-1., 6.], [1., 15.6]])
out = F.elu(x, alpha=0.2)
# [[-0.12642411 6. ]
# [ 1. 15.6 ]]
>>> x = paddle.to_tensor([[-1., 6.], [1., 15.6]])
>>> out = F.elu(x, alpha=0.2)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.12642412, 6. ],
[ 1. , 15.60000038]])
"""
if in_dynamic_mode():
......@@ -168,16 +173,20 @@ def gelu(x, approximate=False, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([[-1, 0.5], [1, 1.5]])
out1 = F.gelu(x)
# [[-0.15865529, 0.34573123],
# [ 0.84134471, 1.39978933]]
out2 = F.gelu(x, True)
# [[-0.15880799, 0.34571400],
# [ 0.84119201, 1.39957154]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> x = paddle.to_tensor([[-1, 0.5], [1, 1.5]])
>>> out1 = F.gelu(x)
>>> print(out1)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
[ 0.84134471, 1.39978933]])
>>> out2 = F.gelu(x, True)
>>> print(out2)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15880796, 0.34571400],
[ 0.84119201, 1.39957154]])
"""
if in_dynamic_mode():
......@@ -223,11 +232,15 @@ def hardshrink(x, threshold=0.5, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
>>> x = paddle.to_tensor([-1, 0.3, 2.5])
>>> out = F.hardshrink(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-1. , 0. , 2.50000000])
x = paddle.to_tensor([-1, 0.3, 2.5])
out = F.hardshrink(x) # [-1., 0., 2.5]
"""
if in_dynamic_mode():
......@@ -274,11 +287,14 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-1.5, 0.3, 2.5])
out = F.hardtanh(x) # [-1., 0.3, 1.]
>>> x = paddle.to_tensor([-1.5, 0.3, 2.5])
>>> out = F.hardtanh(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-1. , 0.30000001, 1. ])
"""
if in_dynamic_mode():
......@@ -338,11 +354,14 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-4., 5., 1.])
out = F.hardsigmoid(x) # [0., 1., 0.666667]
>>> x = paddle.to_tensor([-4., 5., 1.])
>>> out = F.hardsigmoid(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0. , 1. , 0.66666669])
"""
if in_dynamic_mode():
......@@ -390,11 +409,14 @@ def hardswish(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-4., 5., 1.])
out = F.hardswish(x) # [0., 5., 0.666667]
>>> x = paddle.to_tensor([-4., 5., 1.])
>>> out = F.hardswish(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0. , 5. , 0.66666669])
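            >>> # Hedged note (illustrative addition): hardswish(x) = x * relu6(x + 3) / 6,
            >>> # so x = -4. gives -4. * 0. / 6., a float32 negative zero, printed as -0.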
"""
if in_dynamic_mode():
return _C_ops.hardswish(x)
......@@ -442,13 +464,14 @@ def leaky_relu(x, negative_slope=0.01, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-2., 0., 1.])
out = F.leaky_relu(x)
print(out)
# [-0.02, 0., 1.]
>>> x = paddle.to_tensor([-2., 0., 1.])
>>> out = F.leaky_relu(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.02000000, 0. , 1. ])
"""
if in_dynamic_mode():
......@@ -502,25 +525,26 @@ def prelu(x, weight, data_format="NCHW", name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
w = paddle.to_tensor([0.25], dtype='float32')
out = F.prelu(data, w)
print(out)
# [[[[-0.5 , 3. , -1. , 5. ],
# [ 3. , -1. , 5. , -1.5 ],
# [-1.75, -2. , 8. , 9. ]],
# [[ 1. , -0.5 , -0.75, 4. ],
# [-1.25, 6. , 7. , -2. ],
# [ 6. , 7. , 8. , 9. ]]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
... [ 3.0, -4.0, 5.0, -6.0],
... [-7.0, -8.0, 8.0, 9.0]],
... [[ 1.0, -2.0, -3.0, 4.0],
... [-5.0, 6.0, 7.0, -8.0],
... [ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
>>> w = paddle.to_tensor([0.25], dtype='float32')
>>> out = F.prelu(data, w)
>>> print(out)
Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[-0.50000000, 3. , -1. , 5. ],
[ 3. , -1. , 5. , -1.50000000],
[-1.75000000, -2. , 8. , 9. ]],
[[ 1. , -0.50000000, -0.75000000, 4. ],
[-1.25000000, 6. , 7. , -2. ],
[ 6. , 7. , 8. , 9. ]]]])
"""
assert (
len(weight.shape) == 0 or len(weight.shape) == 1
......@@ -634,24 +658,24 @@ def rrelu(x, lower=1.0 / 8.0, upper=1.0 / 3.0, training=True, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
out = F.rrelu(input_tensor, 0.1, 0.3)
print(out)
#[[[[-0.20000899 3. -0.8810822 5. ]
# [ 3. -0.55175185 5. -1.0776101 ]
# [-1.0680687 -1.9896201 8. 9. ]]
# [[ 1. -0.5238267 -0.65515125 4. ]
# [-1.3766339 6. 7. -2.3465784 ]
# [ 6. 7. 8. 9. ]]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> paddle.seed(1)
>>> input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
... [ 3.0, -4.0, 5.0, -6.0],
... [-7.0, -8.0, 8.0, 9.0]],
... [[ 1.0, -2.0, -3.0, 4.0],
... [-5.0, 6.0, 7.0, -8.0],
... [ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
>>> out = F.rrelu(input_tensor, 0.1, 0.3)
>>> print(out)
Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[-0.20715050, 3. , -1.01193857, 5. ],
[ 3. , -0.94084597, 5. , -0.65544695],
[-1.24268556, -2.34339547, 8. , 9. ]],
[[ 1. , -0.44942653, -0.68969047, 4. ],
[-1.03736508, 6. , 7. , -0.95799232],
[ 6. , 7. , 8. , 9. ]]]])
"""
if not isinstance(lower, float) or not isinstance(upper, float):
raise TypeError(
......@@ -722,13 +746,14 @@ def relu(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-2, 0, 1], dtype='float32')
out = F.relu(x)
print(out)
# [0., 0., 1.]
>>> x = paddle.to_tensor([-2, 0, 1], dtype='float32')
>>> out = F.relu(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0., 0., 1.])
"""
if in_dynamic_mode():
......@@ -770,11 +795,14 @@ def log_sigmoid(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
>>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
>>> out = F.log_sigmoid(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.31326166, -0.12692805, -0.04858733, -0.01814996])
"""
if in_dynamic_mode():
......@@ -830,20 +858,25 @@ def maxout(x, groups, axis=1, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.rand([1, 2, 3, 4])
# [[[[0.5002636 0.22272532 0.17402348 0.2874594 ]
# [0.95313174 0.6228939 0.7129065 0.7087491 ]
# [0.02879342 0.88725346 0.61093384 0.38833922]]
# [[0.5231306 0.03807496 0.91661984 0.15602879]
# [0.666127 0.616567 0.30741522 0.24044901]
# [0.7142536 0.7351477 0.31588817 0.23782359]]]]
out = F.maxout(x, groups=2)
# [[[[0.5231306 0.22272532 0.91661984 0.2874594 ]
# [0.95313174 0.6228939 0.7129065 0.7087491 ]
# [0.7142536 0.88725346 0.61093384 0.38833922]]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> paddle.seed(2023)
>>> x = paddle.rand([1, 2, 3, 4])
>>> print(x)
Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0.86583614, 0.52014720, 0.25960937, 0.90525323],
[0.42400089, 0.40641287, 0.97020894, 0.74437362],
[0.51785129, 0.73292869, 0.97786582, 0.04315904]],
[[0.42639419, 0.71958369, 0.20811461, 0.19731510],
[0.38424349, 0.14603184, 0.22713774, 0.44607511],
[0.21657862, 0.67685395, 0.46460176, 0.92382854]]]])
>>> out = F.maxout(x, groups=2)
>>> print(out)
Tensor(shape=[1, 1, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0.86583614, 0.71958369, 0.25960937, 0.90525323],
[0.42400089, 0.40641287, 0.97020894, 0.74437362],
[0.51785129, 0.73292869, 0.97786582, 0.92382854]]]])
"""
if in_dynamic_mode():
return _C_ops.maxout(x, groups, axis)
......@@ -888,13 +921,14 @@ def relu6(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-1, 0.3, 6.5])
out = F.relu6(x)
print(out)
# [0, 0.3, 6]
>>> x = paddle.to_tensor([-1, 0.3, 6.5])
>>> out = F.relu6(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0. , 0.30000001, 6. ])
"""
threshold = 6.0
if in_dynamic_mode():
......@@ -945,13 +979,15 @@ def selu(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]])
out = F.selu(x)
print(out)
# [[0, 1.050701],[2.101402, 3.152103]]
>>> x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]])
>>> out = F.selu(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0. , 1.05070102],
[2.10140204, 3.15210295]])
"""
if scale <= 1.0:
raise ValueError(
......@@ -1000,11 +1036,14 @@ def silu(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = F.silu(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ]
>>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
>>> out = F.silu(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.73105860, 1.76159406, 2.85772228, 3.92805505])
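            >>> # Hedged worked check (illustrative addition): silu(x) = x * sigmoid(x),
            >>> # so x = 1. gives 1. / (1. + exp(-1.)) ≈ 0.73105860.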
"""
if in_dynamic_mode():
......@@ -1111,25 +1150,35 @@ def softmax(x, axis=-1, dtype=None, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]],dtype='float32')
out1 = F.softmax(x)
out2 = F.softmax(x, dtype='float64')
# out1's data type is float32; out2's data type is float64
# out1 and out2's value is as follows:
# [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
# [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
... [3.0, 4.0, 5.0, 6.0],
... [7.0, 8.0, 8.0, 9.0]],
... [[1.0, 2.0, 3.0, 4.0],
... [5.0, 6.0, 7.0, 8.0],
... [6.0, 7.0, 8.0, 9.0]]],dtype='float32')
>>> out1 = F.softmax(x)
>>> out2 = F.softmax(x, dtype='float64')
            >>> # out1's data type is float32; out2's data type is float64.
            >>> # out1's and out2's values are as follows:
>>> print(out1)
>>> print(out2)
Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.07232949, 0.19661194, 0.19661194, 0.53444666]],
[[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428]]])
Tensor(shape=[2, 3, 4], dtype=float64, place=Place(cpu), stop_gradient=True,
[[[0.03205860, 0.08714432, 0.23688282, 0.64391426],
[0.03205860, 0.08714432, 0.23688282, 0.64391426],
[0.07232949, 0.19661193, 0.19661193, 0.53444665]],
[[0.03205860, 0.08714432, 0.23688282, 0.64391426],
[0.03205860, 0.08714432, 0.23688282, 0.64391426],
[0.03205860, 0.08714432, 0.23688282, 0.64391426]]])
"""
if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
......@@ -1214,11 +1263,14 @@ def softplus(x, beta=1, threshold=20, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32')
out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32')
>>> out = F.softplus(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.51301527, 0.59813893, 0.74439669, 0.85435522])
"""
if in_dynamic_mode():
......@@ -1264,14 +1316,14 @@ def softshrink(x, threshold=0.5, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
out = F.softshrink(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.39999998, 0. , 0. , 0.30000001])
>>> x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
>>> out = F.softshrink(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.39999998, 0. , 0. , 0.30000001])
"""
if threshold < 0:
raise ValueError(
......@@ -1315,14 +1367,14 @@ def softsign(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = F.softsign(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.28571430, -0.16666666, 0.09090909, 0.23076925])
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
>>> out = F.softsign(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.28571430, -0.16666666, 0.09090909, 0.23076925])
"""
if in_dynamic_mode():
return _C_ops.softsign(x)
......@@ -1354,14 +1406,14 @@ def swish(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-2., 0., 1.])
out = F.swish(x)
print(out)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.23840584, 0. , 0.73105854])
>>> x = paddle.to_tensor([-2., 0., 1.])
>>> out = F.swish(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.23840584, 0. , 0.73105860])
"""
if in_dynamic_mode():
return _C_ops.swish(x)
......@@ -1403,11 +1455,14 @@ def mish(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-5., 0., 5.])
out = F.mish(x) # [-0.03357624, 0., 4.99955208]
>>> x = paddle.to_tensor([-5., 0., 5.])
>>> out = F.mish(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.03357624, 0. , 4.99955177])
"""
if in_dynamic_mode():
return _C_ops.mish(x, 20)
......@@ -1439,14 +1494,14 @@ def tanhshrink(x, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = F.tanhshrink(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.02005106, -0.00262468, 0.00033200, 0.00868741])
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
>>> out = F.tanhshrink(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.02005100, -0.00262472, 0.00033201, 0.00868741])
"""
if in_dynamic_mode():
return _C_ops.tanh_shrink(x)
......@@ -1488,14 +1543,14 @@ def thresholded_relu(x, threshold=1.0, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.to_tensor([2., 0., 1.])
out = F.thresholded_relu(x)
print(out)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [2., 0., 0.])
>>> x = paddle.to_tensor([2., 0., 1.])
>>> out = F.thresholded_relu(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[2., 0., 0.])
"""
if in_dynamic_mode():
......@@ -1561,26 +1616,35 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = [[[-2.0, 3.0, -4.0, 5.0],
[3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[6.0, 7.0, 8.0, 9.0]]]
x = paddle.to_tensor(x)
out1 = F.log_softmax(x)
out2 = F.log_softmax(x, dtype='float64')
# out1's data type is float32; out2's data type is float64
# out1 and out2's value is as follows:
# [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948]
# [ -2.1270514 -9.127051 -0.12705144 -11.127051 ]
# [-16.313261 -17.313261 -1.3132617 -0.31326184]]
# [[ -3.0518122 -6.051812 -7.051812 -0.051812 ]
# [-12.313267 -1.3132664 -0.3132665 -15.313267 ]
# [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> x = [[[-2.0, 3.0, -4.0, 5.0],
... [3.0, -4.0, 5.0, -6.0],
... [-7.0, -8.0, 8.0, 9.0]],
... [[1.0, -2.0, -3.0, 4.0],
... [-5.0, 6.0, 7.0, -8.0],
... [6.0, 7.0, 8.0, 9.0]]]
>>> x = paddle.to_tensor(x)
>>> out1 = F.log_softmax(x)
>>> out2 = F.log_softmax(x, dtype='float64')
            >>> # out1's data type is float32; out2's data type is float64.
            >>> # out1's and out2's values are as follows:
>>> print(out1)
Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[-7.12783957 , -2.12783957 , -9.12783909 , -0.12783945 ],
[-2.12705135 , -9.12705135 , -0.12705141 , -11.12705135],
[-16.31326103, -17.31326103, -1.31326187 , -0.31326184 ]],
[[-3.05181193 , -6.05181217 , -7.05181217 , -0.05181199 ],
[-12.31326675, -1.31326652 , -0.31326646 , -15.31326675],
[-3.44018984 , -2.44018984 , -1.44018972 , -0.44018975 ]]])
>>> print(out2)
Tensor(shape=[2, 3, 4], dtype=float64, place=Place(cpu), stop_gradient=True,
[[[-7.12783948 , -2.12783948 , -9.12783948 , -0.12783948 ],
[-2.12705141 , -9.12705141 , -0.12705141 , -11.12705141],
[-16.31326180, -17.31326180, -1.31326180 , -0.31326180 ]],
[[-3.05181198 , -6.05181198 , -7.05181198 , -0.05181198 ],
[-12.31326640, -1.31326640 , -0.31326640 , -15.31326640],
[-3.44018970 , -2.44018970 , -1.44018970 , -0.44018970 ]]])
"""
if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
......@@ -1655,17 +1719,16 @@ def glu(x, axis=-1, name=None):
Examples:
.. code-block:: python
import paddle
from paddle.nn import functional as F
x = paddle.to_tensor(
[[-0.22014759, -1.76358426, 0.80566144, 0.04241343],
[-1.94900405, -1.89956081, 0.17134808, -1.11280477]]
)
print(F.glu(x))
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[-0.15216254, -0.90048921],
# [-1.05778778, -0.46985325]])
>>> import paddle
>>> from paddle.nn import functional as F
>>> x = paddle.to_tensor(
... [[-0.22014759, -1.76358426, 0.80566144, 0.04241343],
... [-1.94900405, -1.89956081, 0.17134808, -1.11280477]]
... )
>>> print(F.glu(x))
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15216254, -0.90048921],
[-1.05778778, -0.46985325]])
"""
check_variable_and_dtype(
......@@ -1727,18 +1790,19 @@ def gumbel_softmax(x, temperature=1.0, hard=False, axis=-1, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
logits = paddle.randn([4, 6])
temperature = 0.01
gumbel_softmax = F.gumbel_softmax(logits, temperature)
print(gumbel_softmax)
# out's value is as follows:
# [[0.00000001, 1. , 0.00000000, 0.00000000, 0.00000006, 0.00000000],
# [0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 1. ],
# [0.00000062, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.99999940],
# [0.00000000, 0.00000000, 0.00000000, 0.00001258, 0.99998736, 0.00000000]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> paddle.seed(2023)
>>> logits = paddle.randn([4, 6])
>>> temperature = 0.01
>>> gumbel_softmax = F.gumbel_softmax(logits, temperature)
>>> print(gumbel_softmax)
Tensor(shape=[4, 6], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0.00000000, 1. , 0.00000000, 0.00000000, 0.00000000, 0.00000000],
[0.00000000, 0.00000000, 1. , 0.00000000, 0.00000000, 0.00000000],
[0.00000000, 0.00000004, 0.00000000, 0.00000000, 1. , 0.00000000],
[0.00000000, 1. , 0.00000000, 0.00000000, 0.00000000, 0.00000000]])
"""
if in_dynamic_mode():
......
......@@ -95,11 +95,11 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.randn((100,3,224,224))
y = F.unfold(x, [3, 3], 1, 1, 1)
>>> x = paddle.randn((100,3,224,224))
>>> y = F.unfold(x, [3, 3], 1, 1, 1)
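            >>> # Hedged shape check (illustrative addition): unfold is im2col, so each
            >>> # column stacks C * kh * kw = 3 * 3 * 3 = 27 values, and stride 1 with
            >>> # padding 1 and dilation 1 keeps 224 * 224 = 50176 sliding positions.
            >>> print(y.shape)
            [100, 27, 50176]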
"""
helper = LayerHelper("unfold", **locals())
......@@ -348,23 +348,21 @@ def interpolate(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
output_1 = F.interpolate(x=input_data, size=[12,12])
print(output_1.shape)
# [2L, 3L, 12L, 12L]
# given scale
output_2 = F.interpolate(x=input_data, scale_factor=[2,1])
print(output_2.shape)
# [2L, 3L, 12L, 10L]
# bilinear interp
output_3 = F.interpolate(x=input_data, scale_factor=[2,1], mode="bilinear")
print(output_2.shape)
# [2L, 3L, 12L, 10L]
>>> import paddle
>>> import paddle.nn.functional as F
>>> input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
>>> output_1 = F.interpolate(x=input_data, size=[12,12])
>>> print(output_1.shape)
[2, 3, 12, 12]
>>> # given scale
>>> output_2 = F.interpolate(x=input_data, scale_factor=[2,1])
>>> print(output_2.shape)
[2, 3, 12, 10]
>>> # bilinear interp
>>> output_3 = F.interpolate(x=input_data, scale_factor=[2,1], mode="bilinear")
>>> print(output_2.shape)
[2, 3, 12, 10]
"""
data_format = data_format.upper()
resample = mode.upper()
......@@ -877,15 +875,14 @@ def upsample(
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
upsample_out = paddle.nn.Upsample(size=[12,12])
>>> import paddle
>>> import paddle.nn as nn
output = upsample_out(x=input_data)
print(output.shape)
# [2L, 3L, 12L, 12L]
>>> input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
>>> upsample_out = paddle.nn.Upsample(size=[12,12])
>>> output = upsample_out(x=input_data)
>>> print(output.shape)
[2, 3, 12, 12]
"""
return interpolate(
......@@ -913,17 +910,16 @@ def bilinear(x1, x2, weight, bias=None, name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x1 = paddle.randn((5, 5)).astype(paddle.float32)
x2 = paddle.randn((5, 4)).astype(paddle.float32)
w = paddle.randn((1000, 5, 4)).astype(paddle.float32)
b = paddle.randn((1, 1000)).astype(paddle.float32)
result = F.bilinear(x1, x2, w, b)
print(result.shape)
# [5, 1000]
>>> x1 = paddle.randn((5, 5)).astype(paddle.float32)
>>> x2 = paddle.randn((5, 4)).astype(paddle.float32)
>>> w = paddle.randn((1000, 5, 4)).astype(paddle.float32)
>>> b = paddle.randn((1, 1000)).astype(paddle.float32)
>>> result = F.bilinear(x1, x2, w, b)
>>> print(result.shape)
[5, 1000]
"""
if in_dynamic_mode():
......@@ -1061,39 +1057,38 @@ def dropout(
.. code-block:: python
import paddle
x = paddle.to_tensor([[1,2,3], [4,5,6]]).astype(paddle.float32)
y_train = paddle.nn.functional.dropout(x, 0.5)
y_test = paddle.nn.functional.dropout(x, 0.5, training=False)
y_0 = paddle.nn.functional.dropout(x, axis=0)
y_1 = paddle.nn.functional.dropout(x, axis=1)
y_01 = paddle.nn.functional.dropout(x, axis=[0,1])
print(x)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[1., 2., 3.],
# [4., 5., 6.]])
print(y_train)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[2. , 0. , 6. ],
# [8. , 0. , 12.]])
print(y_test)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[1., 2., 3.],
# [4., 5., 6.]])
print(y_0)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[0. , 0. , 0. ],
# [8. , 10., 12.]])
print(y_1)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[2. , 0. , 6. ],
# [8. , 0. , 12.]])
print(y_01)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[0. , 0. , 0. ],
# [8. , 0. , 12.]])
>>> import paddle
>>> paddle.seed(2023)
>>> x = paddle.to_tensor([[1,2,3], [4,5,6]]).astype(paddle.float32)
>>> y_train = paddle.nn.functional.dropout(x, 0.5)
>>> y_test = paddle.nn.functional.dropout(x, 0.5, training=False)
>>> y_0 = paddle.nn.functional.dropout(x, axis=0)
>>> y_1 = paddle.nn.functional.dropout(x, axis=1)
>>> y_01 = paddle.nn.functional.dropout(x, axis=[0,1])
>>> print(x)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[1., 2., 3.],
[4., 5., 6.]])
>>> print(y_train)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[2., 4., 0.],
[8., 0., 0.]])
>>> print(y_test)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[1., 2., 3.],
[4., 5., 6.]])
>>> print(y_0)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[2., 4., 6.],
[8. , 10., 12.]])
>>> print(y_1)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[2. , 4. , 6. ],
[8. , 10., 12.]])
>>> print(y_01)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0., 0., 6.],
[0., 0., 0.]])
"""
if not isinstance(p, (float, int, Variable)):
raise TypeError("p argument should be a number or Variable")
......@@ -1258,17 +1253,106 @@ def dropout2d(x, p=0.5, training=True, data_format='NCHW', name=None):
Examples:
.. code-block:: python
import paddle
x = paddle.randn(shape=(2, 3, 4, 5)).astype(paddle.float32)
y_train = paddle.nn.functional.dropout2d(x) #train
y_test = paddle.nn.functional.dropout2d(x, training=False) #test
for i in range(2):
for j in range(3):
print(x[i,j,:,:])
print(y_train[i,j,:,:]) # may all 0
print(y_test[i,j,:,:])
>>> import paddle
>>> paddle.seed(1)
>>> x = paddle.randn(shape=(2, 3, 4, 5)).astype(paddle.float32)
>>> y_train = paddle.nn.functional.dropout2d(x) #train
>>> y_test = paddle.nn.functional.dropout2d(x, training=False) #test
>>> for i in range(2):
... for j in range(3):
... print(x[i,j,:,:])
... print(y_train[i,j,:,:]) # may all 0
... print(y_test[i,j,:,:])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.30557564, 0.11855337, 0.41220093, -0.09968963, 1.50014710],
[ 1.24004936, -0.92485696, 0.08612321, 1.15149164, -0.09276631],
[ 1.22873247, -1.46587241, -1.30802727, 0.19496460, 1.73776841],
[ 0.40092674, 0.67630458, 0.72265440, 1.31720388, -1.41899264]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.61115128, 0.23710674, 0.82440186, -0.19937925, 3.00029421],
[ 2.48009872, -1.84971392, 0.17224643, 2.30298328, -0.18553263],
[ 2.45746493, -2.93174481, -2.61605453, 0.38992921, 3.47553682],
[ 0.80185348, 1.35260916, 1.44530880, 2.63440776, -2.83798528]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.30557564, 0.11855337, 0.41220093, -0.09968963, 1.50014710],
[ 1.24004936, -0.92485696, 0.08612321, 1.15149164, -0.09276631],
[ 1.22873247, -1.46587241, -1.30802727, 0.19496460, 1.73776841],
[ 0.40092674, 0.67630458, 0.72265440, 1.31720388, -1.41899264]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.88350385, -1.14767575, 0.51043051, -0.10051888, -0.61305630],
[-0.12084112, 0.48506257, -1.13189507, 0.62806708, -0.80003673],
[ 0.51513153, -0.08890446, 0.22753835, 0.11557858, 0.78117645],
[ 1.47505593, 0.84618902, -0.38528305, -1.05887091, 0.16592593]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 1.76700771, -2.29535151, 1.02086103, -0.20103776, -1.22611260],
[-0.24168225, 0.97012514, -2.26379013, 1.25613415, -1.60007346],
[ 1.03026307, -0.17780893, 0.45507669, 0.23115715, 1.56235290],
[ 2.95011187, 1.69237804, -0.77056611, -2.11774182, 0.33185187]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.88350385, -1.14767575, 0.51043051, -0.10051888, -0.61305630],
[-0.12084112, 0.48506257, -1.13189507, 0.62806708, -0.80003673],
[ 0.51513153, -0.08890446, 0.22753835, 0.11557858, 0.78117645],
[ 1.47505593, 0.84618902, -0.38528305, -1.05887091, 0.16592593]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1.46668839, -0.38117948, 1.18678427, 0.38740095, 0.29117522],
[-0.13538910, -0.14527084, -0.04912176, -0.26063353, 0.23640174],
[ 0.45643106, 0.60587281, -1.03242552, -0.45319262, -1.57911122],
[-0.08732958, -0.75898546, 0.14563090, -1.73751652, -0.89109969]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0., -0., 0. , 0. , 0. ],
[-0., -0., -0., -0., 0. ],
[0. , 0. , -0., -0., -0.],
[-0., -0., 0. , -0., -0.]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1.46668839, -0.38117948, 1.18678427, 0.38740095, 0.29117522],
[-0.13538910, -0.14527084, -0.04912176, -0.26063353, 0.23640174],
[ 0.45643106, 0.60587281, -1.03242552, -0.45319262, -1.57911122],
[-0.08732958, -0.75898546, 0.14563090, -1.73751652, -0.89109969]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.32110816, -0.76044011, 0.34456784, -0.39410326, 0.37896338],
[ 0.52747023, 0.72711533, 0.29204839, 0.72493637, 0.31128070],
[ 0.58046782, -1.78499067, -1.67504823, -0.38590902, -0.26243693],
[ 0.96669912, 0.43670532, -0.38109761, 0.78405094, -2.17882323]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0., -0., 0. , -0., 0. ],
[0. , 0. , 0. , 0. , 0. ],
[0. , -0., -0., -0., -0.],
[0. , 0. , -0., 0. , -0.]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.32110816, -0.76044011, 0.34456784, -0.39410326, 0.37896338],
[ 0.52747023, 0.72711533, 0.29204839, 0.72493637, 0.31128070],
[ 0.58046782, -1.78499067, -1.67504823, -0.38590902, -0.26243693],
[ 0.96669912, 0.43670532, -0.38109761, 0.78405094, -2.17882323]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.17168395, 0.45112833, 0.63307828, 2.38763475, -1.27247131],
[ 0.56171960, -1.09584677, 0.38300961, -0.57512099, 0.31011426],
[-0.95336407, -1.04852903, -0.21312937, -0.53549880, -0.00074209],
[ 2.22819090, 1.12403083, -0.04198794, -1.51167727, -0.42699185]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0. , 0. , 0. , 0. , -0.],
[0. , -0., 0. , -0., 0. ],
[-0., -0., -0., -0., -0.],
[0. , 0. , -0., -0., -0.]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.17168395, 0.45112833, 0.63307828, 2.38763475, -1.27247131],
[ 0.56171960, -1.09584677, 0.38300961, -0.57512099, 0.31011426],
[-0.95336407, -1.04852903, -0.21312937, -0.53549880, -0.00074209],
[ 2.22819090, 1.12403083, -0.04198794, -1.51167727, -0.42699185]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.62503546, -0.20989063, -0.22046235, -0.38679042, -1.02590704],
[ 1.04561794, 1.08428383, -0.52219963, -1.56003857, 0.89213932],
[-0.16578521, 0.14524542, -0.45563069, 0.48180851, 1.35843253],
[ 1.07669640, -0.84535235, -1.18651557, 0.79144061, -0.45565742]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0. , -0., -0., -0., -0.],
[0. , 0. , -0., -0., 0. ],
[-0., 0. , -0., 0. , 0. ],
[0. , -0., -0., 0. , -0.]])
Tensor(shape=[4, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.62503546, -0.20989063, -0.22046235, -0.38679042, -1.02590704],
[ 1.04561794, 1.08428383, -0.52219963, -1.56003857, 0.89213932],
[-0.16578521, 0.14524542, -0.45563069, 0.48180851, 1.35843253],
[ 1.07669640, -0.84535235, -1.18651557, 0.79144061, -0.45565742]])
"""
input_shape = x.shape
if len(input_shape) != 4:
......@@ -1317,14 +1401,14 @@ def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.randn(shape=(2, 3, 4, 5, 6)).astype(paddle.float32)
y_train = paddle.nn.functional.dropout3d(x) #train
y_test = paddle.nn.functional.dropout3d(x, training=False) #test
print(x[0,0,:,:,:])
print(y_train[0,0,:,:,:]) # may all 0
print(y_test[0,0,:,:,:])
>>> x = paddle.randn(shape=(2, 3, 4, 5, 6)).astype(paddle.float32)
>>> y_train = paddle.nn.functional.dropout3d(x) #train
>>> y_test = paddle.nn.functional.dropout3d(x, training=False) #test
>>> print(x[0,0,:,:,:])
>>> print(y_train[0,0,:,:,:]) # may all 0
>>> print(y_test[0,0,:,:,:])
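            >>> # The printed values are omitted here because dropout is random; a hedged
            >>> # shape check (illustrative addition) holds regardless of the seed:
            >>> print(y_train.shape)
            [2, 3, 4, 5, 6]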
"""
......@@ -1371,19 +1455,19 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[-1, 1], [-1, 1]]).astype(paddle.float32)
y_train = paddle.nn.functional.alpha_dropout(x, 0.5)
y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False)
print(y_train)
# Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[-0.10721093, -0.77919382],
# [-0.10721093, 1.66559887]]) (randomly)
print(y_test)
# Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[-1., 1.],
# [-1., 1.]])
>>> import paddle
>>> paddle.seed(1)
>>> x = paddle.to_tensor([[-1, 1], [-1, 1]]).astype(paddle.float32)
>>> y_train = paddle.nn.functional.alpha_dropout(x, 0.5)
>>> y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False)
>>> print(y_train)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.77919382, 1.66559887],
[-0.10721093, -0.77919382]])
>>> print(y_test)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1., 1.],
[-1., 1.]])
"""
if not isinstance(p, (float, int)):
raise TypeError("p argument should be a float or int")
......@@ -1516,32 +1600,35 @@ def pad(x, pad, mode='constant', value=0.0, data_format="NCHW", name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
# example 1
x_shape = (1, 1, 3)
x = paddle.arange(paddle.prod(paddle.to_tensor(x_shape)), dtype="float32").reshape(x_shape) + 1
y = F.pad(x, [0, 0, 0, 0, 2, 3], value=1, mode='constant', data_format="NCL")
print(y)
# [[[1. 1. 1. 2. 3. 1. 1. 1.]]]
# example 2
x_shape = (1, 1, 3)
x = paddle.arange(paddle.prod(paddle.to_tensor(x_shape)), dtype="float32").reshape(x_shape) + 1
y = F.pad(x, [2, 3], value=1, mode='constant', data_format="NCL")
print(y)
# [[[1. 1. 1. 2. 3. 1. 1. 1.]]]
# example 3
x_shape = (1, 1, 2, 3)
x = paddle.arange(paddle.prod(paddle.to_tensor(x_shape)), dtype="float32").reshape(x_shape) + 1
y = F.pad(x, [1, 2, 1, 1], value=1, mode='circular')
print(y)
# [[[[6. 4. 5. 6. 4. 5.]
# [3. 1. 2. 3. 1. 2.]
# [6. 4. 5. 6. 4. 5.]
# [3. 1. 2. 3. 1. 2.]]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> # example 1
>>> x_shape = (1, 1, 3)
>>> x = paddle.arange(paddle.prod(paddle.to_tensor(x_shape)), dtype="float32").reshape(x_shape) + 1
>>> y = F.pad(x, [0, 0, 0, 0, 2, 3], value=1, mode='constant', data_format="NCL")
>>> print(y)
Tensor(shape=[1, 1, 8], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[1., 1., 1., 2., 3., 1., 1., 1.]]])
>>> # example 2
>>> x_shape = (1, 1, 3)
>>> x = paddle.arange(paddle.prod(paddle.to_tensor(x_shape)), dtype="float32").reshape(x_shape) + 1
>>> y = F.pad(x, [2, 3], value=1, mode='constant', data_format="NCL")
>>> print(y)
Tensor(shape=[1, 1, 8], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[1., 1., 1., 2., 3., 1., 1., 1.]]])
>>> # example 3
>>> x_shape = (1, 1, 2, 3)
>>> x = paddle.arange(paddle.prod(paddle.to_tensor(x_shape)), dtype="float32").reshape(x_shape) + 1
>>> y = F.pad(x, [1, 2, 1, 1], value=1, mode='circular')
>>> print(y)
Tensor(shape=[1, 1, 4, 6], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[6., 4., 5., 6., 4., 5.],
[3., 1., 2., 3., 1., 2.],
[6., 4., 5., 6., 4., 5.],
[3., 1., 2., 3., 1., 2.]]]])
"""
assert mode in [
'reflect',
......@@ -1713,16 +1800,18 @@ def zeropad2d(x, padding, data_format="NCHW", name=None):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x_shape = paddle.to_tensor([1, 1, 2, 3])
x = paddle.arange(paddle.prod(x_shape), dtype="float32").reshape(x_shape) + 1
y = F.zeropad2d(x, [1, 2, 1, 1])
print(y)
# [[[[0. 0. 0. 0. 0. 0.]
# [0. 1. 2. 3. 0. 0.]
# [0. 4. 5. 6. 0. 0.]
# [0. 0. 0. 0. 0. 0.]]]]
>>> import paddle
>>> import paddle.nn.functional as F
>>> x_shape = paddle.to_tensor([1, 1, 2, 3])
>>> x = paddle.arange(paddle.prod(x_shape), dtype="float32").reshape(x_shape) + 1
>>> y = F.zeropad2d(x, [1, 2, 1, 1])
>>> print(y)
Tensor(shape=[1, 1, 4, 6], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0., 0., 0., 0., 0., 0.],
[0., 1., 2., 3., 0., 0.],
[0., 4., 5., 6., 0., 0.],
[0., 0., 0., 0., 0., 0.]]]])
"""
return pad(
......@@ -1767,16 +1856,17 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8):
Code Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
paddle.seed(1)
x1 = paddle.randn(shape=[2, 3])
x2 = paddle.randn(shape=[2, 3])
>>> paddle.seed(1)
>>> x1 = paddle.randn(shape=[2, 3])
>>> x2 = paddle.randn(shape=[2, 3])
result = paddle.nn.functional.cosine_similarity(x1, x2, axis=0)
print(result)
# [0.97689527, 0.99996042, -0.55138415]
>>> result = paddle.nn.functional.cosine_similarity(x1, x2, axis=0)
>>> print(result)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[ 0.97689527, 0.99996042, -0.55138415])
"""
w12 = sum(paddle.multiply(x1, x2), axis=axis)
......@@ -1822,21 +1912,29 @@ def linear(x, weight, bias=None, name=None):
Examples:
.. code-block:: python
import paddle
x = paddle.randn((3, 2), dtype="float32")
# x: [[-0.32342386 -1.200079 ]
# [ 0.7979031 -0.90978354]
# [ 0.40597573 1.8095392 ]]
weight = paddle.full(shape=[2, 4], fill_value="0.5", dtype="float32", name="weight")
# weight: [[0.5 0.5 0.5 0.5]
# [0.5 0.5 0.5 0.5]]
bias = paddle.ones(shape=[4], dtype="float32", name="bias")
# bias: [1. 1. 1. 1.]
y = paddle.nn.functional.linear(x, weight, bias)
# y: [[0.23824859 0.23824859 0.23824859 0.23824859]
# [0.9440598 0.9440598 0.9440598 0.9440598 ]
# [2.1077576 2.1077576 2.1077576 2.1077576 ]]
>>> import paddle
>>> paddle.seed(2023)
>>> x = paddle.randn((3, 2), dtype="float32")
>>> print(x)
Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0.06132207, 1.11349595],
[ 0.41906244, -0.24858207],
[-1.85169315, -1.50370061]])
>>> weight = paddle.full(shape=[2, 4], fill_value="0.5", dtype="float32", name="weight")
>>> print(weight)
Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0.50000000, 0.50000000, 0.50000000, 0.50000000],
[0.50000000, 0.50000000, 0.50000000, 0.50000000]])
>>> bias = paddle.ones(shape=[4], dtype="float32", name="bias")
>>> print(bias)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[1., 1., 1., 1.])
>>> y = paddle.nn.functional.linear(x, weight, bias)
>>> print(y)
Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 1.58740902, 1.58740902, 1.58740902, 1.58740902],
[ 1.08524013, 1.08524013, 1.08524013, 1.08524013],
[-0.67769694, -0.67769694, -0.67769694, -0.67769694]])
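            >>> # Hedged worked check (illustrative addition): with every weight 0.5 and
            >>> # bias 1, each output equals 0.5 * (x[i, 0] + x[i, 1]) + 1; row 0 gives
            >>> # 0.5 * (0.06132207 + 1.11349595) + 1 ≈ 1.58740902.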
"""
if in_dynamic_mode():
# TODO(jiabin): using addmm for fast forward route
......@@ -1921,17 +2019,17 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
Examples:
.. code-block:: python
import paddle
paddle.disable_static()
>>> import paddle
>>> paddle.disable_static()
x = paddle.to_tensor([[[0, 1, 0],
[ 1, 0, 1]]], dtype="float32", stop_gradient=False)
>>> x = paddle.to_tensor([[[0, 1, 0],
            ...                       [ 1, 0, 1]]], dtype="float32", stop_gradient=False)
output = paddle.nn.functional.label_smooth(x)
print(output)
# Tensor(shape=[1, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[[0.03333334, 0.93333334, 0.03333334],
# [0.93333334, 0.03333334, 0.93333334]]])
>>> output = paddle.nn.functional.label_smooth(x)
>>> print(output)
Tensor(shape=[1, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[0.03333334, 0.93333334, 0.03333334],
[0.93333334, 0.03333334, 0.93333334]]])
"""
if epsilon > 1.0 or epsilon < 0.0:
raise ValueError("The value of epsilon must be between 0 and 1.")
......@@ -2002,67 +2100,64 @@ def class_center_sample(label, num_classes, num_samples, group=None):
.. code-block:: python
:name: code-example1
# CPU or single GPU
import paddle
num_classes = 20
batch_size = 10
num_samples = 6
label = paddle.randint(low=0, high=num_classes, shape=[batch_size], dtype='int64')
remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample(label, num_classes, num_samples)
print(label)
print(remapped_label)
print(sampled_class_index)
# the output is
#Tensor(shape=[10], dtype=int64, place=CPUPlace, stop_gradient=True,
# [11, 5 , 1 , 3 , 12, 2 , 15, 19, 18, 19])
#Tensor(shape=[10], dtype=int64, place=CPUPlace, stop_gradient=True,
# [4, 3, 0, 2, 5, 1, 6, 8, 7, 8])
#Tensor(shape=[9], dtype=int64, place=CPUPlace, stop_gradient=True,
# [1 , 2 , 3 , 5 , 11, 12, 15, 18, 19])
>>> # CPU or single GPU
>>> import paddle
>>> num_classes = 20
>>> batch_size = 10
>>> num_samples = 6
>>> paddle.seed(2023)
>>> label = paddle.randint(low=0, high=num_classes, shape=[batch_size], dtype='int64')
>>> remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample(label, num_classes, num_samples)
>>> print(label)
Tensor(shape=[10], dtype=int64, place=Place(cpu), stop_gradient=True,
[17, 10, 5 , 18, 8 , 8 , 19, 14, 10, 14])
>>> print(remapped_label)
Tensor(shape=[10], dtype=int64, place=Place(cpu), stop_gradient=True,
[4, 2, 0, 5, 1, 1, 6, 3, 2, 3])
>>> print(sampled_class_index)
Tensor(shape=[7], dtype=int64, place=Place(cpu), stop_gradient=True,
[5 , 8 , 10, 14, 17, 18, 19])
.. code-block:: python
:name: code-example2
# required: distributed
# Multi GPU, test_class_center_sample.py
import paddle
import paddle.distributed as dist
strategy = dist.fleet.DistributedStrategy()
dist.fleet.init(is_collective=True, strategy=strategy)
batch_size = 10
num_samples = 6
rank_id = dist.get_rank()
# num_classes of each GPU can be different, e.g num_classes_list = [10, 8]
num_classes_list = [10, 10]
num_classes = paddle.sum(paddle.to_tensor(num_classes_list))
label = paddle.randint(low=0, high=num_classes.item(), shape=[batch_size], dtype='int64')
label_list = []
dist.all_gather(label_list, label)
label = paddle.concat(label_list, axis=0)
remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample(label, num_classes_list[rank_id], num_samples)
print(label)
print(remapped_label)
print(sampled_class_index)
#python -m paddle.distributed.launch --gpus=0,1 test_class_center_sample.py
# rank 0 output:
#Tensor(shape=[20], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
# [10, 17, 15, 11, 9 , 12, 18, 18, 17, 18, 19, 2 , 8 , 13, 11, 13, 9 , 10, 0 , 4 ])
#Tensor(shape=[20], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
# [6 , 11, 10, 7 , 4 , 8 , 12, 12, 11, 12, 13, 1 , 3 , 9 , 7 , 9 , 4 , 6 , 0 , 2 ])
#Tensor(shape=[6], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
# [0, 2, 4, 8, 9, 3])
# rank 1 output:
#Tensor(shape=[20], dtype=int64, place=CUDAPlace(1), stop_gradient=True,
# [10, 17, 15, 11, 9 , 12, 18, 18, 17, 18, 19, 2 , 8 , 13, 11, 13, 9 , 10, 0 , 4 ])
#Tensor(shape=[20], dtype=int64, place=CUDAPlace(1), stop_gradient=True,
# [6 , 11, 10, 7 , 4 , 8 , 12, 12, 11, 12, 13, 1 , 3 , 9 , 7 , 9 , 4 , 6 , 0 , 2 ])
#Tensor(shape=[7], dtype=int64, place=CUDAPlace(1), stop_gradient=True,
# [0, 1, 2, 3, 5, 7, 8])
>>> # doctest: +REQUIRES(env:DISTRIBUTED)
>>> # required: distributed
>>> # Multi GPU, test_class_center_sample.py
>>> import paddle
>>> import paddle.distributed as dist
>>> strategy = dist.fleet.DistributedStrategy()
>>> dist.fleet.init(is_collective=True, strategy=strategy)
>>> batch_size = 10
>>> num_samples = 6
>>> rank_id = dist.get_rank()
            >>> # num_classes of each GPU can be different, e.g. num_classes_list = [10, 8]
>>> num_classes_list = [10, 10]
>>> num_classes = paddle.sum(paddle.to_tensor(num_classes_list))
>>> label = paddle.randint(low=0, high=num_classes.item(), shape=[batch_size], dtype='int64')
>>> label_list = []
>>> dist.all_gather(label_list, label)
>>> label = paddle.concat(label_list, axis=0)
>>> remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample(label, num_classes_list[rank_id], num_samples)
>>> print(label)
>>> print(remapped_label)
>>> print(sampled_class_index)
            >>> # python -m paddle.distributed.launch --gpus=0,1 test_class_center_sample.py
>>> # rank 0 output:
Tensor(shape=[20], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
[10, 17, 15, 11, 9 , 12, 18, 18, 17, 18, 19, 2 , 8 , 13, 11, 13, 9 , 10, 0 , 4 ])
Tensor(shape=[20], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
[6 , 11, 10, 7 , 4 , 8 , 12, 12, 11, 12, 13, 1 , 3 , 9 , 7 , 9 , 4 , 6 , 0 , 2 ])
Tensor(shape=[6], dtype=int64, place=CUDAPlace(0), stop_gradient=True,
[0, 2, 4, 8, 9, 3])
>>> # rank 1 output:
Tensor(shape=[20], dtype=int64, place=CUDAPlace(1), stop_gradient=True,
[10, 17, 15, 11, 9 , 12, 18, 18, 17, 18, 19, 2 , 8 , 13, 11, 13, 9 , 10, 0 , 4 ])
Tensor(shape=[20], dtype=int64, place=CUDAPlace(1), stop_gradient=True,
[6 , 11, 10, 7 , 4 , 8 , 12, 12, 11, 12, 13, 1 , 3 , 9 , 7 , 9 , 4 , 6 , 0 , 2 ])
Tensor(shape=[7], dtype=int64, place=CUDAPlace(1), stop_gradient=True,
[0, 1, 2, 3, 5, 7, 8])
"""
if not (group is False or group is None or hasattr(group, 'is_member')):
raise ValueError(
......@@ -2216,12 +2311,15 @@ def fold(
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x = paddle.randn([2,3*2*2,12])
y = F.fold(x, output_sizes=[4, 5], kernel_sizes=2)
# y.shape = [2,3,4,5]
>>> x = paddle.randn([2,3*2*2,12])
>>> y = F.fold(x, output_sizes=[4, 5], kernel_sizes=2)
>>> print(y.shape)
[2, 3, 4, 5]
"""
......
......@@ -368,24 +368,24 @@ def conv1d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([[[4, 8, 1, 9],
[7, 2, 0, 9],
[6, 9, 2, 6]]], dtype="float32")
w = paddle.to_tensor([[[9, 3, 4],
[0, 0, 7],
[2, 5, 6]],
[[0, 3, 4],
[2, 9, 7],
[5, 6, 8]]], dtype="float32")
y = F.conv1d(x, w)
print(y)
# Tensor(shape=[1, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[133., 238.],
# [160., 211.]]])
>>> import paddle
>>> import paddle.nn.functional as F
>>> x = paddle.to_tensor([[[4, 8, 1, 9],
... [7, 2, 0, 9],
... [6, 9, 2, 6]]], dtype="float32")
>>> w = paddle.to_tensor([[[9, 3, 4],
... [0, 0, 7],
... [2, 5, 6]],
... [[0, 3, 4],
... [2, 9, 7],
... [5, 6, 8]]], dtype="float32")
>>> y = F.conv1d(x, w)
>>> print(y)
Tensor(shape=[1, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[133., 238.],
[160., 211.]]])
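            >>> # Hedged worked check (illustrative addition): the first output value is
            >>> # the first filter dotted with the first input window:
            >>> # 4*9 + 8*3 + 1*4 + 7*0 + 2*0 + 0*7 + 6*2 + 9*5 + 2*6 = 133.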
"""
cudnn_version = get_cudnn_version()
if cudnn_version is not None:
......@@ -632,16 +632,16 @@ def conv2d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
w_var = paddle.randn((6, 3, 3, 3), dtype='float32')
>>> x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
>>> w_var = paddle.randn((6, 3, 3, 3), dtype='float32')
y_var = F.conv2d(x_var, w_var)
>>> y_var = F.conv2d(x_var, w_var)
print(y_var.shape)
# [2, 6, 6, 6]
>>> print(y_var.shape)
[2, 6, 6, 6]
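            >>> # Hedged note (illustrative addition): with a 3x3 kernel, stride 1 and no
            >>> # padding, each spatial dim shrinks by kernel - 1: (8 - 3) + 1 = 6.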
"""
# entry checks
if data_format not in ["NCHW", "NHWC"]:
......@@ -887,20 +887,20 @@ def conv1d_transpose(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
# shape: (1, 2, 4)
x = paddle.to_tensor([[[4, 0, 9, 7],
[8, 0, 9, 2,]]], dtype="float32")
# shape: (2, 1, 2)
w = paddle.to_tensor([[[7, 0]],
[[4, 2]]], dtype="float32")
>>> # shape: (1, 2, 4)
>>> x = paddle.to_tensor([[[4, 0, 9, 7],
            ...                        [8, 0, 9, 2,]]], dtype="float32")
>>> # shape: (2, 1, 2)
>>> w = paddle.to_tensor([[[7, 0]],
            ...                        [[4, 2]]], dtype="float32")
y = F.conv1d_transpose(x, w)
print(y)
# Tensor(shape=[1, 1, 5], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[60., 16., 99., 75., 4. ]]])
>>> y = F.conv1d_transpose(x, w)
>>> print(y)
Tensor(shape=[1, 1, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[60., 16., 99., 75., 4. ]]])
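            >>> # Hedged worked check (illustrative addition): the first output value sums
            >>> # each input channel's first sample times its filter's first tap:
            >>> # 4*7 + 8*4 = 60.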
"""
cudnn_version = get_cudnn_version()
if cudnn_version is not None:
......@@ -1183,16 +1183,16 @@ def conv2d_transpose(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
>>> x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
>>> w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
y_var = F.conv2d_transpose(x_var, w_var)
>>> y_var = F.conv2d_transpose(x_var, w_var)
print(y_var.shape)
# [2, 6, 10, 10]
>>> print(y_var.shape)
[2, 6, 10, 10]
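            >>> # Hedged note (illustrative addition): a transposed conv inverts the conv
            >>> # shape rule, so each spatial dim grows to (8 - 1) * 1 + 3 = 10.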
"""
if data_format not in ['NCHW', 'NHWC']:
......@@ -1476,16 +1476,16 @@ def conv3d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
w_var = paddle.randn((6, 3, 3, 3, 3), dtype='float32')
>>> x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
>>> w_var = paddle.randn((6, 3, 3, 3, 3), dtype='float32')
y_var = F.conv3d(x_var, w_var)
>>> y_var = F.conv3d(x_var, w_var)
print(y_var.shape)
# [2, 6, 6, 6, 6]
>>> print(y_var.shape)
[2, 6, 6, 6, 6]
"""
# entry check
if data_format not in ["NCDHW", "NDHWC"]:
......@@ -1688,18 +1688,18 @@ def conv3d_transpose(
variable storing transposed convolution and non-linearity activation result.
Examples:
        .. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
w_var = paddle.randn((3, 6, 3, 3, 3), dtype='float32')
>>> x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
>>> w_var = paddle.randn((3, 6, 3, 3, 3), dtype='float32')
y_var = F.conv3d_transpose(x_var, w_var)
>>> y_var = F.conv3d_transpose(x_var, w_var)
print(y_var.shape)
# [2, 6, 10, 10, 10]
>>> print(y_var.shape)
[2, 6, 10, 10, 10]
"""
# entry checks
if data_format not in ["NCDHW", "NDHWC"]:
......
......@@ -59,14 +59,13 @@ def pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False, name=None):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[1., 3.], [3., 5.]], dtype=paddle.float64)
y = paddle.to_tensor([[5., 6.], [7., 8.]], dtype=paddle.float64)
distance = paddle.nn.functional.pairwise_distance(x, y)
print(distance)
# Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=True,
# [4.99999860, 4.99999860])
>>> import paddle
>>> x = paddle.to_tensor([[1., 3.], [3., 5.]], dtype=paddle.float64)
>>> y = paddle.to_tensor([[5., 6.], [7., 8.]], dtype=paddle.float64)
>>> distance = paddle.nn.functional.pairwise_distance(x, y)
>>> print(distance)
Tensor(shape=[2], dtype=float64, place=Place(cpu), stop_gradient=True,
[4.99999860, 4.99999860])
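            >>> # Hedged worked check (illustrative addition): the p=2 distance for the
            >>> # first row is sqrt((1 - 5)**2 + (3 - 6)**2) = 5; epsilon (1e-6) is added
            >>> # to the difference for numerical stability, hence 4.99999860.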
"""
if in_dynamic_mode():
sub = _C_ops.subtract(x, y)
......
......@@ -55,48 +55,46 @@ def diag_embed(input, offset=0, dim1=-2, dim2=-1):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
diag_embed_input = paddle.arange(6)
diag_embed_output1 = F.diag_embed(diag_embed_input)
print(diag_embed_output1)
# Tensor(shape=[6, 6], dtype=int64, place=Place(cpu), stop_gradient=True,
# [[0, 0, 0, 0, 0, 0],
# [0, 1, 0, 0, 0, 0],
# [0, 0, 2, 0, 0, 0],
# [0, 0, 0, 3, 0, 0],
# [0, 0, 0, 0, 4, 0],
# [0, 0, 0, 0, 0, 5]])
diag_embed_output2 = F.diag_embed(diag_embed_input, offset=-1, dim1=0,dim2=1 )
print(diag_embed_output2)
# Tensor(shape=[7, 7], dtype=int64, place=Place(cpu), stop_gradient=True,
# [[0, 0, 0, 0, 0, 0, 0],
# [0, 0, 0, 0, 0, 0, 0],
# [0, 1, 0, 0, 0, 0, 0],
# [0, 0, 2, 0, 0, 0, 0],
# [0, 0, 0, 3, 0, 0, 0],
# [0, 0, 0, 0, 4, 0, 0],
# [0, 0, 0, 0, 0, 5, 0]])
diag_embed_input_2dim = paddle.reshape(diag_embed_input,[2,3])
print(diag_embed_input_2dim)
# Tensor(shape=[2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
# [[0, 1, 2],
# [3, 4, 5]])
diag_embed_output3 = F.diag_embed(diag_embed_input_2dim,offset= 0, dim1=0, dim2=2 )
print(diag_embed_output3)
# Tensor(shape=[3, 2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
# [[[0, 0, 0],
# [3, 0, 0]],
# [[0, 1, 0],
# [0, 4, 0]],
# [[0, 0, 2],
# [0, 0, 5]]])
>>> import paddle
>>> import paddle.nn.functional as F
>>> diag_embed_input = paddle.arange(6)
>>> diag_embed_output1 = F.diag_embed(diag_embed_input)
>>> print(diag_embed_output1)
Tensor(shape=[6, 6], dtype=int64, place=Place(cpu), stop_gradient=True,
[[0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 0, 2, 0, 0, 0],
[0, 0, 0, 3, 0, 0],
[0, 0, 0, 0, 4, 0],
[0, 0, 0, 0, 0, 5]])
>>> diag_embed_output2 = F.diag_embed(diag_embed_input, offset=-1, dim1=0,dim2=1 )
>>> print(diag_embed_output2)
Tensor(shape=[7, 7], dtype=int64, place=Place(cpu), stop_gradient=True,
[[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[0, 0, 2, 0, 0, 0, 0],
[0, 0, 0, 3, 0, 0, 0],
[0, 0, 0, 0, 4, 0, 0],
[0, 0, 0, 0, 0, 5, 0]])
>>> diag_embed_input_2dim = paddle.reshape(diag_embed_input,[2,3])
>>> print(diag_embed_input_2dim)
Tensor(shape=[2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
[[0, 1, 2],
[3, 4, 5]])
>>> diag_embed_output3 = F.diag_embed(diag_embed_input_2dim,offset= 0, dim1=0, dim2=2 )
>>> print(diag_embed_output3)
Tensor(shape=[3, 2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
[[[0, 0, 0],
[3, 0, 0]],
[[0, 1, 0],
[0, 4, 0]],
[[0, 0, 2],
[0, 0, 5]]])
"""
if not isinstance(input, Variable):
input = assign(input)
......@@ -200,16 +198,16 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
Examples:
.. code-block:: python
import paddle
>>> import paddle
lengths = paddle.to_tensor([10, 9, 8])
mask = paddle.nn.functional.sequence_mask(lengths)
>>> lengths = paddle.to_tensor([10, 9, 8])
>>> mask = paddle.nn.functional.sequence_mask(lengths)
print(mask)
# Tensor(shape=[3, 10], dtype=int64, place=Place(gpu:0), stop_gradient=True,
# [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
# [1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
# [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
>>> print(mask)
Tensor(shape=[3, 10], dtype=int64, place=Place(cpu), stop_gradient=True,
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
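            >>> # Hedged note (illustrative addition): maxlen defaults to max(lengths), so
            >>> # row i holds lengths[i] ones followed by zeros up to width 10.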
"""
......@@ -296,14 +294,24 @@ def gather_tree(ids, parents):
Examples:
.. code-block:: python
import paddle
>>> import paddle
ids = paddle.to_tensor([[[2, 2], [6, 1]], [[3, 9], [6, 1]], [[0, 1], [9, 0]]])
>>> ids = paddle.to_tensor([[[2, 2], [6, 1]], [[3, 9], [6, 1]], [[0, 1], [9, 0]]])
parents = paddle.to_tensor([[[0, 0], [1, 1]], [[1, 0], [1, 0]], [[0, 0], [0, 1]]])
>>> parents = paddle.to_tensor([[[0, 0], [1, 1]], [[1, 0], [1, 0]], [[0, 0], [0, 1]]])
>>> final_sequences = paddle.nn.functional.gather_tree(ids, parents)
>>> print(final_sequences)
Tensor(shape=[3, 2, 2], dtype=int64, place=Place(cpu), stop_gradient=True,
[[[2, 2],
[1, 6]],
[[3, 3],
[6, 1]],
[[0, 1],
[9, 0]]])
final_sequences = paddle.nn.functional.gather_tree(ids, parents)
# [[[2, 2], [1, 6]], [[3, 3], [6, 1]], [[0, 1], [9, 0]]]
"""
if ids.ndim != 3:
......@@ -388,11 +396,11 @@ def temporal_shift(x, seg_num, shift_ratio=0.25, name=None, data_format="NCHW"):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
input = paddle.randn([6, 4, 2, 2])
out = F.temporal_shift(x=input, seg_num=2, shift_ratio=0.2)
>>> input = paddle.randn([6, 4, 2, 2])
>>> out = F.temporal_shift(x=input, seg_num=2, shift_ratio=0.2)
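            >>> # Hedged shape check (illustrative addition): temporal_shift only moves
            >>> # values across the seg_num fold of the batch dimension, so the output
            >>> # shape matches the input shape.
            >>> print(out.shape)
            [6, 4, 2, 2]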
"""
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
......
......@@ -181,13 +181,12 @@ def flash_attention(
Examples:
.. code-block:: python
# required: skiptest
import paddle
>>> import paddle
q = paddle.rand((1, 128, 2, 16), dtype=paddle.float16)
>>> paddle.seed(1)
>>> q = paddle.rand((1, 128, 2, 16))
output = paddle.nn.functional.flash_attention(q, q, q, 0.9, False, False)
print(output)
>>> output = paddle.nn.functional.flash_attention.flash_attention(q, q, q, 0.9, False, False)
"""
head_dim = query.shape[3]
sdp_func_name = _select_sdp(head_dim)
......@@ -340,13 +339,12 @@ def flash_attn_unpadded(
Examples:
.. code-block:: python
# required: skiptest
import paddle
q = paddle.rand((1, 128, 2, 16), dtype=paddle.float16)
>>> import paddle
>>> paddle.seed(1)
>>> q = paddle.rand((1, 128, 2, 16))
output = paddle.nn.functional.flash_attn_unpadded(q, q, q, 0.9, False, False)
print(output)
>>> output = paddle.nn.functional.flash_attention.flash_attn_unpadded(q, q, q, 0.9, False, False)
>>> print(output)
"""
if in_dynamic_mode():
(
......
......@@ -158,24 +158,33 @@ class Uniform(UniformInitializer):
Examples:
.. code-block:: python
import paddle
data = paddle.ones(shape=[3, 1, 2], dtype='float32')
weight_attr = paddle.framework.ParamAttr(
name="linear_weight",
initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
bias_attr = paddle.framework.ParamAttr(
name="linear_bias",
initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
# linear.weight: [[-0.46245047 0.05260676]
# [ 0.38054508 0.29169726]]
# linear.bias: [-0.2734719 0.23939109]
res = linear(data)
# res: [[[-0.3553773 0.5836951]]
# [[-0.3553773 0.5836951]]
# [[-0.3553773 0.5836951]]]
>>> import paddle
>>> paddle.seed(1)
>>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
>>> weight_attr = paddle.framework.ParamAttr(
... name="linear_weight",
... initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
>>> bias_attr = paddle.framework.ParamAttr(
... name="linear_bias",
... initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
>>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-0.48212373, 0.26492310],
[ 0.17605734, -0.45379421]])
>>> print(linear.bias)
Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[-0.11236754, 0.46462214])
>>> res = linear(data)
>>> print(res)
Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[-0.41843393, 0.27575102]],
[[-0.41843393, 0.27575102]],
[[-0.41843393, 0.27575102]]])
"""
def __init__(self, low=-1.0, high=1.0, name=None):
......
......@@ -214,24 +214,33 @@ class XavierNormal(XavierInitializer):
Examples:
.. code-block:: python
import paddle
data = paddle.ones(shape=[3, 1, 2], dtype='float32')
weight_attr = paddle.framework.ParamAttr(
name="linear_weight",
initializer=paddle.nn.initializer.XavierNormal())
bias_attr = paddle.framework.ParamAttr(
name="linear_bias",
initializer=paddle.nn.initializer.XavierNormal())
linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
# inear.weight: [[ 0.06910077 -0.18103665]
# [-0.02546741 -1.0402188 ]]
# linear.bias: [-0.5012929 0.12418364]
res = linear(data)
# res: [[[-0.4576595 -1.0970719]]
# [[-0.4576595 -1.0970719]]
# [[-0.4576595 -1.0970719]]]
>>> import paddle
>>> paddle.seed(1)
>>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
>>> weight_attr = paddle.framework.ParamAttr(
... name="linear_weight",
... initializer=paddle.nn.initializer.XavierNormal())
>>> bias_attr = paddle.framework.ParamAttr(
... name="linear_bias",
... initializer=paddle.nn.initializer.XavierNormal())
>>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-0.21607460, 0.08382989],
[ 0.29147008, -0.07049121]])
>>> print(linear.bias)
Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[1.06076419, 0.87684733])
>>> res = linear(data)
>>> print(res)
Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[1.13615966, 0.89018601]],
[[1.13615966, 0.89018601]],
[[1.13615966, 0.89018601]]])
"""
def __init__(self, fan_in=None, fan_out=None, name=None):
......@@ -266,24 +275,32 @@ class XavierUniform(XavierInitializer):
Examples:
.. code-block:: python
import paddle
data = paddle.ones(shape=[3, 1, 2], dtype='float32')
weight_attr = paddle.framework.ParamAttr(
name="linear_weight",
initializer=paddle.nn.initializer.XavierUniform())
bias_attr = paddle.framework.ParamAttr(
name="linear_bias",
initializer=paddle.nn.initializer.XavierUniform())
linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
# linear.weight: [[-0.04229349 -1.1248565 ]
# [-0.10789523 -0.5938053 ]]
# linear.bias: [ 1.1983747 -0.40201235]
res = linear(data)
# res: [[[ 1.0481861 -2.1206741]]
# [[ 1.0481861 -2.1206741]]
# [[ 1.0481861 -2.1206741]]]
>>> import paddle
>>> paddle.seed(1)
>>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
>>> weight_attr = paddle.framework.ParamAttr(
... name="linear_weight",
... initializer=paddle.nn.initializer.XavierUniform())
>>> bias_attr = paddle.framework.ParamAttr(
... name="linear_bias",
... initializer=paddle.nn.initializer.XavierUniform())
>>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-1.18095720, 0.64892638],
[ 0.43125069, -1.11156428]])
>>> print(linear.bias)
Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[-0.27524316, 1.13808715])
>>> res = linear(data)
>>> print(res)
Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[-1.02494967, 0.67544925]],
[[-1.02494967, 0.67544925]],
[[-1.02494967, 0.67544925]]])
"""
def __init__(self, fan_in=None, fan_out=None, name=None):
......