diff --git a/python/paddle/nn/decode.py b/python/paddle/nn/decode.py
index 030a2a048815b3303275a0c2fff0a85c4ca85789..4563df700e4115281b0e644ffc72429d41cd7276 100644
--- a/python/paddle/nn/decode.py
+++ b/python/paddle/nn/decode.py
@@ -189,20 +189,19 @@ class BeamSearchDecoder(Decoder):
         .. code-block:: python

-            import numpy as np
-            import paddle
-            from paddle.nn import BeamSearchDecoder, dynamic_decode
-            from paddle.nn import GRUCell, Linear, Embedding
-            trg_embeder = Embedding(100, 32)
-            output_layer = Linear(32, 32)
-            decoder_cell = GRUCell(input_size=32, hidden_size=32)
-            decoder = BeamSearchDecoder(decoder_cell,
-                                        start_token=0,
-                                        end_token=1,
-                                        beam_size=4,
-                                        embedding_fn=trg_embeder,
-                                        output_fn=output_layer)
-
+            >>> import numpy as np
+            >>> import paddle
+            >>> from paddle.nn import BeamSearchDecoder, dynamic_decode
+            >>> from paddle.nn import GRUCell, Linear, Embedding
+            >>> trg_embeder = Embedding(100, 32)
+            >>> output_layer = Linear(32, 32)
+            >>> decoder_cell = GRUCell(input_size=32, hidden_size=32)
+            >>> decoder = BeamSearchDecoder(decoder_cell,
+            ...                             start_token=0,
+            ...                             end_token=1,
+            ...                             beam_size=4,
+            ...                             embedding_fn=trg_embeder,
+            ...                             output_fn=output_layer)
     """

     def __init__(
@@ -1054,22 +1054,24 @@ def dynamic_decode(
         .. code-block:: python

-            import paddle
-            from paddle.nn import BeamSearchDecoder, dynamic_decode
-            from paddle.nn import GRUCell, Linear, Embedding
-            trg_embeder = Embedding(100, 32)
-            output_layer = Linear(32, 32)
-            decoder_cell = GRUCell(input_size=32, hidden_size=32)
-            decoder = BeamSearchDecoder(decoder_cell,
-                                        start_token=0,
-                                        end_token=1,
-                                        beam_size=4,
-                                        embedding_fn=trg_embeder,
-                                        output_fn=output_layer)
-            encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
-            outputs = dynamic_decode(decoder=decoder,
-                                     inits=decoder_cell.get_initial_states(encoder_output),
-                                     max_step_num=10)
+            >>> import paddle
+            >>> from paddle.nn import BeamSearchDecoder, dynamic_decode
+            >>> from paddle.nn import GRUCell, Linear, Embedding
+            >>> trg_embeder = Embedding(100, 32)
+            >>> output_layer = Linear(32, 32)
+            >>> decoder_cell = GRUCell(input_size=32, hidden_size=32)
+            >>> decoder = BeamSearchDecoder(decoder_cell,
+            ...                             start_token=0,
+            ...                             end_token=1,
+            ...                             beam_size=4,
+            ...                             embedding_fn=trg_embeder,
+            ...                             output_fn=output_layer)
+            >>> encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
+            >>> outputs = dynamic_decode(decoder=decoder,
+            ...                          inits=decoder_cell.get_initial_states(encoder_output),
+            ...                          max_step_num=10)
+            >>> print(outputs[0].shape)
+            [4, 11, 4]
     """
     if in_dynamic_mode():
         return _dynamic_decode_imperative(
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 15dbeb54c0e0a1b625027f21bd3837bbe55d0e42..944d5a59a4b3e57b8db807c8a938d598fac1e13d 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -91,15 +91,15 @@ class BCEWithLogitsLoss(Layer):
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
-            label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
-            bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
-            output = bce_logit_loss(logit, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.45618814)
+            >>> logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
+            >>> label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
+            >>> bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
+            >>> output = bce_logit_loss(logit, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.45618808)
     """
@@ -303,50 +303,47 @@ class CrossEntropyLoss(Layer):
        .. code-block:: python

-            # hard labels
-            import paddle
-            paddle.seed(99999)
-            N=100
-            C=200
-            reduction='mean'
-            input = paddle.rand([N, C], dtype='float64')
-            label = paddle.randint(0, C, shape=[N], dtype='int64')
-            weight = paddle.rand([C], dtype='float64')
-
-            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
-                weight=weight, reduction=reduction)
-            dy_ret = cross_entropy_loss(
-                                        input,
-                                        label)
-            print(dy_ret)
-            # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
-            #        5.34043430)
+            >>> # hard labels
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> N=100
+            >>> C=200
+            >>> reduction='mean'
+            >>> input = paddle.rand([N, C], dtype='float64')
+            >>> label = paddle.randint(0, C, shape=[N], dtype='int64')
+            >>> weight = paddle.rand([C], dtype='float64')
+
+            >>> cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+            ...     weight=weight, reduction=reduction)
+            >>> dy_ret = cross_entropy_loss(input, label)
+            >>> print(dy_ret)
+            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True,
+            5.33697682)

        .. code-block:: python

-            # soft labels
-            import paddle
-            paddle.seed(99999)
-            axis = -1
-            ignore_index = -100
-            N = 4
-            C = 3
-            shape = [N, C]
-            reduction='mean'
-            weight = None
-            logits = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
-            labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
-            labels /= paddle.sum(labels, axis=axis, keepdim=True)
-            paddle_loss_mean = paddle.nn.functional.cross_entropy(
-                logits,
-                labels,
-                soft_label=True,
-                axis=axis,
-                weight=weight,
-                reduction=reduction)
-            print(paddle_loss_mean)
-            # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
-            #        1.11043464)
+            >>> # soft labels
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> axis = -1
+            >>> ignore_index = -100
+            >>> N = 4
+            >>> C = 3
+            >>> shape = [N, C]
+            >>> reduction='mean'
+            >>> weight = None
+            >>> logits = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
+            >>> labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
+            >>> labels /= paddle.sum(labels, axis=axis, keepdim=True)
+            >>> paddle_loss_mean = paddle.nn.functional.cross_entropy(logits,
+            ...                                                       labels,
+            ...                                                       soft_label=True,
+            ...                                                       axis=axis,
+            ...                                                       weight=weight,
+            ...                                                       reduction=reduction)
+            >>> print(paddle_loss_mean)
+            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True,
+            1.14554912)
     """
@@ -439,21 +436,25 @@ class HSigmoidLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            paddle.set_device('cpu')
-
-            input = paddle.uniform([4, 3])
-            # [[0.56194401 -0.22450298 -0.10741806]  # random
-            #  [0.36136317 0.23556745 0.88748658]  # random
-            #  [0.18151939 0.80947340 -0.31078976]  # random
-            #  [0.68886101 -0.14239830 -0.41297770]]  # random
-            label = paddle.to_tensor([0, 1, 4, 5])
-            m = paddle.nn.HSigmoidLoss(3, 5)
-            out = m(input, label)
-            # [[2.42524505]
-            #  [1.74917245]
-            #  [3.14571381]
-            #  [2.34564662]]
+            >>> import paddle
+            >>> paddle.set_device('cpu')
+            >>> paddle.seed(2023)
+            >>> input = paddle.uniform([4, 3])
+            >>> print(input)
+            Tensor(shape=[4, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[ 0.73167229,  0.04029441, -0.48078126],
+             [ 0.81050646, -0.15199822, -0.18717426],
+             [ 0.94041789,  0.48874724,  0.03570259],
+             [ 0.46585739,  0.95573163, -0.91368192]])
+            >>> label = paddle.to_tensor([0, 1, 4, 5])
+            >>> m = paddle.nn.HSigmoidLoss(3, 6)
+            >>> out = m(input, label)
+            >>> print(out)
+            Tensor(shape=[4, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[1.94512916],
+             [2.26129627],
+             [2.36135936],
+             [2.97453213]])
     """

     def __init__(
@@ -558,13 +559,14 @@ class MSELoss(Layer):
        .. code-block:: python

-            import paddle
-            mse_loss = paddle.nn.loss.MSELoss()
-            input = paddle.to_tensor([1.5])
-            label = paddle.to_tensor([1.7])
-            output = mse_loss(input, label)
-            print(output)
-            # 0.04000002
+            >>> import paddle
+            >>> mse_loss = paddle.nn.loss.MSELoss()
+            >>> input = paddle.to_tensor([1.5])
+            >>> label = paddle.to_tensor([1.7])
+            >>> output = mse_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.04000002)
     """
@@ -642,29 +644,29 @@ class L1Loss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
-            label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]])
+            >>> input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
+            >>> label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]])

-            l1_loss = paddle.nn.L1Loss()
-            output = l1_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.34999999)
+            >>> l1_loss = paddle.nn.L1Loss()
+            >>> output = l1_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.34999999)

-            l1_loss = paddle.nn.L1Loss(reduction='sum')
-            output = l1_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        1.39999998)
+            >>> l1_loss = paddle.nn.L1Loss(reduction='sum')
+            >>> output = l1_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.39999998)

-            l1_loss = paddle.nn.L1Loss(reduction='none')
-            output = l1_loss(input, label)
-            print(output)
-            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            # [[0.20000005, 0.19999999],
-            # [0.20000000, 0.79999995]])
+            >>> l1_loss = paddle.nn.L1Loss(reduction='none')
+            >>> output = l1_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[0.20000005, 0.19999999],
+             [0.20000000, 0.79999995]])
     """
@@ -740,15 +742,15 @@ class BCELoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input = paddle.to_tensor([0.5, 0.6, 0.7])
-            label = paddle.to_tensor([1.0, 0.0, 1.0])
-            bce_loss = paddle.nn.BCELoss()
-            output = bce_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.65537101)
+            >>> input = paddle.to_tensor([0.5, 0.6, 0.7])
+            >>> label = paddle.to_tensor([1.0, 0.0, 1.0])
+            >>> bce_loss = paddle.nn.BCELoss()
+            >>> output = bce_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.65537095)
     """
@@ -840,20 +842,22 @@ class NLLLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            nll_loss = paddle.nn.loss.NLLLoss()
-            log_softmax = paddle.nn.LogSoftmax(axis=1)
+            >>> nll_loss = paddle.nn.loss.NLLLoss()
+            >>> log_softmax = paddle.nn.LogSoftmax(axis=1)

-            input = paddle.to_tensor([[0.88103855, 0.9908683 , 0.6226845 ],
-                                      [0.53331435, 0.07999352, 0.8549948 ],
-                                      [0.25879037, 0.39530203, 0.698465  ],
-                                      [0.73427284, 0.63575995, 0.18827209],
-                                      [0.05689114, 0.0862954 , 0.6325046 ]], "float32")
-            log_out = log_softmax(input)
-            label = paddle.to_tensor([0, 2, 1, 1, 0], "int64")
-            result = nll_loss(log_out, label)
-            print(result) # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, 1.07202101)
+            >>> input = paddle.to_tensor([[0.88103855, 0.9908683 , 0.6226845 ],
+            ...                           [0.53331435, 0.07999352, 0.8549948 ],
+            ...                           [0.25879037, 0.39530203, 0.698465  ],
+            ...                           [0.73427284, 0.63575995, 0.18827209],
+            ...                           [0.05689114, 0.0862954 , 0.6325046 ]], "float32")
+            >>> log_out = log_softmax(input)
+            >>> label = paddle.to_tensor([0, 2, 1, 1, 0], "int64")
+            >>> result = nll_loss(log_out, label)
+            >>> print(result)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.07202101)
     """
@@ -929,12 +933,15 @@ class PoissonNLLLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-
-            poisson_nll_loss = paddle.nn.loss.PoissonNLLLoss()
-            input = paddle.randn([5, 2], dtype=paddle.float32)
-            label = paddle.randn([5, 2], dtype=paddle.float32)
-            loss = poisson_nll_loss(input, label)
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> poisson_nll_loss = paddle.nn.loss.PoissonNLLLoss()
+            >>> input = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> label = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> loss = poisson_nll_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.52983975)
     """
@@ -1017,32 +1024,36 @@ class KLDivLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-
-            shape = (5, 20)
-            x = paddle.uniform(shape, min=-10, max=10).astype('float32')
-            target = paddle.uniform(shape, min=-10, max=10).astype('float32')
-
-            # 'batchmean' reduction, loss shape will be []
-            kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[]
-
-            # 'mean' reduction, loss shape will be []
-            kldiv_criterion = nn.KLDivLoss(reduction='mean')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[]
-
-            # 'sum' reduction, loss shape will be []
-            kldiv_criterion = nn.KLDivLoss(reduction='sum')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[]
-
-            # 'none' reduction, loss shape is same with X shape
-            kldiv_criterion = nn.KLDivLoss(reduction='none')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[5, 20]
+            >>> import paddle
+            >>> import paddle.nn as nn
+
+            >>> shape = (5, 20)
+            >>> x = paddle.uniform(shape, min=-10, max=10).astype('float32')
+            >>> target = paddle.uniform(shape, min=-10, max=10).astype('float32')
+
+            >>> # 'batchmean' reduction, loss shape will be []
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            []
+
+            >>> # 'mean' reduction, loss shape will be []
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='mean')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            []
+
+            >>> # 'sum' reduction, loss shape will be []
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='sum')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            []
+
+            >>> # 'none' reduction, loss shape is same with X shape
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='none')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            [5, 20]
     """
@@ -1099,16 +1110,16 @@ class MarginRankingLoss(Layer):
        .. code-block:: python

-            import paddle
-
-            input = paddle.to_tensor([[1, 2], [3, 4]], dtype="float32")
-            other = paddle.to_tensor([[2, 1], [2, 4]], dtype="float32")
-            label = paddle.to_tensor([[1, -1], [-1, -1]], dtype="float32")
-            margin_rank_loss = paddle.nn.MarginRankingLoss()
-            loss = margin_rank_loss(input, other, label)
+            >>> import paddle

-            print(loss)
-            # 0.75
+            >>> input = paddle.to_tensor([[1, 2], [3, 4]], dtype="float32")
+            >>> other = paddle.to_tensor([[2, 1], [2, 4]], dtype="float32")
+            >>> label = paddle.to_tensor([[1, -1], [-1, -1]], dtype="float32")
+            >>> margin_rank_loss = paddle.nn.MarginRankingLoss()
+            >>> loss = margin_rank_loss(input, other, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.75000000)
     """

     def __init__(self, margin=0.0, reduction='mean', name=None):
@@ -1155,50 +1166,41 @@ class CTCLoss(Layer):
    Examples:
        .. code-block:: python

-            # declarative mode
-            import paddle
-
-            # length of the longest logit sequence
-            max_seq_length = 4
-            #length of the longest label sequence
-            max_label_length = 3
-            # number of logit sequences
-            batch_size = 2
-            # class num
-            class_num = 3
-
-            log_probs = paddle.to_tensor([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04],
-                                           [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]],
-
-                                          [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01],
-                                           [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]],
-
-                                          [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02],
-                                           [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]],
-
-                                          [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01],
-                                           [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]],
-
-                                          [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02],
-                                           [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]], dtype="float32")
-            labels = paddle.to_tensor([[1, 2, 2],
-                                       [1, 2, 2]], dtype="int32")
-            input_lengths = paddle.to_tensor([5, 5], dtype="int64")
-            label_lengths = paddle.to_tensor([3, 3], dtype="int64")
-
-            loss = paddle.nn.CTCLoss(blank=0, reduction='none')(log_probs, labels,
-                                                                input_lengths,
-                                                                label_lengths)
-            print(loss)
-            # Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        [3.91798496, 2.90765190])
-
-            loss = paddle.nn.CTCLoss(blank=0, reduction='mean')(log_probs, labels,
-                                                                input_lengths,
-                                                                label_lengths)
-            print(loss)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        1.13760614)
+            >>> # declarative mode
+            >>> import paddle
+
+            >>> # length of the longest logit sequence
+            >>> max_seq_length = 4
+            >>> # length of the longest label sequence
+            >>> max_label_length = 3
+            >>> # number of logit sequences
+            >>> batch_size = 2
+            >>> # class num
+            >>> class_num = 3
+
+            >>> log_probs = paddle.to_tensor([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04],
+            ...                                [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]],
+            ...                               [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01],
+            ...                                [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]],
+            ...                               [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02],
+            ...                                [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]],
+            ...                               [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01],
+            ...                                [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]],
+            ...                               [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02],
+            ...                                [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]], dtype="float32")
+            >>> labels = paddle.to_tensor([[1, 2, 2], [1, 2, 2]], dtype="int32")
+            >>> input_lengths = paddle.to_tensor([5, 5], dtype="int64")
+            >>> label_lengths = paddle.to_tensor([3, 3], dtype="int64")
+
+            >>> loss = paddle.nn.CTCLoss(blank=0, reduction='none')(log_probs, labels, input_lengths, label_lengths)
+            >>> print(loss)
+            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [3.91798496, 2.90765214])
+
+            >>> loss = paddle.nn.CTCLoss(blank=0, reduction='mean')(log_probs, labels, input_lengths, label_lengths)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.13760614)
     """

     def __init__(self, blank=0, reduction='mean'):
@@ -1247,33 +1249,33 @@ class RNNTLoss(Layer):
    Examples:
        .. code-block:: python

-            # declarative mode
-            import numpy as np
-            import paddle
-            from paddle.nn import RNNTLoss
-
-            fn = RNNTLoss(reduction='sum', fastemit_lambda=0.0)
-
-            acts = np.array([[[[0.1, 0.6, 0.1, 0.1, 0.1],
-                               [0.1, 0.1, 0.6, 0.1, 0.1],
-                               [0.1, 0.1, 0.2, 0.8, 0.1]],
-                              [[0.1, 0.6, 0.1, 0.1, 0.1],
-                               [0.1, 0.1, 0.2, 0.1, 0.1],
-                               [0.7, 0.1, 0.2, 0.1, 0.1]]]])
-            labels = [[1, 2]]
-
-            acts = paddle.to_tensor(acts, stop_gradient=False)
-
-            lengths = [acts.shape[1]] * acts.shape[0]
-            label_lengths = [len(l) for l in labels]
-            labels = paddle.to_tensor(labels, paddle.int32)
-            lengths = paddle.to_tensor(lengths, paddle.int32)
-            label_lengths = paddle.to_tensor(label_lengths, paddle.int32)
-
-            costs = fn(acts, labels, lengths, label_lengths)
-            print(costs)
-            # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False,
-            #        4.49566677)
+            >>> # declarative mode
+            >>> import numpy as np
+            >>> import paddle
+            >>> from paddle.nn import RNNTLoss
+
+            >>> fn = RNNTLoss(reduction='sum', fastemit_lambda=0.0)
+
+            >>> acts = np.array([[[[0.1, 0.6, 0.1, 0.1, 0.1],
+            ...                    [0.1, 0.1, 0.6, 0.1, 0.1],
+            ...                    [0.1, 0.1, 0.2, 0.8, 0.1]],
+            ...                   [[0.1, 0.6, 0.1, 0.1, 0.1],
+            ...                    [0.1, 0.1, 0.2, 0.1, 0.1],
+            ...                    [0.7, 0.1, 0.2, 0.1, 0.1]]]])
+            >>> labels = [[1, 2]]
+
+            >>> acts = paddle.to_tensor(acts, stop_gradient=False)
+
+            >>> lengths = [acts.shape[1]] * acts.shape[0]
+            >>> label_lengths = [len(l) for l in labels]
+            >>> labels = paddle.to_tensor(labels, paddle.int32)
+            >>> lengths = paddle.to_tensor(lengths, paddle.int32)
+            >>> label_lengths = paddle.to_tensor(label_lengths, paddle.int32)
+
+            >>> costs = fn(acts, labels, lengths, label_lengths)
+            >>> print(costs)
+            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False,
+            -2.85042444)
     """

     def __init__(
@@ -1346,13 +1348,15 @@ class SmoothL1Loss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            input = paddle.rand([3, 3]).astype("float32")
-            label = paddle.rand([3, 3]).astype("float32")
-            loss = paddle.nn.SmoothL1Loss()
-            output = loss(input, label)
-            print(output)
-            # 0.049606
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> input = paddle.rand([3, 3]).astype("float32")
+            >>> label = paddle.rand([3, 3]).astype("float32")
+            >>> loss = paddle.nn.SmoothL1Loss()
+            >>> output = loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.08307374)
     """

     def __init__(self, reduction='mean', delta=1.0, name=None):
@@ -1414,21 +1418,23 @@ class MultiLabelSoftMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
+            >>> import paddle
+            >>> import paddle.nn as nn

-            input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
-            label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
+            >>> input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
+            >>> label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)

-            multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='none')
-            loss = multi_label_soft_margin_loss(input, label)
-            print(loss)
-            # Tensor([3.49625897, 0.71111226, 0.43989015])
+            >>> multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='none')
+            >>> loss = multi_label_soft_margin_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [3.49625897, 0.71111226, 0.43989015])

-            multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean')
-            loss = multi_label_soft_margin_loss(input, label)
-            print(loss)
-            # Tensor(1.54908717)
+            >>> multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean')
+            >>> loss = multi_label_soft_margin_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.54908717)
     """

     def __init__(self, weight=None, reduction="mean", name=None):
@@ -1512,24 +1518,26 @@ class HingeEmbeddingLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-
-            input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
-            # label elements in {1., -1.}
-            label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
-
-            hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='none')
-            loss = hinge_embedding_loss(input, label)
-            print(loss)
-            # Tensor([[0., -2., 0.],
-            #         [0., -1., 2.],
-            #         [1., 1., 1.]])
-
-            hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean')
-            loss = hinge_embedding_loss(input, label)
-            print(loss)
-            # Tensor(0.22222222)
+            >>> import paddle
+            >>> import paddle.nn as nn
+
+            >>> input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
+            >>> # label elements in {1., -1.}
+            >>> label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
+
+            >>> hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='none')
+            >>> loss = hinge_embedding_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[ 0., -2.,  0.],
+             [ 0., -1.,  2.],
+             [ 1.,  1.,  1.]])
+
+            >>> hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean')
+            >>> loss = hinge_embedding_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.22222222)
     """

     def __init__(self, margin=1.0, reduction="mean", name=None):
@@ -1595,23 +1603,29 @@ class CosineEmbeddingLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input1 = paddle.to_tensor([[1.6, 1.2, -0.5], [3.2, 2.6, -5.8]], 'float32')
-            input2 = paddle.to_tensor([[0.5, 0.5, -1.8], [2.3, -1.4, 1.1]], 'float32')
-            label = paddle.to_tensor([1, -1], 'int64')
+            >>> input1 = paddle.to_tensor([[1.6, 1.2, -0.5], [3.2, 2.6, -5.8]], 'float32')
+            >>> input2 = paddle.to_tensor([[0.5, 0.5, -1.8], [2.3, -1.4, 1.1]], 'float32')
+            >>> label = paddle.to_tensor([1, -1], 'int64')

-            cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean')
-            output = cosine_embedding_loss(input1, input2, label)
-            print(output) # 0.21155193
+            >>> cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean')
+            >>> output = cosine_embedding_loss(input1, input2, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.21155193)

-            cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum')
-            output = cosine_embedding_loss(input1, input2, label)
-            print(output) # 0.42310387
+            >>> cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum')
+            >>> output = cosine_embedding_loss(input1, input2, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.42310387)

-            cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none')
-            output = cosine_embedding_loss(input1, input2, label)
-            print(output) # [0.42310387, 0.        ]
+            >>> cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none')
+            >>> output = cosine_embedding_loss(input1, input2, label)
+            >>> print(output)
+            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [0.42310387, 0.        ])
     """
@@ -1703,21 +1717,23 @@ class TripletMarginWithDistanceLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            from paddle.nn import TripletMarginWithDistanceLoss
+            >>> import paddle
+            >>> from paddle.nn import TripletMarginWithDistanceLoss

-            input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
-            positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
-            negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
-            triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='none')
-            loss = triplet_margin_with_distance_loss(input, positive, negative,)
-            print(loss)
-            # Tensor([0.        , 0.57496738, 0.        ])
+            >>> input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
+            >>> positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
+            >>> negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
+            >>> triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='none')
+            >>> loss = triplet_margin_with_distance_loss(input, positive, negative,)
+            >>> print(loss)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [0.        , 0.57496595, 0.        ])
-            triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean')
-            loss = triplet_margin_with_distance_loss(input, positive, negative,)
-            print(loss)
-            # Tensor(0.19165580)
+            >>> triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean')
+            >>> loss = triplet_margin_with_distance_loss(input, positive, negative,)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.19165532)
     """
@@ -1812,20 +1828,22 @@ class TripletMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
-            positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
-            negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
-            triplet_margin_loss = paddle.nn.TripletMarginLoss(reduction='none')
-            loss = triplet_margin_loss(input, positive, negative)
-            print(loss)
-            # Tensor([0.        , 0.57496738, 0.        ])
+            >>> input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
+            >>> positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
+            >>> negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
+            >>> triplet_margin_loss = paddle.nn.TripletMarginLoss(reduction='none')
+            >>> loss = triplet_margin_loss(input, positive, negative)
+            >>> print(loss)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [0.        , 0.57496595, 0.        ])

-            triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean', )
-            loss = triplet_margin_loss(input, positive, negative,)
-            print(loss)
-            # Tensor(0.19165580)
+            >>> triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean')
+            >>> loss = triplet_margin_loss(input, positive, negative)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            2.40039468)
     """
@@ -1924,15 +1942,17 @@ class MultiMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
+            >>> import paddle
+            >>> import paddle.nn as nn

-            input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
-            label = paddle.to_tensor([0, 1, 2], dtype=paddle.int32)
+            >>> input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
+            >>> label = paddle.to_tensor([0, 1, 2], dtype=paddle.int32)

-            multi_margin_loss = nn.MultiMarginLoss(reduction='mean')
-            loss = multi_margin_loss(input, label)
-            print(loss)
+            >>> multi_margin_loss = nn.MultiMarginLoss(reduction='mean')
+            >>> loss = multi_margin_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.11111104)
     """

     def __init__(
@@ -2003,31 +2023,30 @@ class SoftMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-
-            input = paddle.to_tensor([[0.5, 0.6, 0.7],[0.3, 0.5, 0.2]], 'float32')
-            label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32')
-            soft_margin_loss = paddle.nn.SoftMarginLoss()
-            output = soft_margin_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.64022040)
-
-            input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64")
-            label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64")
-            label_np[label_np==0]=-1
-            input = paddle.to_tensor(input_np)
-            label = paddle.to_tensor(label_np)
-            soft_margin_loss = paddle.nn.SoftMarginLoss(reduction='none')
-            output = soft_margin_loss(input, label)
-            print(output)
-            # Tensor(shape=[5, 5], dtype=float64, place=Place(gpu:0), stop_gradient=True,
-            # [[0.61739663, 0.51405668, 1.09346100, 0.42385561, 0.91602303],
-            #  [0.76997038, 1.01977148, 0.98971722, 1.13976032, 0.88152088],
-            #  [0.55476735, 1.10505384, 0.89923519, 0.45018155, 1.06587511],
-            #  [0.37998142, 0.48067240, 0.47791212, 0.55664053, 0.98581399],
-            #  [0.78571653, 0.59319711, 0.39701841, 0.76172109, 0.83781742]])
-
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> input = paddle.to_tensor([[0.5, 0.6, 0.7],[0.3, 0.5, 0.2]], 'float32')
+            >>> label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32')
+            >>> soft_margin_loss = paddle.nn.SoftMarginLoss()
+            >>> output = soft_margin_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.64022040)
+
+            >>> input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64")
+            >>> label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64")
+            >>> label_np[label_np==0]=-1
+            >>> input = paddle.to_tensor(input_np)
+            >>> label = paddle.to_tensor(label_np)
+            >>> soft_margin_loss = paddle.nn.SoftMarginLoss(reduction='none')
+            >>> output = soft_margin_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[5, 5], dtype=float64, place=Place(cpu), stop_gradient=True,
+            [[1.10725628, 0.48778139, 0.56217249, 1.12581404, 0.51430043],
+             [0.90375795, 0.37761249, 0.43007557, 0.95089798, 0.43288319],
+             [1.16043599, 0.63015939, 0.51362715, 0.43617541, 0.57783301],
+             [0.81927846, 0.52558369, 0.59713908, 0.83100696, 0.50811616],
+             [0.82684205, 1.02064907, 0.50296995, 1.13461733, 0.93222519]])
     """

     def __init__(self, reduction='mean', name=None):
@@ -2100,16 +2119,23 @@ class GaussianNLLLoss(Layer):
    Examples::
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-
-            input = paddle.randn([5, 2], dtype=paddle.float32)
-            label = paddle.randn([5, 2], dtype=paddle.float32)
-            variance = paddle.ones([5, 2], dtype=paddle.float32)
-
-            gs_nll_loss = nn.GaussianNLLLoss(full=False, epsilon=1e-6, reduction='none')
-            loss = gs_nll_loss(input, label, variance)
-            print(loss)
+            >>> import paddle
+            >>> import paddle.nn as nn
+            >>> paddle.seed(2023)
+
+            >>> input = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> label = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> variance = paddle.ones([5, 2], dtype=paddle.float32)
+
+            >>> gs_nll_loss = nn.GaussianNLLLoss(full=False, epsilon=1e-6, reduction='none')
+            >>> loss = gs_nll_loss(input, label, variance)
+            >>> print(loss)
+            Tensor(shape=[5, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[0.21808575, 1.43013096],
+             [1.05245590, 0.00394560],
+             [1.20861185, 0.00000062],
+             [0.56946373, 0.73300570],
+             [0.37142906, 0.12038800]])

    Note:
        The clamping of ``variance`` is ignored with respect to autograd, and so the
diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py
index 0e94ecef9f63380615655d391bc2f949dbc7e9ae..62694be27decbd9551586c800ece3232f99ee423 100644
--- a/python/paddle/nn/quant/format.py
+++ b/python/paddle/nn/quant/format.py
@@ -149,29 +149,31 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
     It defines some functions to convert quantizers and observers to quantize
     or dequantize operators that maintain the quantization parameters used
     during inference.
+
    Examples:
-        .. code-block:: python
-
-            # Given codes in ./customized_quanter.py
-            class CustomizedQuantedLayer(ConvertibleQuantedLayer):
-                def __init__(self):
-                    super().__init__()
-                    self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
-                    self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
-                    self.quanter_for_weight_a = None
-                    self.activation_weight = None
-                def forward(self, input):
-                    qweight_a = self.quanter_for_weight_a(self.weight_a)
-                    weight_b = self.weight_b
-                    qinput = self.activation_weight(input)
-                    // compute with qweight_a, weight_b and qinput.
-                    return qweight * qinput + weight_b
-
-            def weights_to_quanters(self):
-                return [('weight_a', 'quanter_for_weight_a')]
-
-            def activation_quanters(self):
-                return ['activation_weight']
+        .. code-block:: python
+
+            >>> # Given codes in ./customized_quanter.py
+            >>> class CustomizedQuantedLayer(ConvertibleQuantedLayer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
+            ...         self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
+            ...         self.quanter_for_weight_a = None
+            ...         self.activation_weight = None
+            ...
+            ...     def forward(self, input):
+            ...         qweight_a = self.quanter_for_weight_a(self.weight_a)
+            ...         weight_b = self.weight_b
+            ...         qinput = self.activation_weight(input)
+            ...         # compute with qweight_a, weight_b and qinput.
+            ...         return qweight_a * qinput + weight_b
+            ...
+            ...     def weights_to_quanters(self):
+            ...         return [('weight_a', 'quanter_for_weight_a')]
+            ...
+            ...     def activation_quanters(self):
+            ...         return ['activation_weight']
     """

     def __init__(self):
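The converted examples above are standard doctest format (`>>>` for statements, `...` for continuations, expected output on the lines that follow), so they can be checked outside of CI with nothing but the standard library. A minimal sketch of such a check, assuming a local Paddle install; `paddle.nn.L1Loss` is picked arbitrarily from this diff, and the device is pinned to CPU to match the `place=Place(cpu)` fields in the expected outputs:

    import doctest

    import paddle

    # The expected outputs in this diff were captured on CPU; pin the device
    # so the printed `place=Place(cpu)` field matches.
    paddle.set_device('cpu')

    # Run the `>>>` examples embedded in one converted docstring.
    # NORMALIZE_WHITESPACE tolerates alignment-only differences in the
    # printed tensors.
    doctest.run_docstring_examples(
        paddle.nn.L1Loss,
        globs={},  # each example does its own `import paddle`
        verbose=True,
        optionflags=doctest.NORMALIZE_WHITESPACE,
    )

Examples whose results depend on random state call `paddle.seed(2023)` first, so they reproduce exactly on the same build; low-order float digits can still differ across platforms or devices, which is why the diff also normalizes the hard-coded outputs to CPU.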