diff --git a/python/paddle/static/amp/bf16/amp_lists.py b/python/paddle/static/amp/bf16/amp_lists.py index cd4d6bdb329b400e91e12661e23a66c5c34f980e..0d6d3997aebd5f58eed60641312ee79cba460f6b 100644 --- a/python/paddle/static/amp/bf16/amp_lists.py +++ b/python/paddle/static/amp/bf16/amp_lists.py @@ -35,10 +35,11 @@ class AutoMixedPrecisionListsBF16: Examples: .. code-block:: python - import paddle - paddle.enable_static() - with paddle.static.amp.bf16_guard(): - paddle.static.amp.bf16.AutoMixedPrecisionListsBF16(custom_fp32_list={'lstm'}) + + >>> import paddle + >>> paddle.enable_static() + >>> with paddle.static.amp.bf16.bf16_guard(): + ... paddle.static.amp.bf16.AutoMixedPrecisionListsBF16(custom_fp32_list={'lstm'}) """ def __init__( diff --git a/python/paddle/static/amp/bf16/amp_utils.py b/python/paddle/static/amp/bf16/amp_utils.py index 03b4dfce151ad42a23f9526db446e3ffebb23913..55ca4a5d06c86cfb1dd5b3fa39774e821bc0d85b 100644 --- a/python/paddle/static/amp/bf16/amp_utils.py +++ b/python/paddle/static/amp/bf16/amp_utils.py @@ -230,18 +230,18 @@ def bf16_guard(): Examples: .. code-block:: python - import numpy as np - import paddle - import paddle.nn.functional as F - paddle.enable_static() - data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') - conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) - - with paddle.static.amp.bf16_guard(): - bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") - pool = F.max_pool2d(bn, kernel_size=2, stride=2) - hidden = paddle.static.nn.fc(pool, size=10) - loss = paddle.mean(hidden) + >>> import numpy as np + >>> import paddle + >>> import paddle.nn.functional as F + >>> paddle.enable_static() + >>> data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') + >>> conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) + + >>> with paddle.static.amp.bf16.bf16_guard(): + ... bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") + ... pool = F.max_pool2d(bn, kernel_size=2, stride=2) + ... hidden = paddle.static.nn.fc(pool, size=10) + ... loss = paddle.mean(hidden) """ with framework.name_scope(prefix=_bf16_guard_pattern): yield diff --git a/python/paddle/static/amp/bf16/decorator.py b/python/paddle/static/amp/bf16/decorator.py index 66963e25634f09a1f73aed6df7945d726c0b9e40..47b19b697d1ec00bdffeddb8d4438a4359018e81 100644 --- a/python/paddle/static/amp/bf16/decorator.py +++ b/python/paddle/static/amp/bf16/decorator.py @@ -135,42 +135,44 @@ class OptimizerWithMixedPrecision: Examples: .. code-block:: python - import numpy as np - import paddle - import paddle.nn.functional as F - paddle.enable_static() - - def run_example_code(): - place = paddle.CPUPlace(0) - exe = paddle.static.Executor(place) - data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') - conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) - # 1) Use bf16_guard to control the range of bf16 kernels used. - with paddle.static.amp.bf16_guard(): - bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") - pool = F.max_pool2d(bn, kernel_size=2, stride=2) - hidden = paddle.static.nn.fc(pool, size=10) - loss = paddle.mean(hidden) - # 2) Create the optimizer and set `multi_precision` to True. - # Setting `multi_precision` to True can avoid the poor accuracy - # or the slow convergence in a way. - optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True) - # 3) These ops in `custom_fp32_list` will keep in the float32 computation type. 
-                amp_list = paddle.static.amp.CustomOpLists(
-                    custom_fp32_list=['pool2d'])
-                # 4) The entry of Paddle AMP.
-                # Enable pure bf16 training by setting `use_pure_bf16` to True.
-                optimizer = paddle.static.amp.bf16.decorate_bf16(
-                    optimizer,
-                    amp_list,
-                    use_pure_bf16=True)
-                # If you don't use the default_startup_program(), you sholud pass
-                # your defined `startup_program` into `minimize`.
-                optimizer.minimize(loss)
-                exe.run(paddle.static.default_startup_program())
-                # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`).
-                # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
-                optimizer.amp_init(place, scope=paddle.static.global_scope())
+            >>> import numpy as np
+            >>> import paddle
+            >>> import paddle.nn.functional as F
+            >>> paddle.enable_static()
+
+            >>> def run_example_code():
+            ...     place = paddle.CPUPlace()
+            ...     exe = paddle.static.Executor(place)
+            ...     data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
+            ...     conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
+            ...     # 1) Use bf16_guard to control the range of bf16 kernels used.
+            ...     with paddle.static.amp.bf16.bf16_guard():
+            ...         bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+            ...         pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+            ...         hidden = paddle.static.nn.fc(pool, size=10)
+            ...         loss = paddle.mean(hidden)
+            ...     # 2) Create the optimizer and set `multi_precision` to True.
+            ...     # Setting `multi_precision` to True can help avoid poor accuracy
+            ...     # or slow convergence in some cases.
+            ...     optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
+            ...     # 3) The ops in `custom_black_list` will be kept in the float32 computation type.
+            ...     amp_list = paddle.static.amp.CustomOpLists(
+            ...         custom_black_list=['pool2d'])
+            ...     # 4) The entry of Paddle AMP.
+            ...     # Enable pure bf16 training by setting `use_pure_bf16` to True.
+            ...     optimizer = paddle.static.amp.bf16.decorate_bf16(
+            ...         optimizer,
+            ...         amp_list,
+            ...         use_pure_bf16=True)
+            ...     # If you don't use the default_startup_program(), you should pass
+            ...     # your defined `startup_program` into `minimize`.
+            ...     optimizer.minimize(loss)
+            ...     exe.run(paddle.static.default_startup_program())
+            ...     # 5) Use `amp_init` after FP32 parameters initialization (such as `exe.run(startup_program)`).
+            ...     # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
+            ...     optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+            >>> run_example_code()
         """
         assert (
@@ -263,63 +265,68 @@ def decorate_bf16(
         An optimizer acting like a normal one but with mixed-precision training
         enabled.
 
-    Examples 1:
-        .. code-block:: python
+    Examples:
+        .. code-block:: python
+            :name: example-1
 
             # fp32&bf16 list based strategy example
-            import paddle
-            import paddle.static as static
+            >>> import paddle
+            >>> import paddle.static as static
+
+            >>> paddle.enable_static()
+
+            >>> data = static.data(name='X', shape=[None, 1], dtype='float32')
+            >>> hidden = static.nn.fc(x=data, size=10)
+            >>> loss = paddle.mean(hidden)
+            >>> optimizer = paddle.optimizer.Adam(learning_rate=0.001)
+
+            >>> mp_optimizer = static.amp.bf16.decorate_bf16(optimizer=optimizer)
 
-            paddle.enable_static()
+            >>> ops, param_grads = mp_optimizer.minimize(loss)
 
-            data = static.data(name='X', shape=[None, 1], dtype='float32')
-            hidden = static.nn.fc(x=data, size=10)
-            loss = paddle.mean(hidden)
-            optimizer = paddle.optimizer.Adam(learning_rate=0.001)
-            mp_optimizer = static.amp.decorate_bf16(optimizer=optimizer)
-            ops, param_grads = mp_optimizer.minimize(loss)
-    Examples 2:
         .. code-block:: python
+            :name: example-2
 
             # pure bf16 training example
-            import numpy as np
-            import paddle
-            import paddle.nn.functional as F
-
-            def run_example_code():
-                place = paddle.CPUPlace(0)
-                exe = paddle.static.Executor(place)
-                data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
-                conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
-                # 1) Use bf16_guard to control the range of bf16 kernels used.
-                with paddle.static.amp.bf16_guard():
-                    bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
-                    pool = F.max_pool2d(bn, kernel_size=2, stride=2)
-                    hidden = paddle.static.nn.fc(pool, size=10)
-                    loss = paddle.mean(hidden)
-                # 2) Create the optimizer and set `multi_precision` to True.
-                # Setting `multi_precision` to True can avoid the poor accuracy
-                # or the slow convergence in a way.
-                optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
-                # 3) These ops in `custom_fp32_list` will keep in the float32 computation type.
-                amp_list = paddle.static.amp.CustomOpLists(
-                    custom_fp32_list=['pool2d'])
-                # 4) The entry of Paddle AMP.
-                # Enable pure bf16 training by setting `use_pure_bf16` to True.
-                optimizer = paddle.static.amp.decorate_bf16(
-                    optimizer,
-                    amp_list,
-                    use_pure_bf16=True)
-                # If you don't use the default_startup_program(), you sholud pass
-                # your defined `startup_program` into `minimize`.
-                optimizer.minimize(loss)
-                exe.run(paddle.static.default_startup_program())
-                # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`).
-                # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
-                optimizer.amp_init(place, scope=paddle.static.global_scope())
+            >>> import numpy as np
+            >>> import paddle
+            >>> import paddle.nn.functional as F
+
+            >>> def run_example_code():
+            ...     place = paddle.CPUPlace()
+            ...     exe = paddle.static.Executor(place)
+            ...     data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
+            ...     conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
+            ...     # 1) Use bf16_guard to control the range of bf16 kernels used.
+            ...     with paddle.static.amp.bf16.bf16_guard():
+            ...         bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+            ...         pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+            ...         hidden = paddle.static.nn.fc(pool, size=10)
+            ...         loss = paddle.mean(hidden)
+            ...     # 2) Create the optimizer and set `multi_precision` to True.
+            ...     # Setting `multi_precision` to True can help avoid poor accuracy
+            ...     # or slow convergence in some cases.
+            ...     optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
+            ...     # 3) The ops in `custom_black_list` will be kept in the float32 computation type.
+            ...     amp_list = paddle.static.amp.CustomOpLists(
+            ...         custom_black_list=['pool2d'])
+            ...     # 4) The entry of Paddle AMP.
+            ...     # Enable pure bf16 training by setting `use_pure_bf16` to True.
+            ...     optimizer = paddle.static.amp.bf16.decorate_bf16(
+            ...         optimizer,
+            ...         amp_list,
+            ...         use_pure_bf16=True)
+            ...     # If you don't use the default_startup_program(), you should pass
+            ...     # your defined `startup_program` into `minimize`.
+            ...     optimizer.minimize(loss)
+            ...     exe.run(paddle.static.default_startup_program())
+            ...     # 5) Use `amp_init` after FP32 parameters initialization (such as `exe.run(startup_program)`).
+            ...     # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
+            ...     optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+            >>> run_example_code()
     """
     if amp_lists is None:
diff --git a/python/paddle/static/amp/debugging.py b/python/paddle/static/amp/debugging.py
index 5a894495d98f5e679e288d248d10744075af4d5e..38e3764203ab1c850f7b5ed3aa76f005180e8ae3 100644
--- a/python/paddle/static/amp/debugging.py
+++ b/python/paddle/static/amp/debugging.py
@@ -203,65 +203,64 @@ def collect_operator_stats(program=None, print_subblocks=False):
 
     Examples:
 
-        .. code-block:: python
-
-            import paddle
-
-            paddle.enable_static()
-
-            class SimpleConvNet(paddle.nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self.conv = paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3)
-                    self.linear = paddle.nn.Linear(in_features=26, out_features=10)
-
-                def forward(self, x):
-                    out = self.conv(x)
-                    out = paddle.nn.functional.relu(out)
-                    out = self.linear(out)
-                    out = paddle.nn.functional.softmax(out)
-                    return out
-
-            main_program = paddle.static.Program()
-            startup_program = paddle.static.Program()
-            with paddle.utils.unique_name.guard():
-                with paddle.static.program_guard(main_program, startup_program):
-                    model = SimpleConvNet()
-                    x = paddle.static.data(
-                        name='input', shape=[None, 1, 28, 28], dtype='float32'
-                    )
-                    out = model(x)
-                    loss = paddle.mean(out)
-                    optimizer = paddle.optimizer.AdamW()
-                    optimizer = paddle.static.amp.decorate(optimizer)
-                    optimizer.minimize(loss)
-            paddle.static.amp.debugging.collect_operator_stats(main_program)
-            # <------------------------------------------------ op list of all blocks ------------------------------------------------->
-            # <------------------------------------------------------- op list -------------------------------------------------------->
-            # <--------------- Op Name ---------------- | -- FP16 Calls --- | -- BF16 Calls --- | --- FP32 Calls--- | -- Other Calls -->
-            # adamw | 0 | 0 | 4 | 0
-            # cast | 5 | 0 | 6 | 0
-            # check_finite_and_unscale | 0 | 0 | 1 | 0
-            # conv2d | 1 | 0 | 0 | 0
-            # conv2d_grad | 1 | 0 | 0 | 0
-            # elementwise_add | 2 | 0 | 0 | 0
-            # elementwise_add_grad | 2 | 0 | 0 | 0
-            # elementwise_mul | 0 | 0 | 1 | 0
-            # elementwise_mul_grad | 0 | 0 | 1 | 0
-            # fill_constant | 0 | 0 | 1 | 0
-            # matmul_v2 | 1 | 0 | 0 | 0
-            # matmul_v2_grad | 1 | 0 | 0 | 0
-            # memcpy | 0 | 0 | 0 | 1
-            # reduce_mean | 0 | 0 | 1 | 0
-            # reduce_mean_grad | 0 | 0 | 1 | 0
-            # relu | 1 | 0 | 0 | 0
-            # relu_grad | 1 | 0 | 0 | 0
-            # reshape2 | 0 | 0 | 1 | 0
-            # reshape2_grad | 0 | 0 | 1 | 0
-            # softmax | 0 | 0 | 1 | 0
-            # softmax_grad | 0 | 0 | 1 | 0
-            # update_loss_scaling | 0 | 0 | 1 | 0
-            # <----------------------------------------------------- op count: 22 
-----------------------------------------------------> + .. code-block:: python + + >>> import paddle + >>> paddle.enable_static() + + >>> class SimpleConvNet(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.conv = paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3) + ... self.linear = paddle.nn.Linear(in_features=26, out_features=10) + ... + ... def forward(self, x): + ... out = self.conv(x) + ... out = paddle.nn.functional.relu(out) + ... out = self.linear(out) + ... out = paddle.nn.functional.softmax(out) + ... return out + + >>> main_program = paddle.static.Program() + >>> startup_program = paddle.static.Program() + >>> with paddle.utils.unique_name.guard(): + ... with paddle.static.program_guard(main_program, startup_program): + ... model = SimpleConvNet() + ... x = paddle.static.data( + ... name='input', shape=[None, 1, 28, 28], dtype='float32' + ... ) + ... out = model(x) + ... loss = paddle.mean(out) + ... optimizer = paddle.optimizer.AdamW() + ... optimizer = paddle.static.amp.decorate(optimizer) + ... optimizer.minimize(loss) + >>> paddle.static.amp.debugging.collect_operator_stats(main_program) + <------------------------------------------------ op list of all blocks -------------------------------------------------> + <------------------------------------------------------- op list --------------------------------------------------------> + <--------------- Op Name ---------------- | -- FP16 Calls --- | -- BF16 Calls --- | --- FP32 Calls--- | -- Other Calls --> + adamw | 0 | 0 | 4 | 0 + cast | 5 | 0 | 6 | 0 + check_finite_and_unscale | 0 | 0 | 1 | 0 + conv2d | 1 | 0 | 0 | 0 + conv2d_grad | 1 | 0 | 0 | 0 + elementwise_add | 2 | 0 | 0 | 0 + elementwise_add_grad | 2 | 0 | 0 | 0 + elementwise_mul | 0 | 0 | 1 | 0 + elementwise_mul_grad | 0 | 0 | 1 | 0 + fill_constant | 0 | 0 | 1 | 0 + matmul_v2 | 1 | 0 | 0 | 0 + matmul_v2_grad | 1 | 0 | 0 | 0 + memcpy | 0 | 0 | 0 | 1 + reduce_mean | 0 | 0 | 1 | 0 + reduce_mean_grad | 0 | 0 | 1 | 0 + relu | 1 | 0 | 0 | 0 + relu_grad | 1 | 0 | 0 | 0 + reshape2 | 0 | 0 | 1 | 0 + reshape2_grad | 0 | 0 | 1 | 0 + softmax | 0 | 0 | 1 | 0 + softmax_grad | 0 | 0 | 1 | 0 + update_loss_scaling | 0 | 0 | 1 | 0 + <----------------------------------------------------- op count: 22 -----------------------------------------------------> """ def _convert_to_list(op_stats_unit_dict): diff --git a/python/paddle/static/amp/decorator.py b/python/paddle/static/amp/decorator.py index 3c59e76d03af4a262fd30e0cc9fb620d60e555fc..440048f70209baca2eb13feabffd47d5f080543d 100644 --- a/python/paddle/static/amp/decorator.py +++ b/python/paddle/static/amp/decorator.py @@ -316,47 +316,48 @@ class OptimizerWithMixedPrecision: Examples: .. code-block:: python - import numpy as np - import paddle - import paddle.nn.functional as F - paddle.enable_static() - - def run_example_code(): - place = paddle.CUDAPlace(0) - exe = paddle.static.Executor(place) - data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') - conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) - # 1) Use fp16_guard to control the range of fp16 kernels used. - with paddle.static.amp.fp16_guard(): - bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") - pool = F.max_pool2d(bn, kernel_size=2, stride=2) - hidden = paddle.static.nn.fc(pool, size=10) - loss = paddle.mean(hidden) - # 2) Create the optimizer and set `multi_precision` to True. 
-                # Setting `multi_precision` to True can avoid the poor accuracy
-                # or the slow convergence in a way.
-                optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
-                # 3) These ops in `custom_black_list` will keep in the float32 computation type.
-                amp_list = paddle.static.amp.CustomOpLists(
-                    custom_black_list=['pool2d'])
-                # 4) The entry of Paddle AMP.
-                # Enable pure fp16 training by setting `use_pure_fp16` to True.
-                optimizer = paddle.static.amp.decorate(
-                    optimizer,
-                    amp_list,
-                    init_loss_scaling=128.0,
-                    use_dynamic_loss_scaling=True,
-                    use_pure_fp16=True)
-                # If you don't use the default_startup_program(), you sholud pass
-                # your defined `startup_program` into `minimize`.
-                optimizer.minimize(loss)
-                exe.run(paddle.static.default_startup_program())
-                # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`).
-                # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
-                optimizer.amp_init(place, scope=paddle.static.global_scope())
-
-            if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
-                run_example_code()
+            >>> import numpy as np
+            >>> import paddle
+            >>> import paddle.nn.functional as F
+            >>> paddle.enable_static()
+
+            >>> # doctest: +REQUIRES(env:GPU)
+            >>> def run_example_code():
+            ...     place = paddle.CUDAPlace(0)
+            ...     exe = paddle.static.Executor(place)
+            ...     data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
+            ...     conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
+            ...     # 1) Use fp16_guard to control the range of fp16 kernels used.
+            ...     with paddle.static.amp.fp16_guard():
+            ...         bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+            ...         pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+            ...         hidden = paddle.static.nn.fc(pool, size=10)
+            ...         loss = paddle.mean(hidden)
+            ...     # 2) Create the optimizer and set `multi_precision` to True.
+            ...     # Setting `multi_precision` to True can help avoid poor accuracy
+            ...     # or slow convergence in some cases.
+            ...     optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
+            ...     # 3) The ops in `custom_black_list` will be kept in the float32 computation type.
+            ...     amp_list = paddle.static.amp.CustomOpLists(
+            ...         custom_black_list=['pool2d'])
+            ...     # 4) The entry of Paddle AMP.
+            ...     # Enable pure fp16 training by setting `use_pure_fp16` to True.
+            ...     optimizer = paddle.static.amp.decorate(
+            ...         optimizer,
+            ...         amp_list,
+            ...         init_loss_scaling=128.0,
+            ...         use_dynamic_loss_scaling=True,
+            ...         use_pure_fp16=True)
+            ...     # If you don't use the default_startup_program(), you should pass
+            ...     # your defined `startup_program` into `minimize`.
+            ...     optimizer.minimize(loss)
+            ...     exe.run(paddle.static.default_startup_program())
+            ...     # 5) Use `amp_init` after FP32 parameters initialization (such as `exe.run(startup_program)`).
+            ...     # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
+            ...     optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+            >>> if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
+            ...     run_example_code()
         """
         assert (
             self._train_program is not None
@@ -712,70 +713,74 @@ def decorate(
         An optimizer acting like a normal one but with mixed-precision training
         enabled.
 
-    Examples 1:
-        .. code-block:: python
+    Examples:
+        .. 
code-block:: python + :name: example-1 # black&white list based strategy example - import paddle - import paddle.static as static + >>> import paddle + >>> import paddle.static as static - paddle.enable_static() + >>> paddle.enable_static() - data = static.data(name='X', shape=[None, 1], dtype='float32') - hidden = static.nn.fc(x=data, size=10) - loss = paddle.mean(hidden) - optimizer = paddle.optimizer.Adam(learning_rate=0.001) + >>> data = static.data(name='X', shape=[None, 1], dtype='float32') + >>> hidden = static.nn.fc(x=data, size=10) + >>> loss = paddle.mean(hidden) + >>> optimizer = paddle.optimizer.Adam(learning_rate=0.001) - mp_optimizer = static.amp.decorate( - optimizer=optimizer, init_loss_scaling=8.0) + >>> mp_optimizer = static.amp.decorate( + ... optimizer=optimizer, init_loss_scaling=8.0) + + >>> ops, param_grads = mp_optimizer.minimize(loss) + >>> scaled_loss = mp_optimizer.get_scaled_loss() - ops, param_grads = mp_optimizer.minimize(loss) - scaled_loss = mp_optimizer.get_scaled_loss() - Examples 2: .. code-block:: python + :name: example-2 # pure fp16 training example - import numpy as np - import paddle - import paddle.nn.functional as F - - def run_example_code(): - place = paddle.CUDAPlace(0) - exe = paddle.static.Executor(place) - data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') - conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) - # 1) Use fp16_guard to control the range of fp16 kernels used. - with paddle.static.amp.fp16_guard(): - bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") - pool = F.max_pool2d(bn, kernel_size=2, stride=2) - hidden = paddle.static.nn.fc(pool, size=10) - loss = paddle.mean(hidden) - # 2) Create the optimizer and set `multi_precision` to True. - # Setting `multi_precision` to True can avoid the poor accuracy - # or the slow convergence in a way. - optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True) - # 3) These ops in `custom_black_list` will keep in the float32 computation type. - amp_list = paddle.static.amp.CustomOpLists( - custom_black_list=['pool2d']) - # 4) The entry of Paddle AMP. - # Enable pure fp16 training by setting `use_pure_fp16` to True. - optimizer = paddle.static.amp.decorate( - optimizer, - amp_list, - init_loss_scaling=128.0, - use_dynamic_loss_scaling=True, - use_pure_fp16=True) - # If you don't use the default_startup_program(), you sholud pass - # your defined `startup_program` into `minimize`. - optimizer.minimize(loss) - exe.run(paddle.static.default_startup_program()) - # 5) Use `amp_init` after FP32 parameters initialization(such as `exe.run(startup_program)`). - # If you want to perform the testing process, you should pass `test_program` into `amp_init`. - optimizer.amp_init(place, scope=paddle.static.global_scope()) - - if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0: - run_example_code() + >>> import numpy as np + >>> import paddle + >>> import paddle.nn.functional as F + >>> paddle.enable_static() + + >>> # doctest: +REQUIRES(env:GPU) + >>> def run_example_code(): + ... place = paddle.CUDAPlace(0) + ... exe = paddle.static.Executor(place) + ... data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') + ... conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) + ... # 1) Use fp16_guard to control the range of fp16 kernels used. + ... with paddle.static.amp.fp16_guard(): + ... bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") + ... 
pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+            ...         hidden = paddle.static.nn.fc(pool, size=10)
+            ...         loss = paddle.mean(hidden)
+            ...     # 2) Create the optimizer and set `multi_precision` to True.
+            ...     # Setting `multi_precision` to True can help avoid poor accuracy
+            ...     # or slow convergence in some cases.
+            ...     optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
+            ...     # 3) The ops in `custom_black_list` will be kept in the float32 computation type.
+            ...     amp_list = paddle.static.amp.CustomOpLists(
+            ...         custom_black_list=['pool2d'])
+            ...     # 4) The entry of Paddle AMP.
+            ...     # Enable pure fp16 training by setting `use_pure_fp16` to True.
+            ...     optimizer = paddle.static.amp.decorate(
+            ...         optimizer,
+            ...         amp_list,
+            ...         init_loss_scaling=128.0,
+            ...         use_dynamic_loss_scaling=True,
+            ...         use_pure_fp16=True)
+            ...     # If you don't use the default_startup_program(), you should pass
+            ...     # your defined `startup_program` into `minimize`.
+            ...     optimizer.minimize(loss)
+            ...     exe.run(paddle.static.default_startup_program())
+            ...     # 5) Use `amp_init` after FP32 parameters initialization (such as `exe.run(startup_program)`).
+            ...     # If you want to perform the testing process, you should pass `test_program` into `amp_init`.
+            ...     optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+            >>> if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
+            ...     run_example_code()
     """
     amp_dtype = "bfloat16" if use_bf16 else "float16"
     if amp_lists is None:
@@ -859,47 +864,47 @@ def decorate(  # noqa: F811
 
     Examples:
 
-        .. code-block:: python
-
-            import paddle
-
-            paddle.enable_static()
-
-            class SimpleConvNet(paddle.nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self.conv = paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3)
-                    self.linear = paddle.nn.Linear(in_features=26, out_features=10)
-
-                def forward(self, x):
-                    out = self.conv(x)
-                    out = paddle.nn.functional.relu(out)
-                    out = self.linear(out)
-                    out = paddle.nn.functional.softmax(out)
-                    return out
+        .. code-block:: python
 
-            main_program = paddle.static.Program()
-            startup_program = paddle.static.Program()
-            with paddle.utils.unique_name.guard():
-                with paddle.static.program_guard(main_program, startup_program):
-                    model = SimpleConvNet()
-                    x = paddle.static.data(
-                        name='input', shape=[None, 1, 28, 28], dtype='float32'
-                    )
-                    out = model(x)
-                    loss = paddle.mean(out)
-                    optimizer = paddle.optimizer.AdamW()
-                    optimizer = paddle.static.amp.decorate(optimizer, level="O2", dtype="float16")
-                    optimizer.minimize(loss)
-
-            if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
-                place = paddle.CUDAPlace(0)
-                exe = paddle.static.Executor(place)
-                exe.run(startup_program)
-
-            # Call `amp_init` after FP32 parameters initialization, such as `exe.run(startup_program)`,
-            # to convert FP32 parameters to low precision FP16 / BF16.
-            optimizer.amp_init(place, scope=paddle.static.global_scope())
+            >>> import paddle
+            >>> paddle.enable_static()
+
+            >>> # doctest: +REQUIRES(env:GPU)
+            >>> class SimpleConvNet(paddle.nn.Layer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self.conv = paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3)
+            ...         self.linear = paddle.nn.Linear(in_features=26, out_features=10)
+            ...
+            ...     def forward(self, x):
+            ...         out = self.conv(x)
+            ...         out = paddle.nn.functional.relu(out)
+            ...         out = self.linear(out)
+            ...         out = paddle.nn.functional.softmax(out)
+            ...         
return out + + >>> main_program = paddle.static.Program() + >>> startup_program = paddle.static.Program() + >>> with paddle.utils.unique_name.guard(): + ... with paddle.static.program_guard(main_program, startup_program): + ... model = SimpleConvNet() + ... x = paddle.static.data( + ... name='input', shape=[None, 1, 28, 28], dtype='float32' + ... ) + ... out = model(x) + ... loss = paddle.mean(out) + ... optimizer = paddle.optimizer.AdamW() + ... optimizer = paddle.static.amp.decorate(optimizer, level="O2", dtype="float16") + ... optimizer.minimize(loss) + + >>> if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0: + ... place = paddle.CUDAPlace(0) + ... exe = paddle.static.Executor(place) + ... exe.run(startup_program) + ... + ... # Call `amp_init` after FP32 parameters initialization, such as `exe.run(startup_program)`, + ... # to convert FP32 parameters to low precision FP16 / BF16. + ... optimizer.amp_init(place, scope=paddle.static.global_scope()) """ # check amp_level: O0-O2 diff --git a/python/paddle/static/amp/fp16_utils.py b/python/paddle/static/amp/fp16_utils.py index ea73d48cf3a967de22f211009642d8417a6bab7b..46c669ba54e46b3be2fc3f0bee92b81c954639ac 100644 --- a/python/paddle/static/amp/fp16_utils.py +++ b/python/paddle/static/amp/fp16_utils.py @@ -365,18 +365,18 @@ def fp16_guard(): Examples: .. code-block:: python - import numpy as np - import paddle - import paddle.nn.functional as F - paddle.enable_static() - data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') - conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) - - with paddle.static.amp.fp16_guard(): - bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") - pool = F.max_pool2d(bn, kernel_size=2, stride=2) - hidden = paddle.static.nn.fc(pool, size=10) - loss = paddle.mean(hidden) + >>> import numpy as np + >>> import paddle + >>> import paddle.nn.functional as F + >>> paddle.enable_static() + >>> data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32') + >>> conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3) + + >>> with paddle.static.amp.fp16_guard(): + ... bn = paddle.static.nn.batch_norm(input=conv2d, act="relu") + ... pool = F.max_pool2d(bn, kernel_size=2, stride=2) + ... hidden = paddle.static.nn.fc(pool, size=10) + ... loss = paddle.mean(hidden) """ with framework.name_scope(prefix=_fp16_guard_pattern): yield diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py index 4fe3d4d07c7bf368ce62ce7a4f802b8c2afbc2dc..8cdcb5f551a7c3174681a6fc4e68827b720e7910 100644 --- a/python/paddle/static/input.py +++ b/python/paddle/static/input.py @@ -58,38 +58,44 @@ def data(name, shape, dtype=None, lod_level=0): Examples: .. code-block:: python - import numpy as np - import paddle - paddle.enable_static() - - # Creates a variable with fixed size [3, 2, 1] - # User can only feed data of the same shape to x - # the dtype is not set, so it will set "float32" by - # paddle.get_default_dtype(). You can use paddle.get_default_dtype() to - # change the global dtype - x = paddle.static.data(name='x', shape=[3, 2, 1]) - - # Creates a variable with changeable batch size -1. 
-            # Users can feed data of any batch size into y,
-            # but size of each data sample has to be [2, 1]
-            y = paddle.static.data(name='y', shape=[-1, 2, 1], dtype='float32')
-
-            z = x + y
-
-            # In this example, we will feed x and y with np-ndarray "1"
-            # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
-            feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)
-
-            exe = paddle.static.Executor(paddle.framework.CPUPlace())
-            out = exe.run(paddle.static.default_main_program(),
-                          feed={
-                              'x': feed_data,
-                              'y': feed_data
-                          },
-                          fetch_list=[z.name])
-
-            # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
-            print(out)
+            >>> import numpy as np
+            >>> import paddle
+            >>> paddle.enable_static()
+
+            # Creates a variable with fixed size [3, 2, 1]
+            # User can only feed data of the same shape to x
+            # the dtype is not set, so it will be set to "float32" by
+            # paddle.get_default_dtype(). You can use paddle.set_default_dtype() to
+            # change the global dtype
+            >>> x = paddle.static.data(name='x', shape=[3, 2, 1])
+
+            # Creates a variable with changeable batch size -1.
+            # Users can feed data of any batch size into y,
+            # but size of each data sample has to be [2, 1]
+            >>> y = paddle.static.data(name='y', shape=[-1, 2, 1], dtype='float32')
+
+            >>> z = x + y
+
+            # In this example, we will feed x and y with np-ndarray "1"
+            # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
+            >>> feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)
+
+            >>> exe = paddle.static.Executor(paddle.framework.CPUPlace())
+            >>> out = exe.run(paddle.static.default_main_program(),
+            ...               feed={
+            ...                   'x': feed_data,
+            ...                   'y': feed_data
+            ...               },
+            ...               fetch_list=[z.name])
+
+            # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
+            >>> print(out)
+            [array([[[2.],
+                     [2.]],
+                    [[2.],
+                     [2.]],
+                    [[2.],
+                     [2.]]], dtype=float32)]
 
     """
     helper = LayerHelper('data', **locals())
@@ -171,13 +177,17 @@ class InputSpec:
 
     Examples:
         .. code-block:: python
 
-            from paddle.static import InputSpec
+            >>> import paddle
+            >>> from paddle.static import InputSpec
 
-            input = InputSpec([None, 784], 'float32', 'x')
-            label = InputSpec([None, 1], 'int64', 'label')
+            >>> input = InputSpec([None, 784], 'float32', 'x')
+            >>> label = InputSpec([None, 1], 'int64', 'label')
 
-            print(input)  # InputSpec(shape=(-1, 784), dtype=paddle.float32, name=x)
-            print(label)  # InputSpec(shape=(-1, 1), dtype=paddle.int64, name=label)
+            >>> print(input)
+            InputSpec(shape=(-1, 784), dtype=paddle.float32, name=x, stop_gradient=False)
+
+            >>> print(label)
+            InputSpec(shape=(-1, 1), dtype=paddle.int64, name=label, stop_gradient=False)
 
     """
 
     def __init__(self, shape, dtype='float32', name=None, stop_gradient=False):
@@ -217,14 +227,15 @@ class InputSpec:
 
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.static import InputSpec
+                >>> import paddle
+                >>> from paddle.static import InputSpec
 
-                paddle.disable_static()
+                >>> paddle.disable_static()
 
-                x = paddle.ones([2, 2], dtype="float32")
-                x_spec = InputSpec.from_tensor(x, name='x')
-                print(x_spec)  # InputSpec(shape=(2, 2), dtype=paddle.float32, name=x)
+                >>> x = paddle.ones([2, 2], dtype="float32")
+                >>> x_spec = InputSpec.from_tensor(x, name='x')
+                >>> print(x_spec)
+                InputSpec(shape=(2, 2), dtype=paddle.float32, name=x, stop_gradient=False)
 
         """
         if isinstance(tensor, (Variable, core.eager.Tensor)):
@@ -250,12 +261,13 @@ class InputSpec:
 
        Examples:
            .. 
code-block:: python
 
-                import numpy as np
-                from paddle.static import InputSpec
+                >>> import numpy as np
+                >>> from paddle.static import InputSpec
 
-                x = np.ones([2, 2], np.float32)
-                x_spec = InputSpec.from_numpy(x, name='x')
-                print(x_spec)  # InputSpec(shape=(2, 2), dtype=paddle.float32, name=x)
+                >>> x = np.ones([2, 2], np.float32)
+                >>> x_spec = InputSpec.from_numpy(x, name='x')
+                >>> print(x_spec)
+                InputSpec(shape=(2, 2), dtype=paddle.float32, name=x, stop_gradient=False)
 
        """
        return cls(ndarray.shape, ndarray.dtype, name)
@@ -273,11 +285,12 @@ class InputSpec:
 
        Examples:
            .. code-block:: python
 
-                from paddle.static import InputSpec
+                >>> from paddle.static import InputSpec
 
-                x_spec = InputSpec(shape=[64], dtype='float32', name='x')
-                x_spec.batch(4)
-                print(x_spec)  # InputSpec(shape=(4, 64), dtype=paddle.float32, name=x)
+                >>> x_spec = InputSpec(shape=[64], dtype='float32', name='x')
+                >>> x_spec.batch(4)
+                >>> print(x_spec)
+                InputSpec(shape=(4, 64), dtype=paddle.float32, name=x, stop_gradient=False)
 
        """
        if isinstance(batch_size, (list, tuple)):
@@ -310,11 +323,12 @@ class InputSpec:
 
        Examples:
            .. code-block:: python
 
-                from paddle.static import InputSpec
+                >>> from paddle.static import InputSpec
 
-                x_spec = InputSpec(shape=[4, 64], dtype='float32', name='x')
-                x_spec.unbatch()
-                print(x_spec)  # InputSpec(shape=(64,), dtype=paddle.float32, name=x)
+                >>> x_spec = InputSpec(shape=[4, 64], dtype='float32', name='x')
+                >>> x_spec.unbatch()
+                >>> print(x_spec)
+                InputSpec(shape=(64,), dtype=paddle.float32, name=x, stop_gradient=False)
 
        """
        if len(self.shape) == 0:
diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py
index 88be89e3d440386fcf74a96b6ea09bb0ae7b44d0..feaaf905bc70b5eb1cbaba35d391743ed7b8d12e 100644
--- a/python/paddle/static/io.py
+++ b/python/paddle/static/io.py
@@ -203,25 +203,25 @@ def normalize_program(program, feed_vars, fetch_vars):
 
     Examples:
         .. code-block:: python
 
-            import paddle
+            >>> import paddle
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            path_prefix = "./infer_model"
+            >>> path_prefix = "./infer_model"
 
            # User defined network, here a softmax regression example
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
 
-            loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
 
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
 
            # normalize main program.
-            program = paddle.static.default_main_program()
-            normalized_program = paddle.static.normalize_program(program, [image], [predict])
+            >>> program = paddle.static.default_main_program()
+            >>> normalized_program = paddle.static.normalize_program(program, [image], [predict])
 
     """
     if not isinstance(program, Program):
@@ -308,27 +308,26 @@ def serialize_program(feed_vars, fetch_vars, **kwargs):
 
     Examples:
         .. 
code-block:: python
 
-            import paddle
+            >>> import paddle
+            >>> paddle.enable_static()
 
-            paddle.enable_static()
-
-            path_prefix = "./infer_model"
+            >>> path_prefix = "./infer_model"
 
            # User defined network, here a softmax regression example
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
 
-            loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
 
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
 
            # serialize the default main program to bytes.
-            serialized_program = paddle.static.serialize_program([image], [predict])
+            >>> serialized_program = paddle.static.serialize_program([image], [predict])
 
            # deserialize bytes to program
-            deserialized_program = paddle.static.deserialize_program(serialized_program)
+            >>> deserialized_program = paddle.static.deserialize_program(serialized_program)
 
     """
     # verify feed_vars
@@ -368,28 +367,27 @@ def serialize_persistables(feed_vars, fetch_vars, executor, **kwargs):
 
     Examples:
         .. code-block:: python
 
-            import paddle
-
-            paddle.enable_static()
+            >>> import paddle
+            >>> paddle.enable_static()
 
-            path_prefix = "./infer_model"
+            >>> path_prefix = "./infer_model"
 
            # User defined network, here a softmax regression example
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
 
-            loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
 
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
 
            # serialize parameters to bytes.
-            serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
+            >>> serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
 
            # deserialize bytes to parameters.
-            main_program = paddle.static.default_main_program()
-            deserialized_params = paddle.static.deserialize_persistables(main_program, serialized_params, exe)
+            >>> main_program = paddle.static.default_main_program()
+            >>> deserialized_params = paddle.static.deserialize_persistables(main_program, serialized_params, exe)
 
     """
     # verify feed_vars
@@ -463,21 +461,24 @@ def save_to_file(path, content):
 
     Examples:
         .. 
code-block:: python
 
-            import paddle
-            paddle.enable_static()
-            path_prefix = "./infer_model"
+            >>> import paddle
+            >>> paddle.enable_static()
+            >>> path_prefix = "./infer_model"
+
            # User-defined network; a softmax regression example is used here.
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
-            loss = paddle.nn.functional.cross_entropy(predict, label)
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
+
            # Serialize the parameters.
-            serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
+            >>> serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
+
            # Save the serialized parameters to a file.
-            params_path = path_prefix + ".params"
-            paddle.static.save_to_file(params_path, serialized_params)
+            >>> params_path = path_prefix + ".params"
+            >>> paddle.static.save_to_file(params_path, serialized_params)
 
     """
     if not isinstance(content, bytes):
@@ -517,26 +518,26 @@ def save_inference_model(
 
     Examples:
         .. code-block:: python
 
-            import paddle
+            >>> import paddle
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            path_prefix = "./infer_model"
+            >>> path_prefix = "./infer_model"
 
            # User defined network, here a softmax regression example
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
 
-            loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
 
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
 
            # Feed data and train process
 
            # Save inference model. Note we don't save label and loss in this example
-            paddle.static.save_inference_model(path_prefix, [image], [predict], exe)
+            >>> paddle.static.save_inference_model(path_prefix, [image], [predict], exe)
 
            # In this example, save_inference_model will prune the default
            # main program according to the network's input node (img) and output node (predict).
@@ -613,27 +614,27 @@ def deserialize_program(data):
 
     Examples:
         .. 
code-block:: python
 
-            import paddle
+            >>> import paddle
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            path_prefix = "./infer_model"
+            >>> path_prefix = "./infer_model"
 
            # User defined network, here a softmax regression example
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
 
-            loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
 
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
 
            # serialize the default main program to bytes.
-            serialized_program = paddle.static.serialize_program([image], [predict])
+            >>> serialized_program = paddle.static.serialize_program([image], [predict])
 
            # deserialize bytes to program
-            deserialized_program = paddle.static.deserialize_program(serialized_program)
+            >>> deserialized_program = paddle.static.deserialize_program(serialized_program)
 
     """
     program = Program.parse_from_string(data)
@@ -662,28 +663,28 @@ def deserialize_persistables(program, data, executor):
 
     Examples:
         .. code-block:: python
 
-            import paddle
+            >>> import paddle
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            path_prefix = "./infer_model"
+            >>> path_prefix = "./infer_model"
 
            # User defined network, here a softmax regression example
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
 
-            loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
 
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
 
            # serialize parameters to bytes.
-            serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
+            >>> serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
 
            # deserialize bytes to parameters.
-            main_program = paddle.static.default_main_program()
-            deserialized_params = paddle.static.deserialize_persistables(main_program, serialized_params, exe)
+            >>> main_program = paddle.static.default_main_program()
+            >>> deserialized_params = paddle.static.deserialize_persistables(main_program, serialized_params, exe)
 
     """
 
@@ -764,23 +765,27 @@ def load_from_file(path):
 
        .. 
code-block:: python
 
-            import paddle
-            paddle.enable_static()
-            path_prefix = "./infer_model"
+            >>> import paddle
+            >>> paddle.enable_static()
+            >>> path_prefix = "./infer_model"
+
            # User-defined network; a softmax regression example is used here.
-            image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
-            label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-            predict = paddle.static.nn.fc(image, 10, activation='softmax')
-            loss = paddle.nn.functional.cross_entropy(predict, label)
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(paddle.static.default_startup_program())
+            >>> image = paddle.static.data(name='img', shape=[None, 28, 28], dtype='float32')
+            >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
+            >>> predict = paddle.static.nn.fc(image, 10, activation='softmax')
+            >>> loss = paddle.nn.functional.cross_entropy(predict, label)
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(paddle.static.default_startup_program())
+
            # Serialize the parameters.
-            serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
+            >>> serialized_params = paddle.static.serialize_persistables([image], [predict], exe)
+
            # Save the serialized parameters to a file.
-            params_path = path_prefix + ".params"
-            paddle.static.save_to_file(params_path, serialized_params)
+            >>> params_path = path_prefix + ".params"
+            >>> paddle.static.save_to_file(params_path, serialized_params)
+
            # Load the serialized parameters from the file.
-            serialized_params_copy = paddle.static.load_from_file(params_path)
+            >>> serialized_params_copy = paddle.static.load_from_file(params_path)
     """
     with open(path, 'rb') as f:
         data = f.read()
@@ -818,33 +823,33 @@ def load_inference_model(path_prefix, executor, **kwargs):
 
     Examples:
         .. code-block:: python
 
-            import paddle
-            import numpy as np
+            >>> import paddle
+            >>> import numpy as np
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
            # Build the model
-            startup_prog = paddle.static.default_startup_program()
-            main_prog = paddle.static.default_main_program()
-            with paddle.static.program_guard(main_prog, startup_prog):
-                image = paddle.static.data(name="img", shape=[64, 784])
-                w = paddle.create_parameter(shape=[784, 200], dtype='float32')
-                b = paddle.create_parameter(shape=[200], dtype='float32')
-                hidden_w = paddle.matmul(x=image, y=w)
-                hidden_b = paddle.add(hidden_w, b)
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            exe.run(startup_prog)
+            >>> startup_prog = paddle.static.default_startup_program()
+            >>> main_prog = paddle.static.default_main_program()
+            >>> with paddle.static.program_guard(main_prog, startup_prog):
+            ...     image = paddle.static.data(name="img", shape=[64, 784])
+            ...     w = paddle.create_parameter(shape=[784, 200], dtype='float32')
+            ...     b = paddle.create_parameter(shape=[200], dtype='float32')
+            ...     hidden_w = paddle.matmul(x=image, y=w)
+            ...     hidden_b = paddle.add(hidden_w, b)
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> exe.run(startup_prog)
 
            # Save the inference model
-            path_prefix = "./infer_model"
-            paddle.static.save_inference_model(path_prefix, [image], [hidden_b], exe)
+            >>> path_prefix = "./infer_model"
+            >>> paddle.static.save_inference_model(path_prefix, [image], [hidden_b], exe)
 
-            [inference_program, feed_target_names, fetch_targets] = (
-                paddle.static.load_inference_model(path_prefix, exe))
-            tensor_img = np.array(np.random.random((64, 784)), dtype=np.float32)
-            results = exe.run(inference_program,
-                              feed={feed_target_names[0]: tensor_img},
-                              fetch_list=fetch_targets)
+            >>> [inference_program, feed_target_names, fetch_targets] = (
+            ...     
paddle.static.load_inference_model(path_prefix, exe))
+            >>> tensor_img = np.array(np.random.random((64, 784)), dtype=np.float32)
+            >>> results = exe.run(inference_program,
+            ...                   feed={feed_target_names[0]: tensor_img},
+            ...                   fetch_list=fetch_targets)
 
            # In this example, the inference program was saved in file
            # "./infer_model.pdmodel" and parameters were saved in file
@@ -1001,36 +1006,40 @@ def save_vars(
 
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.fluid as fluid
-
-            paddle.enable_static()
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(main_prog, startup_prog):
-                data = paddle.static.data(name="img", shape=[64, 784])
-                w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
-                b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
-                hidden_w = paddle.matmul(x=data, y=w)
-                hidden_b = paddle.add(hidden_w, b)
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            exe.run(startup_prog)
+            >>> import paddle
+            >>> import paddle.static as static
+
+            >>> paddle.enable_static()
+            >>> main_prog = static.Program()
+            >>> startup_prog = static.Program()
+            >>> with static.program_guard(main_prog, startup_prog):
+            ...     data = paddle.static.data(name="img", shape=[64, 784])
+            ...     w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+            ...     b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
+            ...     hidden_w = paddle.matmul(x=data, y=w)
+            ...     hidden_b = paddle.add(hidden_w, b)
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(startup_prog)
 
            # The first usage: use `vars` to set the saved variables.
-            var_list = [w, b]
-            path = "./my_paddle_vars"
-            fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
-                               filename="vars_file")
+            >>> var_list = [w, b]
+            >>> path = "./my_paddle_vars"
+            # w and b will be saved in a file named "vars_file".
+            >>> paddle.static.io.save_vars(executor=exe, dirname=path, vars=var_list,
+            ...                            filename="vars_file")
 
            # The second usage: use `predicate` to select the saved variable.
-            def name_has_fc(var):
-                res = "fc" in var.name
-                return res
-            param_path = "./my_paddle_model"
-            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate = name_has_fc)
+            >>> def name_has_fc(var):
+            ...     res = "fc" in var.name
+            ...     return res
+            >>> param_path = "./my_paddle_model"
+            # All variables whose names contain "fc" are saved.
+            >>> paddle.static.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate=name_has_fc)
+
+
     """
     save_to_memory = False
     if dirname is None and filename is None:
@@ -1154,41 +1163,43 @@ def load_vars(
 
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.fluid as fluid
-
-            paddle.enable_static()
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(main_prog, startup_prog):
-                data = paddle.static.data(name="img", shape=[64, 784])
-                w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
-                b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
-                hidden_w = paddle.matmul(x=data, y=w)
-                hidden_b = paddle.add(hidden_w, b)
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            exe.run(startup_prog)
+            >>> import paddle
+            >>> import paddle.static as static
+
+            >>> paddle.enable_static()
+            >>> main_prog = static.Program()
+            >>> startup_prog = static.Program()
+            >>> with static.program_guard(main_prog, startup_prog):
+            ...     
data = paddle.static.data(name="img", shape=[64, 784])
+            ...     w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+            ...     b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
+            ...     hidden_w = paddle.matmul(x=data, y=w)
+            ...     hidden_b = paddle.add(hidden_w, b)
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(startup_prog)
 
            # The first usage: using `vars` to specify the variables.
-            path = "./my_paddle_vars"
-            var_list = [w, b]
-            fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
-                               filename="vars_file")
-            fluid.io.load_vars(executor=exe, dirname=path, vars=var_list,
-                               filename="vars_file")
+            >>> path = "./my_paddle_vars"
+            >>> var_list = [w, b]
+            >>> paddle.static.io.save_vars(executor=exe, dirname=path, vars=var_list,
+            ...                            filename="vars_file")
+            >>> paddle.static.io.load_vars(executor=exe, dirname=path, vars=var_list,
+            ...                            filename="vars_file")
+
            # w and b will be loaded, and they are supposed to
            # be saved in the same file named 'vars_file' in the path "./my_paddle_vars".
 
            # The second usage: using the `predicate` function to select variables
-            param_path = "./my_paddle_model"
-            def name_has_fc(var):
-                res = "fc" in var.name
-                return res
-            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog,
-                               vars=None, predicate=name_has_fc)
-            fluid.io.load_vars(executor=exe, dirname=param_path, main_program=main_prog,
-                               vars=None, predicate=name_has_fc)
+            >>> param_path = "./my_paddle_model"
+            >>> def name_has_fc(var):
+            ...     res = "fc" in var.name
+            ...     return res
+            >>> paddle.static.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog,
+            ...                            vars=None, predicate=name_has_fc)
+            >>> paddle.static.io.load_vars(executor=exe, dirname=param_path, main_program=main_prog,
+            ...                            vars=None, predicate=name_has_fc)
+
            # Load all variables in the `main_program` whose name includes "fc".
            # And all the variables are supposed to be saved in separate files.
 
@@ -1391,21 +1402,21 @@ def save(program, model_path, protocol=4, **configs):
 
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.static as static
+            >>> import paddle
+            >>> import paddle.static as static
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            x = static.data(name="x", shape=[10, 10], dtype='float32')
-            y = static.nn.fc(x, 10)
-            z = static.nn.fc(y, 10)
+            >>> x = static.data(name="x", shape=[10, 10], dtype='float32')
+            >>> y = static.nn.fc(x, 10)
+            >>> z = static.nn.fc(y, 10)
 
-            place = paddle.CPUPlace()
-            exe = static.Executor(place)
-            exe.run(static.default_startup_program())
-            prog = static.default_main_program()
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(static.default_startup_program())
+            >>> prog = static.default_main_program()
 
-            static.save(prog, "./temp")
+            >>> static.save(prog, "./temp")
 
     """
 
    base_name = os.path.basename(model_path)
@@ -1496,22 +1507,22 @@ def load(program, model_path, executor=None, var_list=None):
 
     Examples:
         .. 
code-block:: python
 
-            import paddle
-            import paddle.static as static
+            >>> import paddle
+            >>> import paddle.static as static
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            x = static.data(name="x", shape=[10, 10], dtype='float32')
-            y = static.nn.fc(x, 10)
-            z = static.nn.fc(y, 10)
+            >>> x = static.data(name="x", shape=[10, 10], dtype='float32')
+            >>> y = static.nn.fc(x, 10)
+            >>> z = static.nn.fc(y, 10)
 
-            place = paddle.CPUPlace()
-            exe = static.Executor(place)
-            exe.run(static.default_startup_program())
-            prog = static.default_main_program()
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(static.default_startup_program())
+            >>> prog = static.default_main_program()
 
-            static.save(prog, "./temp")
-            static.load(prog, "./temp")
+            >>> static.save(prog, "./temp")
+            >>> static.load(prog, "./temp")
 
     """
 
    assert executor is None or isinstance(executor, Executor)
@@ -1705,24 +1716,24 @@ def set_program_state(program, state_dict):
 
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.static as static
+            >>> import paddle
+            >>> import paddle.static as static
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            x = static.data(name="x", shape=[10, 10], dtype='float32')
-            y = static.nn.fc(x, 10)
-            z = static.nn.fc(y, 10)
+            >>> x = static.data(name="x", shape=[10, 10], dtype='float32')
+            >>> y = static.nn.fc(x, 10)
+            >>> z = static.nn.fc(y, 10)
 
-            place = paddle.CPUPlace()
-            exe = static.Executor(place)
-            exe.run(static.default_startup_program())
-            prog = static.default_main_program()
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(static.default_startup_program())
+            >>> prog = static.default_main_program()
 
-            static.save(prog, "./temp")
-            program_state = static.load_program_state("./temp")
+            >>> static.save(prog, "./temp")
+            >>> program_state = static.load_program_state("./temp")
 
-            static.set_program_state(prog, program_state)
+            >>> static.set_program_state(prog, program_state)
 
     """
    state_dict = _pack_loaded_dict(state_dict)
    parameter_list = list(filter(is_persistable, program.list_vars()))
@@ -1795,14 +1806,14 @@ def get_program_persistable_vars(program):
        list: The list contains all persistable vars in the program
    Examples:
        .. code-block:: python
-            import paddle
-            import paddle.static.io as io
-            import paddle.fluid as fluid
-            paddle.enable_static()
-            data = paddle.static.data(name="img", shape=[64, 784])
-            w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
-            b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
-            list_para  = io.get_program_persistable_vars( fluid.default_main_program() )
+
+            >>> import paddle
+            >>> import paddle.static.io as io
+            >>> paddle.enable_static()
+            >>> data = paddle.static.data(name="img", shape=[64, 784])
+            >>> w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+            >>> b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
+            >>> list_para = io.get_program_persistable_vars(paddle.static.default_main_program())
    """
    return list(filter(is_persistable, program.list_vars()))
@@ -1826,22 +1837,22 @@ def load_program_state(model_path, var_list=None):
 
        .. 
@@ -1795,14 +1806,14 @@ def get_program_persistable_vars(program):
         list: The list contains all persistable vars in the program
     Examples:
         .. code-block:: python
-            import paddle
-            import paddle.static.io as io
-            import paddle.fluid as fluid
-            paddle.enable_static()
-            data = paddle.static.data(name="img", shape=[64, 784])
-            w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
-            b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
-            list_para = io.get_program_persistable_vars( fluid.default_main_program() )
+
+            >>> import paddle
+            >>> import paddle.static.io as io
+            >>> paddle.enable_static()
+            >>> data = paddle.static.data(name="img", shape=[64, 784])
+            >>> w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+            >>> b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b')
+            >>> list_para = io.get_program_persistable_vars(paddle.static.default_main_program())
     """
     return list(filter(is_persistable, program.list_vars()))
@@ -1826,22 +1837,22 @@ def load_program_state(model_path, var_list=None):
     Examples:
         .. code-block:: python

-            import paddle
-            import paddle.static as static
+            >>> import paddle
+            >>> import paddle.static as static

-            paddle.enable_static()
+            >>> paddle.enable_static()

-            x = static.data(name="x", shape=[10, 10], dtype='float32')
-            y = static.nn.fc(x, 10)
-            z = static.nn.fc(y, 10)
+            >>> x = static.data(name="x", shape=[10, 10], dtype='float32')
+            >>> y = static.nn.fc(x, 10)
+            >>> z = static.nn.fc(y, 10)

-            place = paddle.CPUPlace()
-            exe = static.Executor(place)
-            exe.run(static.default_startup_program())
-            prog = static.default_main_program()
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(static.default_startup_program())
+            >>> prog = static.default_main_program()

-            static.save(prog, "./temp")
-            program_state = static.load_program_state("./temp")
+            >>> static.save(prog, "./temp")
+            >>> program_state = static.load_program_state("./temp")
     """
     model_prefix = model_path
     if model_prefix.endswith(".pdparams"):
diff --git a/python/paddle/static/log_helper.py b/python/paddle/static/log_helper.py
index 3bb0e8477fd3caa0cb56c13c2a6126cd3b59a3e3..b86505174098222dbbff7351ccf312ec4415408a 100644
--- a/python/paddle/static/log_helper.py
+++ b/python/paddle/static/log_helper.py
@@ -31,10 +31,10 @@ def get_logger(name, level, fmt=None):
     Examples:
         .. code-block:: python

-            import paddle
-            import logging
-            logger = paddle.static.log_helper.get_logger(__name__, logging.INFO,
-                            fmt='%(asctime)s-%(levelname)s: %(message)s')
+            >>> import paddle
+            >>> import logging
+            >>> logger = paddle.static.log_helper.get_logger(__name__, logging.INFO,
+            ...                                              fmt='%(asctime)s-%(levelname)s: %(message)s')
     """

     logger = logging.getLogger(name)
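Since `get_logger` hands back a standard `logging.Logger` configured with the given level and format, the usual logging calls apply directly. A short sketch continuing the converted example; the messages are made up for illustration:

.. code-block:: python

    >>> # The level and format passed to get_logger govern these calls.
    >>> logger.info("program transformation started")
    >>> logger.warning("op not supported, keeping original dtype")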
diff --git a/python/paddle/static/nn/loss.py b/python/paddle/static/nn/loss.py
index 7d7912a06fbdd58ae0ac8dc9126e0d9f29ffdc64..cbc099d963a2333f9b0612f4f05ed695e574eb66 100644
--- a/python/paddle/static/nn/loss.py
+++ b/python/paddle/static/nn/loss.py
@@ -86,44 +86,43 @@ def nce(
     Examples:
         .. code-block:: python
-
-            import paddle
-            import numpy as np
-
-            paddle.enable_static()
-
-            window_size = 5
-            words = []
-            for i in range(window_size):
-                words.append(paddle.static.data(
-                    name='word_{0}'.format(i), shape=[-1, 1], dtype='int64'))
-
-            dict_size = 10000
-            label_word = int(window_size / 2) + 1
-
-            embs = []
-            for i in range(window_size):
-                if i == label_word:
-                    continue
-
-                emb = paddle.static.nn.embedding(input=words[i], size=[dict_size, 32],
-                                   param_attr='embed', is_sparse=True)
-                embs.append(emb)
-
-            embs = paddle.concat(x=embs, axis=1) # concat from 4 * [(-1, 1, 32)] to (-1, 4, 32)
-            embs = paddle.reshape(x=embs, shape=(-1, 4 * 32)) # reshape to (batch_size = -1, dim = 4*32)
-            loss = paddle.static.nn.nce(input=embs, label=words[label_word],
-                      num_total_classes=dict_size, param_attr='nce.w_0',
-                      bias_attr='nce.b_0')
-
-            #or use custom distribution
-            dist = np.array([0.05,0.5,0.1,0.3,0.05])
-            loss = paddle.static.nn.nce(input=embs, label=words[label_word],
-                      num_total_classes=5, param_attr='nce.w_1',
-                      bias_attr='nce.b_1',
-                      num_neg_samples=3,
-                      sampler="custom_dist",
-                      custom_dist=dist)
+            >>> import paddle
+            >>> import numpy as np
+
+            >>> paddle.enable_static()
+
+            >>> window_size = 5
+            >>> words = []
+            >>> for i in range(window_size):
+            ...     words.append(paddle.static.data(
+            ...         name='word_{0}'.format(i), shape=[-1, 1], dtype='int64'))
+
+            >>> dict_size = 10000
+            >>> label_word = int(window_size / 2) + 1
+
+            >>> embs = []
+            >>> for i in range(window_size):
+            ...     if i == label_word:
+            ...         continue
+            ...
+            ...     emb = paddle.static.nn.embedding(input=words[i], size=[dict_size, 32],
+            ...                                      param_attr='embed', is_sparse=True)
+            ...     embs.append(emb)
+
+            >>> embs = paddle.concat(x=embs, axis=1)  # concat from 4 * [(-1, 1, 32)] to (-1, 4, 32)
+            >>> embs = paddle.reshape(x=embs, shape=(-1, 4 * 32))  # reshape to (batch_size = -1, dim = 4*32)
+            >>> loss = paddle.static.nn.nce(input=embs, label=words[label_word],
+            ...                             num_total_classes=dict_size, param_attr='nce.w_0',
+            ...                             bias_attr='nce.b_0')
+
+            # or use custom distribution
+            >>> dist = np.array([0.05, 0.5, 0.1, 0.3, 0.05])
+            >>> loss = paddle.static.nn.nce(input=embs, label=words[label_word],
+            ...                             num_total_classes=5, param_attr='nce.w_1',
+            ...                             bias_attr='nce.b_1',
+            ...                             num_neg_samples=3,
+            ...                             sampler="custom_dist",
+            ...                             custom_dist=dist)
     """
     helper = LayerHelper('nce', **locals())
     check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'nce')
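One constraint implicit in the `sampler="custom_dist"` branch of the converted nce example: `custom_dist` supplies one sampling probability per class, so its length must equal `num_total_classes` and the entries should sum to 1 (five classes above). A small sanity-check sketch:

.. code-block:: python

    >>> import numpy as np
    >>> dist = np.array([0.05, 0.5, 0.1, 0.3, 0.05])
    >>> # one probability per class, matching num_total_classes=5 above
    >>> assert dist.shape == (5,)
    >>> assert np.isclose(dist.sum(), 1.0)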
diff --git a/python/paddle/static/nn/metric.py b/python/paddle/static/nn/metric.py
index e63a52367261fed3bf4f2ad4d47976074e97385e..c4fc0a9b4f2406adf799a994c8d8d3d6db5509bf 100644
--- a/python/paddle/static/nn/metric.py
+++ b/python/paddle/static/nn/metric.py
@@ -51,25 +51,27 @@ def accuracy(input, label, k=1, correct=None, total=None):
     Examples:
         .. code-block:: python

-            import numpy as np
-            import paddle
-            import paddle.static as static
-            import paddle.nn.functional as F
-            paddle.enable_static()
-            data = static.data(name="input", shape=[-1, 32, 32], dtype="float32")
-            label = static.data(name="label", shape=[-1,1], dtype="int")
-            fc_out = static.nn.fc(x=data, size=10)
-            predict = F.softmax(x=fc_out)
-            result = static.accuracy(input=predict, label=label, k=5)
-            place = paddle.CPUPlace()
-            exe = static.Executor(place)
-            exe.run(static.default_startup_program())
-            x = np.random.rand(3, 32, 32).astype("float32")
-            y = np.array([[1],[0],[1]])
-            output = exe.run(feed={"input": x,"label": y},
-                             fetch_list=[result])
-            print(output)
-            # [array(0.33333334, dtype=float32)]
+            >>> import numpy as np
+            >>> import paddle
+            >>> import paddle.static as static
+            >>> import paddle.nn.functional as F
+            >>> paddle.seed(2023)
+            >>> paddle.enable_static()
+            >>> data = static.data(name="input", shape=[-1, 32, 32], dtype="float32")
+            >>> label = static.data(name="label", shape=[-1, 1], dtype="int")
+            >>> fc_out = static.nn.fc(x=data, size=10)
+            >>> predict = F.softmax(x=fc_out)
+            >>> result = static.accuracy(input=predict, label=label, k=5)
+            >>> place = paddle.CPUPlace()
+            >>> exe = static.Executor(place)
+            >>> exe.run(static.default_startup_program())
+            >>> np.random.seed(1107)
+            >>> x = np.random.rand(3, 32, 32).astype("float32")
+            >>> y = np.array([[1],[0],[1]])
+            >>> output = exe.run(feed={"input": x, "label": y},
+            ...                  fetch_list=[result])
+            >>> print(output)
+            [array(0.33333334, dtype=float32)]
     """

     if in_dygraph_mode():
@@ -177,51 +179,61 @@ def auc(
     Examples:
         .. code-block:: python
+           :name: example-1

-            import paddle
-            import numpy as np
-            paddle.enable_static()
-
-            data = paddle.static.data(name="input", shape=[-1, 32,32], dtype="float32")
-            label = paddle.static.data(name="label", shape=[-1], dtype="int")
-            fc_out = paddle.static.nn.fc(x=data, size=2)
-            predict = paddle.nn.functional.softmax(x=fc_out)
-            result=paddle.static.auc(input=predict, label=label)
-
-            place = paddle.CPUPlace()
-            exe = paddle.static.Executor(place)
-
-            exe.run(paddle.static.default_startup_program())
-            x = np.random.rand(3,32,32).astype("float32")
-            y = np.array([1,0,1])
-            output= exe.run(feed={"input": x,"label": y},
-                            fetch_list=[result[0]])
-            print(output)
-
-            #you can learn the usage of ins_tag_weight by the following code.
-            '''
-            import paddle
-            import numpy as np
-            paddle.enable_static()
-
-            data = paddle.static.data(name="input", shape=[-1, 32,32], dtype="float32")
-            label = paddle.static.data(name="label", shape=[-1], dtype="int")
-            ins_tag_weight = paddle.static.data(name='ins_tag', shape=[-1,16], lod_level=0, dtype='float64')
-            fc_out = paddle.static.nn.fc(x=data, size=2)
-            predict = paddle.nn.functional.softmax(x=fc_out)
-            result=paddle.static.auc(input=predict, label=label, ins_tag_weight=ins_tag_weight)
-
-            place = paddle.CPUPlace()
-            exe = paddle.static.Executor(place)
-
-            exe.run(paddle.static.default_startup_program())
-            x = np.random.rand(3,32,32).astype("float32")
-            y = np.array([1,0,1])
-            z = np.array([1,0,1])
-            output= exe.run(feed={"input": x,"label": y, "ins_tag_weight":z},
-                            fetch_list=[result[0]])
-            print(output)
-            '''
+            >>> import paddle
+            >>> import numpy as np
+            >>> paddle.enable_static()
+
+            >>> paddle.seed(2023)
+            >>> data = paddle.static.data(name="input", shape=[-1, 32, 32], dtype="float32")
+            >>> label = paddle.static.data(name="label", shape=[-1], dtype="int")
+            >>> fc_out = paddle.static.nn.fc(x=data, size=2)
+            >>> predict = paddle.nn.functional.softmax(x=fc_out)
+            >>> result = paddle.static.auc(input=predict, label=label)
+
+            >>> place = paddle.CPUPlace()
+            >>> exe = paddle.static.Executor(place)
+
+            >>> exe.run(paddle.static.default_startup_program())
+            >>> np.random.seed(1107)
+            >>> x = np.random.rand(3, 32, 32).astype("float32")
+            >>> y = np.array([1, 0, 1])
+            >>> output = exe.run(feed={"input": x, "label": y},
+            ...                  fetch_list=[result[0]])
+            >>> print(output)
+            [array(1.)]
+
+
+        .. code-block:: python
+           :name: example-2
+
+            # you can learn the usage of ins_tag_weight by the following code.
+
+            >>> import paddle
+            >>> import numpy as np
+            >>> paddle.enable_static()
+
+            >>> paddle.seed(2023)
+            >>> data = paddle.static.data(name="input", shape=[-1, 32, 32], dtype="float32")
+            >>> label = paddle.static.data(name="label", shape=[-1], dtype="int")
+            >>> ins_tag_weight = paddle.static.data(name='ins_tag_weight', shape=[-1, 1], lod_level=0, dtype='float64')
+            >>> fc_out = paddle.static.nn.fc(x=data, size=2)
+            >>> predict = paddle.nn.functional.softmax(x=fc_out)
+            >>> result = paddle.static.auc(input=predict, label=label, ins_tag_weight=ins_tag_weight)
+
+            >>> place = paddle.CPUPlace()
+            >>> exe = paddle.static.Executor(place)
+
+            >>> exe.run(paddle.static.default_startup_program())
+            >>> np.random.seed(1107)
+            >>> x = np.random.rand(3, 32, 32).astype("float32")
+            >>> y = np.array([1, 0, 1])
+            >>> z = np.array([[1], [0], [1]]).astype("float64")
+            >>> output = exe.run(feed={"input": x, "label": y, "ins_tag_weight": z},
+            ...                  fetch_list=[result[0]])
+            >>> print(output)
+            [array(1.)]
     """

     helper = LayerHelper("auc", **locals())
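Both AUC examples fetch `result[0]`: the first element of the returned tuple is the accumulated AUC tensor, with auxiliary stat tensors behind it (the tuple layout is inferred from the `result[0]` fetch above). Because the accumulator state persists across `exe.run` calls, repeated runs refine a streaming metric rather than computing per-batch values. A sketch of feeding several batches, with random data for illustration only:

.. code-block:: python

    >>> for _ in range(3):
    ...     x = np.random.rand(3, 32, 32).astype("float32")
    ...     y = np.random.randint(0, 2, size=(3,))
    ...     auc_val, = exe.run(feed={"input": x, "label": y},
    ...                        fetch_list=[result[0]])  # accumulates across runs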
@@ -350,26 +362,27 @@ def ctr_metric_bundle(input, label, ins_tag_weight=None):
         local_prob(Tensor): Local sum of predicted ctr
         local_q(Tensor): Local sum of q value

-    Examples 1:
-        .. code-block:: python
-
-            import paddle
-            paddle.enable_static()
-            data = paddle.static.data(name="data", shape=[32, 32], dtype="float32")
-            label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
-            predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(input=data, size=1))
-            auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label)
-    Examples 2:
+    Examples:
         .. code-block:: python
+            :name: example-1

-            import paddle
-            paddle.enable_static()
-            data = paddle.static.data(name="data", shape=[32, 32], dtype="float32")
-            label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
-            predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(input=data, size=1))
-            ins_tag_weight = paddle.static.data(name='ins_tag', shape=[-1,16], lod_level=0, dtype='int64')
-            auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label, ins_tag_weight=ins_tag_weight)
+            >>> import paddle
+            >>> paddle.enable_static()
+            >>> data = paddle.static.data(name="data", shape=[-1, 32], dtype="float32")
+            >>> label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
+            >>> predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(x=data, size=1))
+            >>> auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label)

+        .. code-block:: python
+            :name: example-2
+
+            >>> import paddle
+            >>> paddle.enable_static()
+            >>> data = paddle.static.data(name="data", shape=[-1, 32], dtype="float32")
+            >>> label = paddle.static.data(name="label", shape=[-1, 1], dtype="int32")
+            >>> predict = paddle.nn.functional.sigmoid(paddle.static.nn.fc(x=data, size=1))
+            >>> ins_tag_weight = paddle.static.data(name='ins_tag_weight', shape=[-1, 1], lod_level=0, dtype='int64')
+            >>> auc_out = paddle.static.ctr_metric_bundle(input=predict, label=label, ins_tag_weight=ins_tag_weight)
     """
     if ins_tag_weight is None:
         ins_tag_weight = paddle.tensor.fill_constant(