From 0c5781e5ba5caba659095d217fde01c7e8300057 Mon Sep 17 00:00:00 2001 From: Sonder <55493212+AndSonder@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:16:03 +0800 Subject: [PATCH] [xdoctest] reformat example code with google style No.116-119 (#56118) --- python/paddle/optimizer/optimizer.py | 382 ++++++++++--------- python/paddle/quantization/config.py | 228 ++++++----- python/paddle/quantization/factory.py | 31 +- python/paddle/quantization/imperative/qat.py | 215 ++++++----- 4 files changed, 453 insertions(+), 403 deletions(-) diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index c198c436b23..c4ae555d4c5 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -125,40 +125,40 @@ class Optimizer: Examples: .. code-block:: python - #Take the subclass adam as an example - import paddle - linear = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear(inp) - loss = paddle.mean(out) - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - loss.backward() - adam.step() - adam.clear_grad() - - #Take the subclass sgd as an example - #optimize parameters in linear_1 and linear2 in different options. - #Note that the learning_rate of linear_2 is 0.01. - linear_1 = paddle.nn.Linear(10, 10) - linear_2 = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear_1(inp) - out = linear_2(out) - loss = paddle.mean(out) - sgd = paddle.optimizer.SGD( - learning_rate=0.1, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1 - }], - weight_decay=0.01) - loss.backward() - sgd.step() - sgd.clear_grad() + >>> # Take the subclass adam as an example + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> adam = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters()) + >>> loss.backward() + >>> adam.step() + >>> adam.clear_grad() + + >>> #Take the subclass sgd as an example + >>> #optimize parameters in linear_1 and linear2 in different options. + >>> #Note that the learning_rate of linear_2 is 0.01. + >>> linear_1 = paddle.nn.Linear(10, 10) + >>> linear_2 = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear_1(inp) + >>> out = linear_2(out) + >>> loss = paddle.mean(out) + >>> sgd = paddle.optimizer.SGD( + ... learning_rate=0.1, + ... parameters=[{ + ... 'params': linear_1.parameters() + ... }, { + ... 'params': linear_2.parameters(), + ... 'weight_decay': 0.001, + ... 'learning_rate': 0.1 + ... }], + ... weight_decay=0.01) + >>> loss.backward() + >>> sgd.step() + >>> sgd.clear_grad() """ @@ -343,23 +343,23 @@ class Optimizer: Examples: .. 
code-block:: python - import paddle + >>> import paddle - emb = paddle.nn.Embedding(10, 10) + >>> emb = paddle.nn.Embedding(10, 10) - layer_state_dict = emb.state_dict() - paddle.save(layer_state_dict, "emb.pdparams") + >>> layer_state_dict = emb.state_dict() + >>> paddle.save(layer_state_dict, "emb.pdparams") - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, - parameters=emb.parameters()) - opt_state_dict = adam.state_dict() - paddle.save(opt_state_dict, "adam.pdopt") + >>> scheduler = paddle.optimizer.lr.NoamDecay( + ... d_model=0.01, warmup_steps=100, verbose=True) + >>> adam = paddle.optimizer.Adam( + ... learning_rate=scheduler, + ... parameters=emb.parameters()) + >>> opt_state_dict = adam.state_dict() + >>> paddle.save(opt_state_dict, "adam.pdopt") - opti_state_dict = paddle.load("adam.pdopt") - adam.set_state_dict(opti_state_dict) + >>> opti_state_dict = paddle.load("adam.pdopt") + >>> adam.set_state_dict(opti_state_dict) ''' if isinstance(self._learning_rate, LRScheduler): @@ -500,23 +500,22 @@ class Optimizer: Examples: .. code-block:: python - import paddle - linear = paddle.nn.Linear(10, 10) - - adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - adam.set_lr(lr_list[i]) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) + + >>> adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + + >>> # set learning rate manually by python float value + >>> lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] + >>> for i in range(5): + ... adam.set_lr(lr_list[i]) + ... lr = adam.get_lr() + ... print("current lr is {}".format(lr)) + current lr is 0.2 + current lr is 0.3 + current lr is 0.4 + current lr is 0.5 + current lr is 0.6 """ if not isinstance(value, (int, float)): @@ -570,24 +569,24 @@ class Optimizer: Examples: .. 
code-block:: python - import paddle - linear = paddle.nn.Linear(10, 10) + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) - adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + >>> adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) - # set learning rate manually by class LRScheduler - scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2,4,6], gamma=0.8) - adam.set_lr_scheduler(scheduler) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # current lr is 0.5 + >>> # set learning rate manually by class LRScheduler + >>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2,4,6], gamma=0.8) + >>> adam.set_lr_scheduler(scheduler) + >>> lr = adam.get_lr() + >>> print("current lr is {}".format(lr)) + current lr is 0.5 - # set learning rate manually by another LRScheduler - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=5, gamma=0.6) - adam.set_lr_scheduler(scheduler) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # current lr is 0.1 + >>> # set learning rate manually by another LRScheduler + >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=5, gamma=0.6) + >>> adam.set_lr_scheduler(scheduler) + >>> lr = adam.get_lr() + >>> print("current lr is {}".format(lr)) + current lr is 0.1 """ from paddle.optimizer.lr import LRScheduler @@ -611,50 +610,79 @@ class Optimizer: Examples: .. code-block:: python - # train on default dynamic graph mode - import paddle - import numpy as np - emb = paddle.nn.Embedding(10, 3) - - ## example1: LRScheduler is not used, return the same value is all the same - adam = paddle.optimizer.Adam(0.01, parameters = emb.parameters()) - for batch in range(10): - input = paddle.randint(low=0, high=5, shape=[5]) - out = emb(input) - out.backward() - print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.01 - adam.step() - - ## example2: StepDecay is used, return the scheduled learning rate - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adam(scheduler, parameters = emb.parameters()) - for batch in range(10): - input = paddle.randint(low=0, high=5, shape=[5]) - out = emb(input) - out.backward() - print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.5->0.05... - adam.step() - scheduler.step() - - # train on static graph mode - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 10]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adam(learning_rate=scheduler) - adam.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for batch in range(10): - print("Learning rate of step{}: {}", adam.get_lr()) # 0.5->0.05->0.005... - out = exe.run(main_prog, feed={'x': np.random.randn(3, 10).astype('float32')}) - scheduler.step() - + >>> # train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + >>> emb = paddle.nn.Embedding(10, 3) + + >>> ## example1: LRScheduler is not used, return the same value is all the same + >>> adam = paddle.optimizer.Adam(0.01, parameters = emb.parameters()) + >>> for batch in range(10): + ... input = paddle.randint(low=0, high=5, shape=[5]) + ... out = emb(input) + ... out.backward() + ... 
print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.01 + ... adam.step() + Learning rate of step0: 0.01 + Learning rate of step1: 0.01 + Learning rate of step2: 0.01 + Learning rate of step3: 0.01 + Learning rate of step4: 0.01 + Learning rate of step5: 0.01 + Learning rate of step6: 0.01 + Learning rate of step7: 0.01 + Learning rate of step8: 0.01 + Learning rate of step9: 0.01 + + >>> ## example2: StepDecay is used, return the scheduled learning rate + >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) + >>> adam = paddle.optimizer.Adam(scheduler, parameters = emb.parameters()) + >>> for batch in range(10): + ... input = paddle.randint(low=0, high=5, shape=[5]) + ... out = emb(input) + ... out.backward() + ... print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.5->0.05... + ... adam.step() + ... scheduler.step() + Learning rate of step0: 0.5 + Learning rate of step1: 0.5 + Learning rate of step2: 0.05 + Learning rate of step3: 0.05 + Learning rate of step4: 0.005000000000000001 + Learning rate of step5: 0.005000000000000001 + Learning rate of step6: 0.0005000000000000001 + Learning rate of step7: 0.0005000000000000001 + Learning rate of step8: 5.000000000000001e-05 + Learning rate of step9: 5.000000000000001e-05 + + >>> # train on static graph mode + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 10]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) + ... adam = paddle.optimizer.Adam(learning_rate=scheduler) + ... adam.minimize(loss) + + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for batch in range(10): + ... print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.5->0.05->0.005... + ... out = exe.run(main_prog, feed={'x': np.random.randn(3, 10).astype('float32')}) + ... scheduler.step() + Learning rate of step0: 0.5 + Learning rate of step1: 0.5 + Learning rate of step2: 0.05 + Learning rate of step3: 0.05 + Learning rate of step4: 0.005000000000000001 + Learning rate of step5: 0.005000000000000001 + Learning rate of step6: 0.0005000000000000001 + Learning rate of step7: 0.0005000000000000001 + Learning rate of step8: 5.000000000000001e-05 + Learning rate of step9: 5.000000000000001e-05 """ if isinstance(self._learning_rate, float): return self._learning_rate @@ -1146,17 +1174,17 @@ class Optimizer: Examples: .. code-block:: python - import paddle - x = paddle.arange(26, dtype="float32").reshape([2, 13]) - - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(x) - out.backward() - adam.step() - adam.clear_grad() + >>> import paddle + >>> x = paddle.arange(26, dtype="float32").reshape([2, 13]) + + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... parameters = linear.parameters()) + >>> out = linear(x) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ act_no_grad_set = None if framework.in_dygraph_mode(): @@ -1218,16 +1246,16 @@ class Optimizer: Examples: .. 
code-block:: python - import paddle + >>> import paddle - inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - optimizer = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - params_grads = optimizer.backward(loss) - optimizer.apply_gradients(params_grads) + >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) + >>> linear = paddle.nn.Linear(10, 10) + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> optimizer = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters()) + >>> params_grads = optimizer.backward(loss) + >>> optimizer.apply_gradients(params_grads) """ @@ -1436,17 +1464,17 @@ class Optimizer: Examples: .. code-block:: python - import paddle + >>> import paddle - a = paddle.arange(26, dtype="float32").reshape([2, 13]) - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() + >>> a = paddle.arange(26, dtype="float32").reshape([2, 13]) + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ param_list = [] @@ -1494,21 +1522,21 @@ class Optimizer: Examples: .. code-block:: python - import paddle - linear = paddle.nn.Linear(10, 10) - input = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear(input) - loss = paddle.mean(out) + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) + >>> input = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear(input) + >>> loss = paddle.mean(out) - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") + >>> beta1 = paddle.to_tensor([0.9], dtype="float32") + >>> beta2 = paddle.to_tensor([0.99], dtype="float32") - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - loss.backward() - adam.minimize(loss) - adam.clear_grad() + >>> adam = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters(), + ... weight_decay=0.01) + >>> loss.backward() + >>> adam.minimize(loss) + >>> adam.clear_grad() """ assert isinstance(loss, Variable), "The loss should be an Tensor." @@ -1562,17 +1590,17 @@ class Optimizer: Examples: .. code-block:: python - import paddle - - a = paddle.arange(26, dtype="float32").reshape([2, 13]) - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() + >>> import paddle + + >>> a = paddle.arange(26, dtype="float32").reshape([2, 13]) + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... 
parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ if paddle.fluid.dygraph.base.in_declarative_mode(): self._declarative_step() diff --git a/python/paddle/quantization/config.py b/python/paddle/quantization/config.py index dc8ea16e53b..cb8db9206e6 100644 --- a/python/paddle/quantization/config.py +++ b/python/paddle/quantization/config.py @@ -70,12 +70,15 @@ class QuantConfig: Examples: .. code-block:: python - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=quanter, weight=quanter) - print(q_config) + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=quanter, weight=quanter) + >>> print(q_config) + Global config: + activation: FakeQuanterWithAbsMaxObserver(name=None,moving_rate=0.9,bit_length=8,dtype=float32) + weight: FakeQuanterWithAbsMaxObserver(name=None,moving_rate=0.9,bit_length=8,dtype=float32) """ @@ -100,31 +103,36 @@ class QuantConfig: weight: QuanterFactory = None, ): r""" - Set the quantization config by layer. It has the highest priority among - all the setting methods. - - Args: - layer(Union[Layer, list]): One or a list of layers. - activation(QuanterFactory): Quanter used for activations. - weight(QuanterFactory): Quanter used for weights. - - Examples: - .. code-block:: python - - import paddle - from paddle.nn import Linear - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Linear(576, 120) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) - print(q_config) + Set the quantization config by layer. It has the highest priority among + all the setting methods. + + Args: + layer(Union[Layer, list]): One or a list of layers. + activation(QuanterFactory): Quanter used for activations. + weight(QuanterFactory): Quanter used for weights. + + Examples: + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Linear(576, 120) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> print(q_config) + Global config: + None + Layer prefix config: + {'linear_0': } """ if isinstance(layer, list): @@ -144,31 +152,36 @@ class QuantConfig: weight: QuanterFactory = None, ): r""" - Set the quantization config by full name of layer. Its priority is - lower than `add_layer_config`. - - Args: - layer_name(Union[str, list]): One or a list of layers' full name. - activation(QuanterFactory): Quanter used for activations. - weight(QuanterFactory): Quanter used for weights. - - Examples: - .. 
code-block:: python - - import paddle - from paddle.nn import Linear - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Linear(576, 120) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_name_config([model.fc.full_name()], activation=quanter, weight=quanter) - print(q_config) + Set the quantization config by full name of layer. Its priority is + lower than `add_layer_config`. + + Args: + layer_name(Union[str, list]): One or a list of layers' full name. + activation(QuanterFactory): Quanter used for activations. + weight(QuanterFactory): Quanter used for weights. + + Examples: + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Linear(576, 120) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_name_config([model.fc.full_name()], activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> print(q_config) + Global config: + None + Layer prefix config: + {'linear_0': } """ if isinstance(layer_name, str): @@ -198,22 +211,27 @@ class QuantConfig: weight(QuanterFactory): Quanter used for weights. Examples: - .. code-block:: python - - import paddle - from paddle.nn import Linear - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Linear(576, 120) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_type_config([Linear], activation=quanter, weight=quanter) - print(q_config) + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Linear(576, 120) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_type_config([Linear], activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> print(q_config) + Global config: + None + Layer type config: + {: } """ if isinstance(layer_type, type) and issubclass( @@ -240,18 +258,18 @@ class QuantConfig: target(type): The type of layers that will be converted to. Examples: - .. code-block:: python - - from paddle.nn import Conv2D - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - class CustomizedQuantedConv2D: - def forward(self, x): - pass - # add some code for quantization simulation - q_config.add_qat_layer_mapping(Conv2D, CustomizedQuantedConv2D) + .. 
code-block:: python + + >>> from paddle.nn import Conv2D + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> class CustomizedQuantedConv2D: + ... def forward(self, x): + ... pass + ... # add some code for quantization simulation + >>> q_config.add_qat_layer_mapping(Conv2D, CustomizedQuantedConv2D) """ assert isinstance(source, type) and issubclass( source, paddle.nn.Layer @@ -272,13 +290,13 @@ class QuantConfig: layer_type(type): The type of layer to be declared as leaf. Examples: - .. code-block:: python + .. code-block:: python - from paddle.nn import Sequential - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - q_config = QuantConfig(activation=None, weight=None) - q_config.add_customized_leaf(Sequential) + >>> from paddle.nn import Sequential + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_customized_leaf(Sequential) """ self._customized_leaves.append(layer_type) @@ -379,22 +397,22 @@ class QuantConfig: model(Layer): The model to be specified by the config. Examples: - .. code-block:: python - - import paddle - from paddle.nn import Linear, Sequential - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Sequential(Linear(576, 120),Linear(576, 120)) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) - q_config._specify(model) + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear, Sequential + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Sequential(Linear(576, 120),Linear(576, 120)) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) + >>> q_config._specify(model) """ self._model = model self._specify_helper(self._model) diff --git a/python/paddle/quantization/factory.py b/python/paddle/quantization/factory.py index d7ad2c355ba..3a1205c38be 100644 --- a/python/paddle/quantization/factory.py +++ b/python/paddle/quantization/factory.py @@ -83,21 +83,22 @@ def quanter(class_name): Examples: .. 
code-block:: python - # Given codes in ./customized_quanter.py - from paddle.quantization import quanter - from paddle.quantization import BaseQuanter - @quanter("CustomizedQuanter") - class CustomizedQuanterLayer(BaseQuanter): - def __init__(self, arg1, kwarg1=None): - pass - - # Used in ./test.py - # from .customized_quanter import CustomizedQuanter - from paddle.quantization import QuantConfig - arg1_value = "test" - kwarg1_value = 20 - quanter = CustomizedQuanter(arg1_value, kwarg1=kwarg1_value) - q_config = QuantConfig(activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> # Given codes in ./customized_quanter.py + >>> from paddle.quantization import quanter + >>> from paddle.quantization import BaseQuanter + >>> @quanter("CustomizedQuanter") + >>> class CustomizedQuanterLayer(BaseQuanter): + ... def __init__(self, arg1, kwarg1=None): + ... pass + + >>> # Used in ./test.py + >>> # from .customized_quanter import CustomizedQuanter + >>> from paddle.quantization import QuantConfig + >>> arg1_value = "test" + >>> kwarg1_value = 20 + >>> quanter = CustomizedQuanter(arg1_value, kwarg1=kwarg1_value) + >>> q_config = QuantConfig(activation=quanter, weight=quanter) """ diff --git a/python/paddle/quantization/imperative/qat.py b/python/paddle/quantization/imperative/qat.py index 591dac54507..f261f4cabe4 100644 --- a/python/paddle/quantization/imperative/qat.py +++ b/python/paddle/quantization/imperative/qat.py @@ -135,79 +135,81 @@ class ImperativeQuantAware: during training. If this attribute is not sets or the attribute is false, the Layer would be qunatized in training. - Examples 1: - .. code-block:: python - - import paddle - from paddle.static.quantization \ - import ImperativeQuantAware - from paddle.vision.models \ - import resnet - - model = resnet.resnet50(pretrained=True) - - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') - - # Add the fake quant logical. - # The original model will be rewrite. - # The outscale of outputs in supportted layers would be calculated. - imperative_qat.quantize(model) - - # Fine-tune the quantized model - # ... - - # Save quant model for the inference. - imperative_qat.save_quantized_model( - layer=model, - model_path="./resnet50_qat", - input_spec=[ - paddle.static.InputSpec( - shape=[None, 3, 224, 224], dtype='float32')]) - - Examples 2: - .. code-block:: python - - import paddle - from paddle.static.quantization \ - import ImperativeQuantAware - - class ImperativeModel(paddle.nn.Layer): - def __init__(self): - super().__init__() - # self.linear_0 would skip the quantization. - self.linear_0 = paddle.nn.Linear(784, 400) - self.linear_0.skip_quant = True - - # self.linear_1 would not skip the quantization. - self.linear_1 = paddle.nn.Linear(400, 10) - self.linear_1.skip_quant = False - - def forward(self, inputs): - x = self.linear_0(inputs) - x = self.linear_1(inputs) - return x - - model = ImperativeModel() - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') - - # Add the fake quant logical. - # The original model will be rewrite. - # - # There is only one Layer(self.linear1) would be added the - # fake quant logical. - imperative_qat.quantize(model) - - # Fine-tune the quantized model - # ... - - # Save quant model for the inference. - imperative_qat.save_quantized_model( - layer=model, - model_path="./imperative_model_qat") + Examples: + .. 
code-block:: python + + >>> import paddle + >>> from paddle.static.quantization import ( + ... ImperativeQuantAware, + ... ) + >>> from paddle.vision.models import ( + ... resnet, + ... ) + + >>> model = resnet.resnet50(pretrained=True) + + >>> imperative_qat = ImperativeQuantAware( + ... weight_quantize_type='abs_max', + ... activation_quantize_type='moving_average_abs_max') + + >>> # Add the fake quant logic. + >>> # The original model will be rewritten. + >>> # The outscale of outputs in supported layers will be calculated. + >>> imperative_qat.quantize(model) + + >>> # Fine-tune the quantized model + >>> # ... + + >>> # Save the quantized model for inference. + >>> imperative_qat.save_quantized_model( + ... layer=model, + ... model_path="./resnet50_qat", + ... input_spec=[ + ... paddle.static.InputSpec( + ... shape=[None, 3, 224, 224], dtype='float32')]) + + .. code-block:: python + + >>> import paddle + >>> from paddle.static.quantization import ( + ... ImperativeQuantAware, + ... ) + + >>> class ImperativeModel(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... # self.linear_0 would skip the quantization. + ... self.linear_0 = paddle.nn.Linear(784, 400) + ... self.linear_0.skip_quant = True + + ... # self.linear_1 would not skip the quantization. + ... self.linear_1 = paddle.nn.Linear(400, 10) + ... self.linear_1.skip_quant = False + + ... def forward(self, inputs): + ... x = self.linear_0(inputs) + ... x = self.linear_1(x) + ... return x + + >>> model = ImperativeModel() + >>> imperative_qat = ImperativeQuantAware( + ... weight_quantize_type='abs_max', + ... activation_quantize_type='moving_average_abs_max') + + >>> # Add the fake quant logic. + >>> # The original model will be rewritten. + >>> # + >>> # Only one layer (self.linear_1) will have the + >>> # fake quant logic added. + >>> imperative_qat.quantize(model) + + >>> # Fine-tune the quantized model + >>> # ... + + >>> # Save the quantized model for inference. + >>> imperative_qat.save_quantized_model( + ... layer=model, + ... model_path="./imperative_model_qat") """ super().__init__() self.fuse_conv_bn = fuse_conv_bn @@ -245,39 +247,40 @@ class ImperativeQuantAware: None Examples: - .. code-block:: python - - import paddle - from paddle.static.quantization \ - import ImperativeQuantAware - - class ImperativeModel(paddle.nn.Layer): - def __init__(self): - super().__init__() - # self.linear_0 would skip the quantization. - self.linear_0 = paddle.nn.Linear(784, 400) - self.linear_0.skip_quant = True - - # self.linear_1 would not skip the quantization. - self.linear_1 = paddle.nn.Linear(400, 10) - self.linear_1.skip_quant = False - - def forward(self, inputs): - x = self.linear_0(inputs) - x = self.linear_1(inputs) - return x - - model = ImperativeModel() - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') - - # Add the fake quant logical. - # The original model will be rewrite. - # - # There is only one Layer(self.linear1) would be added the - # fake quant logical. - imperative_qat.quantize(model) + .. code-block:: python + + >>> import paddle + >>> from paddle.static.quantization import ( + ... ImperativeQuantAware, + ... ) + + >>> class ImperativeModel(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... # self.linear_0 would skip the quantization. + ... self.linear_0 = paddle.nn.Linear(784, 400) + ... self.linear_0.skip_quant = True + + ... # self.linear_1 would not skip the quantization. + ... 
self.linear_1 = paddle.nn.Linear(400, 10) + ... self.linear_1.skip_quant = False + + ... def forward(self, inputs): + ... x = self.linear_0(inputs) + ... x = self.linear_1(x) + ... return x + + >>> model = ImperativeModel() + >>> imperative_qat = ImperativeQuantAware( + ... weight_quantize_type='abs_max', + ... activation_quantize_type='moving_average_abs_max') + + >>> # Add the fake quant logic. + >>> # The original model will be rewritten. + >>> # + >>> # Only one layer (self.linear_1) will have the + >>> # fake quant logic added. + >>> imperative_qat.quantize(model) """ assert isinstance( model, paddle.nn.Layer -- GitLab
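Note (not part of the patch): the diff above only reformats docstring examples into the doctest prompt style. For readers unfamiliar with that format, the short sketch below illustrates the ``>>>``/``...`` prompt conventions and the ``# doctest: +SKIP`` directive the converted examples rely on. It is written against Python's standard-library ``doctest`` module so it can be run anywhere; Paddle's CI checks these docstrings with xdoctest instead, and the names ``example_style.py``, ``scale`` and ``train_one_epoch`` are invented for illustration only.

.. code-block:: python

    # example_style.py -- a hypothetical module showing the doctest-style
    # example format that this patch applies to Paddle docstrings.

    def scale(values, factor=2):
        """Multiply every element of ``values`` by ``factor``.

        Examples:
            .. code-block:: python

                >>> scale([1, 2, 3])
                [2, 4, 6]
                >>> total = sum(
                ...     scale([1, 2, 3], factor=10)
                ... )
                >>> total
                60
                >>> # A hypothetical, environment-dependent step; skipped when checked.
                >>> train_one_epoch()  # doctest: +SKIP
        """
        return [v * factor for v in values]


    if __name__ == "__main__":
        import doctest

        # Verbose mode prints every example and whether its expected output
        # matched, which is why the patch records expected outputs (e.g.
        # ``current lr is 0.2``) directly beneath the prompts.
        doctest.testmod(verbose=True)

Running ``python -m doctest -v example_style.py`` (or executing the file directly) compares each example's printed output against the expected lines. Examples with environment-dependent output are either skipped via ``# doctest: +SKIP``, as in the quantization config examples above, or given deterministic expected output, as in the optimizer learning-rate examples.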