From 0c3b36984fc5f83396da34daa9708a26b2e31fee Mon Sep 17 00:00:00 2001
From: Nyakku Shigure <sigure.qaq@gmail.com>
Date: Mon, 31 Jul 2023 14:44:48 +0800
Subject: [PATCH] [xdoctest] reformat example code with google style in
 `paddle/jit` (#55645)

* [xdoctest] reformat example code for paddle.jit.api

* test=docs_preview

* add some ... for decorator

* skip some example, test=docs_preview

* add ..., test=docs_preview

* skip some test, test=docs_preview

* more jit files, test=docs_preview

* remove some empty lines, test=docs_preview

* format program translator, test=docs_preview

* remove a blank line, test=docs_preview

* skip translated layer.program, test=docs_preview

* fix doc format, test=docs_preview
---
 python/paddle/jit/api.py                      | 870 +++++++++---------
 .../paddle/jit/dy2static/convert_call_func.py |  43 +-
 python/paddle/jit/dy2static/logging_utils.py  |  25 +-
 .../jit/dy2static/program_translator.py       | 322 ++++---
 python/paddle/jit/translated_layer.py         | 301 +++---
 5 files changed, 785 insertions(+), 776 deletions(-)

diff --git a/python/paddle/jit/api.py b/python/paddle/jit/api.py
index c12d54e5351..4af2a4fdfb3 100644
--- a/python/paddle/jit/api.py
+++ b/python/paddle/jit/api.py
@@ -126,29 +126,28 @@ def _dygraph_to_static_func_(dygraph_func):
     Examples:
         .. code-block:: python
 
-          import paddle
-          import paddle.fluid as fluid
-          import numpy as np
-          from paddle.jit.api import dygraph_to_static_func
-
-          @dygraph_to_static_func
-          def func(x):
-              if paddle.mean(x) < 0:
-                  x_v = x - 1
-              else:
-                  x_v = x + 1
-
-               return x_v
-
-          x = paddle.full(shape=[3, 3], fill_value=0, dtype='float64')
-
-          x_v = func(x)
-          exe = fluid.Executor(fluid.CPUPlace())
-          out = exe.run(fetch_list=[x_v])
-          print(out[0])
-          # [[1. 1. 1.]
-          #  [1. 1. 1.]
-          #  [1. 1. 1.]]
+            >>> import paddle
+            >>> from paddle.jit.api import dygraph_to_static_func
+
+            >>> @dygraph_to_static_func
+            ... def func(x):
+            ...     if paddle.mean(x) < 0:
+            ...         x_v = x - 1
+            ...     else:
+            ...         x_v = x + 1
+            ...
+            ...     return x_v
+            ...
+            >>> paddle.enable_static()
+            >>> x = paddle.full(shape=[3, 3], fill_value=0, dtype='float64')
+
+            >>> x_v = func(x)
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> out = exe.run(fetch_list=[x_v])
+            >>> print(out[0])
+            [[1. 1. 1.]
+             [1. 1. 1.]
+             [1. 1. 1.]]
 
     """
 
@@ -202,18 +201,16 @@ def ignore_module(modules: list[Any]):
     Examples:
         .. code-block:: python
 
-            import scipy
-            import astor
+            >>> import scipy
+            >>> import astor
 
-            import paddle
-            from paddle.jit import ignore_module
-
-            modules = [
-               scipy,
-               astor
-            ]
-
-            ignore_module(modules)
+            >>> import paddle
+            >>> from paddle.jit import ignore_module
+            >>> modules = [
+            ...     scipy,
+            ...     astor,
+            ... ]
+            >>> ignore_module(modules)
 
     """
     add_ignore_module(modules)
@@ -263,20 +260,23 @@ def to_static(
     Examples:
         .. code-block:: python
 
-            import paddle
-            from paddle.jit import to_static
-
-            @to_static
-            def func(x):
-                if paddle.mean(x) < 0:
-                    x_v = x - 1
-                else:
-                    x_v = x + 1
-                return x_v
-
-            x = paddle.ones([1, 2], dtype='float32')
-            x_v = func(x)
-            print(x_v) # [[2. 2.]]
+            >>> # doctest: +SKIP
+            >>> import paddle
+            >>> from paddle.jit import to_static
+
+            >>> @to_static
+            ... def func(x):
+            ...     if paddle.mean(x) < 0:
+            ...         x_v = x - 1
+            ...     else:
+            ...         x_v = x + 1
+            ...     return x_v
+            ...
+            >>> x = paddle.ones([1, 2], dtype='float32')
+            >>> x_v = func(x)
+            >>> print(x_v)
+            Tensor(shape=[1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[2., 2.]])
 
     """
     property = kwargs.get("property", False)
@@ -343,24 +343,27 @@ def not_to_static(func=None):
     Examples:
         .. code-block:: python
 
-            import paddle
-
-            @paddle.jit.not_to_static
-            def func_not_to_static(x):
-                res = x - 1
-                return res
-
-            @paddle.jit.to_static
-            def func(x):
-                if paddle.mean(x) < 0:
-                    out = func_not_to_static(x)
-                else:
-                    out = x + 1
-                return out
-
-            x = paddle.ones([1, 2], dtype='float32')
-            out = func(x)
-            print(out) # [[2. 2.]]
+            >>> # doctest: +SKIP
+            >>> import paddle
+
+            >>> @paddle.jit.not_to_static
+            ... def func_not_to_static(x):
+            ...     res = x - 1
+            ...     return res
+
+            >>> @paddle.jit.to_static
+            ... def func(x):
+            ...     if paddle.mean(x) < 0:
+            ...         out = func_not_to_static(x)
+            ...     else:
+            ...         out = x + 1
+            ...     return out
+            ...
+            >>> x = paddle.ones([1, 2], dtype='float32')
+            >>> out = func(x)
+            >>> print(out)
+            Tensor(shape=[1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[2., 2.]])
     """
     if func is None:
         return not_to_static
@@ -688,34 +691,36 @@ def _register_save_pre_hook(hook):
     Examples:
         .. code-block:: python
 
-            import numpy as np
-            import paddle
-
-            IMAGE_SIZE = 256
-            CLASS_NUM = 10
-
-            class LinearNet(paddle.nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self._linear = paddle.nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-                def forward(self, x):
-                    return self._linear(x)
-
-            saving_count = 0
-            def save_pre_hook(layer, input_spec, configs):
-                global saving_count
-                saving_count += 1
-
-            remove_handler = paddle.jit.register_save_pre_hook(save_pre_hook)
-
-            layer = LinearNet()
-            paddle.jit.save(layer, "/tmp", [paddle.static.InputSpec(shape=[-1, IMAGE_SIZE])])
-            # saving_count == 1
-
-            remove_handler.remove()
-            paddle.jit.save(layer, "/tmp", [paddle.static.InputSpec(shape=[-1, IMAGE_SIZE])])
-            # saving_count == 1
+            >>> import numpy as np
+            >>> import paddle
+
+            >>> IMAGE_SIZE = 256
+            >>> CLASS_NUM = 10
+
+            >>> class LinearNet(paddle.nn.Layer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self._linear = paddle.nn.Linear(IMAGE_SIZE, CLASS_NUM)
+            ...
+            ...     def forward(self, x):
+            ...         return self._linear(x)
+            ...
+            >>> saving_count = 0
+            >>> def save_pre_hook(layer, input_spec, configs):
+            ...     global saving_count
+            ...     saving_count += 1
+            ...
+            >>> remove_handler = paddle.jit.api._register_save_pre_hook(save_pre_hook)
+
+            >>> layer = LinearNet()
+            >>> paddle.jit.save(layer, "/tmp", [paddle.static.InputSpec(shape=[-1, IMAGE_SIZE])])
+            >>> print(saving_count)
+            1
+
+            >>> remove_handler.remove()
+            >>> paddle.jit.save(layer, "/tmp", [paddle.static.InputSpec(shape=[-1, IMAGE_SIZE])])
+            >>> print(saving_count)
+            1
     """
     global _save_pre_hooks_lock
     global _save_pre_hooks
@@ -834,95 +839,97 @@ def save(layer, path, input_spec=None, **configs):
     Examples:
         .. code-block:: python
 
-            # example 1: save layer
-            import numpy as np
-            import paddle
-            import paddle.nn as nn
-            import paddle.optimizer as opt
-
-            BATCH_SIZE = 16
-            BATCH_NUM = 4
-            EPOCH_NUM = 4
-
-            IMAGE_SIZE = 784
-            CLASS_NUM = 10
-
-            # define a random dataset
-            class RandomDataset(paddle.io.Dataset):
-                def __init__(self, num_samples):
-                    self.num_samples = num_samples
-
-                def __getitem__(self, idx):
-                    image = np.random.random([IMAGE_SIZE]).astype('float32')
-                    label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
-                    return image, label
-
-                def __len__(self):
-                    return self.num_samples
-
-            class LinearNet(nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-                @paddle.jit.to_static
-                def forward(self, x):
-                    return self._linear(x)
-
-            def train(layer, loader, loss_fn, opt):
-                for epoch_id in range(EPOCH_NUM):
-                    for batch_id, (image, label) in enumerate(loader()):
-                        out = layer(image)
-                        loss = loss_fn(out, label)
-                        loss.backward()
-                        opt.step()
-                        opt.clear_grad()
-                        print("Epoch {} batch {}: loss = {}".format(
-                            epoch_id, batch_id, np.mean(loss.numpy())))
-
-            # 1. train & save model.
-
-            # create network
-            layer = LinearNet()
-            loss_fn = nn.CrossEntropyLoss()
-            adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
-
-            # create data loader
-            dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
-            loader = paddle.io.DataLoader(dataset,
-                batch_size=BATCH_SIZE,
-                shuffle=True,
-                drop_last=True,
-                num_workers=2)
-
-            # train
-            train(layer, loader, loss_fn, adam)
-
-            # save
-            path = "example_model/linear"
-            paddle.jit.save(layer, path)
-
-            # example 2: save function
-            import paddle
-            from paddle.static import InputSpec
-
-
-            def save_function():
-                @paddle.jit.to_static
-                def fun(inputs):
-                    return paddle.tanh(inputs)
-
-                path = 'test_jit_save_load_function_1/func'
-                inps = paddle.rand([3, 6])
-                origin = fun(inps)
-
-                paddle.jit.save(fun, path)
-                load_func = paddle.jit.load(path)
-
-                load_result = load_func(inps)
-                print((load_result - origin).abs().max() < 1e-10)
-
-            save_function()
+            >>> # doctest: +SKIP
+            >>> # example 1: save layer
+            >>> import numpy as np
+            >>> import paddle
+            >>> import paddle.nn as nn
+            >>> import paddle.optimizer as opt
+
+            >>> BATCH_SIZE = 16
+            >>> BATCH_NUM = 4
+            >>> EPOCH_NUM = 4
+
+            >>> IMAGE_SIZE = 784
+            >>> CLASS_NUM = 10
+
+            >>> # define a random dataset
+            >>> class RandomDataset(paddle.io.Dataset):
+            ...     def __init__(self, num_samples):
+            ...         self.num_samples = num_samples
+            ...
+            ...     def __getitem__(self, idx):
+            ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
+            ...         label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
+            ...         return image, label
+            ...
+            ...     def __len__(self):
+            ...         return self.num_samples
+
+            >>> class LinearNet(nn.Layer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
+            ...
+            ...     @paddle.jit.to_static
+            ...     def forward(self, x):
+            ...         return self._linear(x)
+
+            >>> def train(layer, loader, loss_fn, opt):
+            ...     for epoch_id in range(EPOCH_NUM):
+            ...         for batch_id, (image, label) in enumerate(loader()):
+            ...             out = layer(image)
+            ...             loss = loss_fn(out, label)
+            ...             loss.backward()
+            ...             opt.step()
+            ...             opt.clear_grad()
+            ...             print("Epoch {} batch {}: loss = {}".format(
+            ...                 epoch_id, batch_id, np.mean(loss.numpy())))
+
+            >>> # 1. train & save model.
+
+            >>> # create network
+            >>> layer = LinearNet()
+            >>> loss_fn = nn.CrossEntropyLoss()
+            >>> adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
+
+            >>> # create data loader
+            >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+            >>> loader = paddle.io.DataLoader(dataset,
+            ...     batch_size=BATCH_SIZE,
+            ...     shuffle=True,
+            ...     drop_last=True,
+            ...     num_workers=2
+            ... )
+
+            >>> # train
+            >>> train(layer, loader, loss_fn, adam)
+
+            >>> # save
+            >>> path = "example_model/linear"
+            >>> paddle.jit.save(layer, path)
+
+            >>> # example 2: save function
+            >>> import paddle
+            >>> from paddle.static import InputSpec
+
+
+            >>> def save_function():
+            ...     @paddle.jit.to_static
+            ...     def fun(inputs):
+            ...         return paddle.tanh(inputs)
+            ...
+            ...     path = 'test_jit_save_load_function_1/func'
+            ...     inps = paddle.rand([3, 6])
+            ...     origin = fun(inps)
+            ...
+            ...     paddle.jit.save(fun, path)
+            ...     load_func = paddle.jit.load(path)
+            ...
+            ...     load_result = load_func(inps)
+            ...     print((load_result - origin).abs().max() < 1e-10)
+
+            >>> save_function()
     """
 
     # 1. input build & check
@@ -1307,193 +1314,199 @@ def load(path, **configs):
     Examples:
         1. Load model saved by ``paddle.jit.save`` then performing inference and fine-tune training.
 
-        .. code-block:: python
-            :name: code-example1
-
-            import numpy as np
-            import paddle
-            import paddle.nn as nn
-            import paddle.optimizer as opt
-
-            BATCH_SIZE = 16
-            BATCH_NUM = 4
-            EPOCH_NUM = 4
-
-            IMAGE_SIZE = 784
-            CLASS_NUM = 10
-
-            # define a random dataset
-            class RandomDataset(paddle.io.Dataset):
-                def __init__(self, num_samples):
-                    self.num_samples = num_samples
-
-                def __getitem__(self, idx):
-                    image = np.random.random([IMAGE_SIZE]).astype('float32')
-                    label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
-                    return image, label
-
-                def __len__(self):
-                    return self.num_samples
-
-            class LinearNet(nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-                @paddle.jit.to_static
-                def forward(self, x):
-                    return self._linear(x)
-
-            def train(layer, loader, loss_fn, opt):
-                for epoch_id in range(EPOCH_NUM):
-                    for batch_id, (image, label) in enumerate(loader()):
-                        out = layer(image)
-                        loss = loss_fn(out, label)
-                        loss.backward()
-                        opt.step()
-                        opt.clear_grad()
-                        print("Epoch {} batch {}: loss = {}".format(
-                            epoch_id, batch_id, np.mean(loss.numpy())))
-
-            # 1. train & save model.
-
-            # create network
-            layer = LinearNet()
-            loss_fn = nn.CrossEntropyLoss()
-            adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
-
-            # create data loader
-            dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
-            loader = paddle.io.DataLoader(dataset,
-                batch_size=BATCH_SIZE,
-                shuffle=True,
-                drop_last=True,
-                num_workers=2)
-
-            # train
-            train(layer, loader, loss_fn, adam)
-
-            # save
-            path = "example_model/linear"
-            paddle.jit.save(layer, path)
-
-            # 2. load model
-
-            # load
-            loaded_layer = paddle.jit.load(path)
-
-            # inference
-            loaded_layer.eval()
-            x = paddle.randn([1, IMAGE_SIZE], 'float32')
-            pred = loaded_layer(x)
-
-            # fine-tune
-            loaded_layer.train()
-            adam = opt.Adam(learning_rate=0.001, parameters=loaded_layer.parameters())
-            train(loaded_layer, loader, loss_fn, adam)
+            .. code-block:: python
+                :name: code-example1
+
+                >>> # doctest: +SKIP
+                >>> import numpy as np
+                >>> import paddle
+                >>> import paddle.nn as nn
+                >>> import paddle.optimizer as opt
+
+                >>> BATCH_SIZE = 16
+                >>> BATCH_NUM = 4
+                >>> EPOCH_NUM = 4
+
+                >>> IMAGE_SIZE = 784
+                >>> CLASS_NUM = 10
+
+                >>> # define a random dataset
+                >>> class RandomDataset(paddle.io.Dataset):
+                ...     def __init__(self, num_samples):
+                ...         self.num_samples = num_samples
+                ...
+                ...     def __getitem__(self, idx):
+                ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
+                ...         label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
+                ...         return image, label
+                ...
+                ...     def __len__(self):
+                ...         return self.num_samples
+
+                >>> class LinearNet(nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...         self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
+                ...
+                ...     @paddle.jit.to_static
+                ...     def forward(self, x):
+                ...         return self._linear(x)
+                ...
+                >>> def train(layer, loader, loss_fn, opt):
+                ...     for epoch_id in range(EPOCH_NUM):
+                ...         for batch_id, (image, label) in enumerate(loader()):
+                ...             out = layer(image)
+                ...             loss = loss_fn(out, label)
+                ...             loss.backward()
+                ...             opt.step()
+                ...             opt.clear_grad()
+                ...             print("Epoch {} batch {}: loss = {}".format(
+                ...                 epoch_id, batch_id, np.mean(loss.numpy())))
+
+                >>> # 1. train & save model.
+
+                >>> # create network
+                >>> layer = LinearNet()
+                >>> loss_fn = nn.CrossEntropyLoss()
+                >>> adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
+
+                >>> # create data loader
+                >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+                >>> loader = paddle.io.DataLoader(
+                ...     dataset,
+                ...     batch_size=BATCH_SIZE,
+                ...     shuffle=True,
+                ...     drop_last=True,
+                ...     num_workers=2
+                ... )
+
+                >>> # train
+                >>> train(layer, loader, loss_fn, adam)
+
+                >>> # save
+                >>> path = "example_model/linear"
+                >>> paddle.jit.save(layer, path)
+
+                >>> # 2. load model
+
+                >>> # load
+                >>> loaded_layer = paddle.jit.load(path)
+
+                >>> # inference
+                >>> loaded_layer.eval()
+                >>> x = paddle.randn([1, IMAGE_SIZE], 'float32')
+                >>> pred = loaded_layer(x)
+
+                >>> # fine-tune
+                >>> loaded_layer.train()
+                >>> adam = opt.Adam(learning_rate=0.001, parameters=loaded_layer.parameters())
+                >>> train(loaded_layer, loader, loss_fn, adam)
 
 
         2. Load model saved by ``paddle.fluid.io.save_inference_model`` then performing and fine-tune training.
 
-        .. code-block:: python
-            :name: code-example2
-
-            import numpy as np
-            import paddle
-            import paddle.static as static
-            import paddle.nn as nn
-            import paddle.optimizer as opt
-            import paddle.nn.functional as F
-
-            BATCH_SIZE = 16
-            BATCH_NUM = 4
-            EPOCH_NUM = 4
-
-            IMAGE_SIZE = 784
-            CLASS_NUM = 10
-
-            # define a random dataset
-            class RandomDataset(paddle.io.Dataset):
-                def __init__(self, num_samples):
-                    self.num_samples = num_samples
-
-                def __getitem__(self, idx):
-                    image = np.random.random([IMAGE_SIZE]).astype('float32')
-                    label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
-                    return image, label
-
-                def __len__(self):
-                    return self.num_samples
-
-            paddle.enable_static()
-
-            image = static.data(name='image', shape=[None, 784], dtype='float32')
-            label = static.data(name='label', shape=[None, 1], dtype='int64')
-            pred = static.nn.fc(x=image, size=10, activation='softmax')
-            loss = F.cross_entropy(input=pred, label=label)
-            avg_loss = paddle.mean(loss)
-
-            optimizer = paddle.optimizer.SGD(learning_rate=0.001)
-            optimizer.minimize(avg_loss)
-
-            place = paddle.CPUPlace()
-            exe = static.Executor(place)
-            exe.run(static.default_startup_program())
-
-            # create data loader
-            dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
-            loader = paddle.io.DataLoader(dataset,
-                feed_list=[image, label],
-                places=place,
-                batch_size=BATCH_SIZE,
-                shuffle=True,
-                drop_last=True,
-                return_list=False,
-                num_workers=2)
-
-            # 1. train and save inference model
-            for data in loader():
-                exe.run(
-                    static.default_main_program(),
-                    feed=data,
-                    fetch_list=[avg_loss])
-
-            model_path = "fc.example.model"
-            paddle.fluid.io.save_inference_model(
-                model_path, ["image"], [pred], exe)
-
-            # 2. load model
-
-            # enable dygraph mode
-            paddle.disable_static(place)
-
-            # load
-            fc = paddle.jit.load(model_path)
-
-            # inference
-            fc.eval()
-            x = paddle.randn([1, IMAGE_SIZE], 'float32')
-            pred = fc(x)
-
-            # fine-tune
-            fc.train()
-            loss_fn = nn.CrossEntropyLoss()
-            adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters())
-            loader = paddle.io.DataLoader(dataset,
-                places=place,
-                batch_size=BATCH_SIZE,
-                shuffle=True,
-                drop_last=True,
-                num_workers=2)
-            for epoch_id in range(EPOCH_NUM):
-                for batch_id, (image, label) in enumerate(loader()):
-                    out = fc(image)
-                    loss = loss_fn(out, label)
-                    loss.backward()
-                    adam.step()
-                    adam.clear_grad()
-                    print("Epoch {} batch {}: loss = {}".format(
-                        epoch_id, batch_id, np.mean(loss.numpy())))
+            .. code-block:: python
+                :name: code-example2
+
+                >>> import numpy as np
+                >>> import paddle
+                >>> import paddle.static as static
+                >>> import paddle.nn as nn
+                >>> import paddle.optimizer as opt
+                >>> import paddle.nn.functional as F
+
+                >>> BATCH_SIZE = 16
+                >>> BATCH_NUM = 4
+                >>> EPOCH_NUM = 4
+
+                >>> IMAGE_SIZE = 784
+                >>> CLASS_NUM = 10
+
+                >>> # define a random dataset
+                >>> class RandomDataset(paddle.io.Dataset):
+                ...     def __init__(self, num_samples):
+                ...         self.num_samples = num_samples
+                ...
+                ...     def __getitem__(self, idx):
+                ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
+                ...         label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
+                ...         return image, label
+                ...
+                ...     def __len__(self):
+                ...         return self.num_samples
+
+                >>> paddle.enable_static()
+
+                >>> image = static.data(name='image', shape=[None, 784], dtype='float32')
+                >>> label = static.data(name='label', shape=[None, 1], dtype='int64')
+                >>> pred = static.nn.fc(x=image, size=10, activation='softmax')
+                >>> loss = F.cross_entropy(input=pred, label=label)
+                >>> avg_loss = paddle.mean(loss)
+
+                >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001)
+                >>> optimizer.minimize(avg_loss)
+
+                >>> place = paddle.CPUPlace()
+                >>> exe = static.Executor(place)
+                >>> exe.run(static.default_startup_program())
+
+                >>> # create data loader
+                >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+                >>> loader = paddle.io.DataLoader(dataset,
+                ...     feed_list=[image, label],
+                ...     places=place,
+                ...     batch_size=BATCH_SIZE,
+                ...     shuffle=True,
+                ...     drop_last=True,
+                ...     return_list=False,
+                ...     num_workers=2
+                ... )
+
+                >>> # 1. train and save inference model
+                >>> for data in loader():
+                ...     exe.run(
+                ...         static.default_main_program(),
+                ...         feed=data,
+                ...         fetch_list=[avg_loss]
+                ...     )
+
+                >>> model_path = "fc.example.model"
+                >>> paddle.fluid.io.save_inference_model(
+                ...     model_path, ["image"], [pred], exe)
+
+                >>> # 2. load model
+
+                >>> # enable dygraph mode
+                >>> paddle.disable_static(place)
+
+                >>> # load
+                >>> fc = paddle.jit.load(model_path)
+
+                >>> # inference
+                >>> fc.eval()
+                >>> x = paddle.randn([1, IMAGE_SIZE], 'float32')
+                >>> pred = fc(x)
+
+                >>> # fine-tune
+                >>> fc.train()
+                >>> loss_fn = nn.CrossEntropyLoss()
+                >>> adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters())
+                >>> loader = paddle.io.DataLoader(dataset,
+                ...     places=place,
+                ...     batch_size=BATCH_SIZE,
+                ...     shuffle=True,
+                ...     drop_last=True,
+                ...     num_workers=2
+                ... )
+                >>> for epoch_id in range(EPOCH_NUM):
+                ...     for batch_id, (image, label) in enumerate(loader()):
+                ...         out = fc(image)
+                ...         loss = loss_fn(out, label)
+                ...         loss.backward()
+                ...         adam.step()
+                ...         adam.clear_grad()
+                ...         print("Epoch {} batch {}: loss = {}".format(
+                ...             epoch_id, batch_id, np.mean(loss.numpy())))
     """
     # 1. construct correct config
     config = _parse_load_config(configs)
@@ -1612,29 +1625,29 @@ class TracedLayer:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-                class ExampleLayer(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-                        self._fc = paddle.nn.Linear(3, 10)
+                >>> import paddle
 
-                    def forward(self, input):
-                        return self._fc(input)
+                >>> class ExampleLayer(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...         self._fc = paddle.nn.Linear(3, 10)
+                ...
+                ...     def forward(self, input):
+                ...         return self._fc(input)
 
 
-                layer = ExampleLayer()
-                in_var = paddle.uniform(shape=[2, 3], dtype='float32')
-                out_dygraph, static_layer = paddle.jit.TracedLayer.trace(layer, inputs=[in_var])
+                >>> layer = ExampleLayer()
+                >>> in_var = paddle.uniform(shape=[2, 3], dtype='float32')
+                >>> out_dygraph, static_layer = paddle.jit.TracedLayer.trace(layer, inputs=[in_var])
 
-                # run the static graph model using Executor inside
-                out_static_graph = static_layer([in_var])
+                >>> # run the static graph model using Executor inside
+                >>> out_static_graph = static_layer([in_var])
 
-                print(len(out_static_graph)) # 1
-                print(out_static_graph[0].shape) # (2, 10)
+                >>> print(len(out_static_graph)) # 1
+                >>> print(out_static_graph[0].shape) # (2, 10)
 
-                # save the static graph model for inference
-                static_layer.save_inference_model('./saved_infer_model')
+                >>> # save the static graph model for inference
+                >>> static_layer.save_inference_model('./saved_infer_model')
 
         """
         assert isinstance(
@@ -1662,29 +1675,29 @@ class TracedLayer:
         Examples:
             .. code-block:: python
 
-                import paddle
+                >>> import paddle
 
-                class ExampleLayer(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-                        self._fc = paddle.nn.Linear(3, 10)
+                >>> class ExampleLayer(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...         self._fc = paddle.nn.Linear(3, 10)
+                ...
+                ...     def forward(self, input):
+                ...         return self._fc(input)
 
-                    def forward(self, input):
-                        return self._fc(input)
+                >>> layer = ExampleLayer()
+                >>> in_var = paddle.uniform(shape=[2, 3], dtype='float32')
 
-                layer = ExampleLayer()
-                in_var = paddle.uniform(shape=[2, 3], dtype='float32')
+                >>> out_dygraph, static_layer = paddle.jit.TracedLayer.trace(layer, inputs=[in_var])
 
-                out_dygraph, static_layer = paddle.jit.TracedLayer.trace(layer, inputs=[in_var])
+                >>> build_strategy = paddle.static.BuildStrategy()
+                >>> build_strategy.enable_inplace = True
 
-                build_strategy = paddle.static.BuildStrategy()
-                build_strategy.enable_inplace = True
+                >>> exec_strategy = paddle.static.ExecutionStrategy()
+                >>> exec_strategy.num_threads = 2
 
-                exec_strategy = paddle.static.ExecutionStrategy()
-                exec_strategy.num_threads = 2
-
-                static_layer.set_strategy(build_strategy=build_strategy, exec_strategy=exec_strategy)
-                out_static_graph = static_layer([in_var])
+                >>> static_layer.set_strategy(build_strategy=build_strategy, exec_strategy=exec_strategy)
+                >>> out_static_graph = static_layer([in_var])
 
         """
         assert self._compiled_program is None, "Cannot set strategy after run"
@@ -1765,33 +1778,36 @@ class TracedLayer:
         Examples:
             .. code-block:: python
 
-                import numpy as np
-                import paddle
-
-                class ExampleLayer(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-                        self._fc = paddle.nn.Linear(3, 10)
-
-                    def forward(self, input):
-                        return self._fc(input)
-
-                save_dirname = './saved_infer_model'
-                in_np = np.random.random([2, 3]).astype('float32')
-                in_var = paddle.to_tensor(in_np)
-                layer = ExampleLayer()
-
-                out_dygraph, static_layer = paddle.jit.TracedLayer.trace(layer, inputs=[in_var])
-                static_layer.save_inference_model(save_dirname, feed=[0], fetch=[0])
-
-                paddle.enable_static()
-                place = paddle.CPUPlace()
-                exe = paddle.static.Executor(place)
-                program, feed_vars, fetch_vars = paddle.static.load_inference_model(save_dirname,
-                                                    exe)
-
-                fetch, = exe.run(program, feed={feed_vars[0]: in_np}, fetch_list=fetch_vars)
-                print(fetch.shape) # (2, 10)
+                >>> import numpy as np
+                >>> import paddle
+
+                >>> class ExampleLayer(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...         self._fc = paddle.nn.Linear(3, 10)
+                ...
+                ...     def forward(self, input):
+                ...         return self._fc(input)
+
+                >>> save_dirname = './saved_infer_model'
+                >>> in_np = np.random.random([2, 3]).astype('float32')
+                >>> in_var = paddle.to_tensor(in_np)
+                >>> layer = ExampleLayer()
+
+                >>> out_dygraph, static_layer = paddle.jit.TracedLayer.trace(layer, inputs=[in_var])
+                >>> static_layer.save_inference_model(save_dirname, feed=[0], fetch=[0])
+
+                >>> paddle.enable_static()
+                >>> place = paddle.CPUPlace()
+                >>> exe = paddle.static.Executor(place)
+                >>> program, feed_vars, fetch_vars = paddle.static.load_inference_model(
+                ...     save_dirname,
+                ...     exe
+                ... )
+
+                >>> fetch, = exe.run(program, feed={feed_vars[0]: in_np}, fetch_list=fetch_vars)
+                >>> print(fetch.shape)
+                [2, 10]
         """
         check_type(
             path,
diff --git a/python/paddle/jit/dy2static/convert_call_func.py b/python/paddle/jit/dy2static/convert_call_func.py
index 7f086680b48..c6b8169b7c5 100644
--- a/python/paddle/jit/dy2static/convert_call_func.py
+++ b/python/paddle/jit/dy2static/convert_call_func.py
@@ -162,27 +162,28 @@ def convert_call(func):
     Examples:
         .. code-block:: python
 
-            import paddle
-            from paddle.jit.dy2static import Call
-
-            paddle.enable_static()
-            def dyfunc(x):
-                if paddle.mean(x) < 0:
-                    x_v = x - 1
-                else:
-                    x_v = x + 1
-                return x_v
-
-            new_func = Call(dyfunc)
-            x = paddle.tensor.manipulation.fill_constant(shape=[3, 3], value=0, dtype='float64')
-            x_v = new_func(x)
-
-            exe = paddle.static.Executor(paddle.CPUPlace())
-            out = exe.run(fetch_list=[x_v])
-            print(out[0])
-            # [[1. 1. 1.]
-            #  [1. 1. 1.]
-            #  [1. 1. 1.]]
+            >>> # doctest: +SKIP
+            >>> import paddle
+            >>> from paddle.jit.dy2static import Call
+
+            >>> paddle.enable_static()
+            >>> def dyfunc(x):
+            ...     if paddle.mean(x) < 0:
+            ...         x_v = x - 1
+            ...     else:
+            ...         x_v = x + 1
+            ...     return x_v
+            ...
+            >>> new_func = Call(dyfunc)
+            >>> x = paddle.tensor.manipulation.fill_constant(shape=[3, 3], value=0, dtype='float64')
+            >>> x_v = new_func(x)
+
+            >>> exe = paddle.static.Executor(paddle.CPUPlace())
+            >>> out = exe.run(fetch_list=[x_v])
+            >>> print(out[0])
+            [[1. 1. 1.]
+             [1. 1. 1.]
+             [1. 1. 1.]]
 
     """
     translator_logger.log(1, f"Convert callable object: convert {func}.")
diff --git a/python/paddle/jit/dy2static/logging_utils.py b/python/paddle/jit/dy2static/logging_utils.py
index b8a6e5f4b63..7b45ab82d03 100644
--- a/python/paddle/jit/dy2static/logging_utils.py
+++ b/python/paddle/jit/dy2static/logging_utils.py
@@ -206,14 +206,14 @@ def set_verbosity(level=0, also_to_stdout=False):
     Examples:
         .. code-block:: python
 
-            import os
-            import paddle
+            >>> import os
+            >>> import paddle
 
-            paddle.jit.set_verbosity(1)
-            # The verbosity level is now 1
+            >>> paddle.jit.set_verbosity(1)
+            >>> # The verbosity level is now 1
 
-            os.environ['TRANSLATOR_VERBOSITY'] = '3'
-            # The verbosity level is now 3, but it has no effect because it has a lower priority than `set_verbosity`
+            >>> os.environ['TRANSLATOR_VERBOSITY'] = '3'
+            >>> # The verbosity level is now 3, but it has no effect because it has a lower priority than `set_verbosity`
     """
     _TRANSLATOR_LOGGER.verbosity_level = level
     _TRANSLATOR_LOGGER.need_to_echo_log_to_stdout = also_to_stdout
@@ -244,14 +244,15 @@ def set_code_level(level=LOG_AllTransformer, also_to_stdout=False):
     Examples:
         .. code-block:: python
 
-            import paddle
+            >>> import os
+            >>> import paddle
 
-            paddle.jit.set_code_level(2)
-            # It will print the transformed code at level 2, which means to print the code after second transformer,
-            # as the date of August 28, 2020, it is CastTransformer.
+            >>> paddle.jit.set_code_level(2)
+            >>> # It will print the transformed code at level 2, i.e. the code produced after the second transformer,
+            >>> # which, as of August 28, 2020, is CastTransformer.
 
-            os.environ['TRANSLATOR_CODE_LEVEL'] = '3'
-            # The code level is now 3, but it has no effect because it has a lower priority than `set_code_level`
+            >>> os.environ['TRANSLATOR_CODE_LEVEL'] = '3'
+            >>> # The code level is now 3, but it has no effect because it has a lower priority than `set_code_level`
 
     """
     _TRANSLATOR_LOGGER.transformed_code_level = level
diff --git a/python/paddle/jit/dy2static/program_translator.py b/python/paddle/jit/dy2static/program_translator.py
index db612474c87..5869f1bf8ea 100644
--- a/python/paddle/jit/dy2static/program_translator.py
+++ b/python/paddle/jit/dy2static/program_translator.py
@@ -641,24 +641,25 @@ class StaticFunction:
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.jit import to_static
-                from paddle.static import InputSpec
-
-                paddle.disable_static()
-
-                def foo(x, y):
-                    z = x + y
-                    return z
-
-                # usage 1:
-                decorated_foo = to_static(foo, input_spec=[InputSpec([10], name='x'), InputSpec([10], name='y')])
-                print(decorated_foo.concrete_program)
-
-                # usage 2:
-                decorated_foo = to_static(foo)
-                out_foo = decorated_foo(paddle.rand([10]), paddle.rand([10]))
-                print(decorated_foo.concrete_program)
+                >>> # doctest: +SKIP
+                >>> import paddle
+                >>> from paddle.jit import to_static
+                >>> from paddle.static import InputSpec
+
+                >>> paddle.disable_static()
+
+                >>> def foo(x, y):
+                ...     z = x + y
+                ...     return z
+                ...
+                >>> # usage 1:
+                >>> decorated_foo = to_static(foo, input_spec=[InputSpec([10], name='x'), InputSpec([10], name='y')])
+                >>> print(decorated_foo.concrete_program)
+
+                >>> # usage 2:
+                >>> decorated_foo = to_static(foo)
+                >>> out_foo = decorated_foo(paddle.rand([10]), paddle.rand([10]))
+                >>> print(decorated_foo.concrete_program)
         """
         return self.concrete_program_specify_input_spec(input_spec=None)
 
@@ -760,25 +761,26 @@ class StaticFunction:
         Example::
             .. code-block:: python
 
-                import paddle
-
-                class Net(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-
-                    def forward(self, x, flag=True):
-                        if flag:
-                            out = x + 1
-                        else:
-                            out = x - 1
-                        return out
-
-                x = paddle.randn([10, 1], 'float32')
-                net = paddle.jit.to_static(Net())  # convert into static graph mode
-                out = net(x)
-
-                net.forward.rollback()  # rollback into dygraph mode
-                out = net(x)
+                >>> # doctest: +SKIP
+                >>> import paddle
+
+                >>> class Net(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...
+                ...     def forward(self, x, flag=True):
+                ...         if flag:
+                ...             out = x + 1
+                ...         else:
+                ...             out = x - 1
+                ...         return out
+                ...
+                >>> x = paddle.randn([10, 1], 'float32')
+                >>> net = paddle.jit.to_static(Net())  # convert into static graph mode
+                >>> out = net(x)
+
+                >>> net.forward.rollback()  # rollback into dygraph mode
+                >>> out = net(x)
         """
 
         def rollback_impl(class_instance):
@@ -819,24 +821,24 @@ class StaticFunction:
         Example::
             .. code-block:: python
 
-                import copy
-                import paddle
-
-                class Net(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-
-                    def forward(self, x, flag=True):
-                        if flag:
-                            out = x + 1
-                        else:
-                            out = x - 1
-                        return out
-
-                x = paddle.randn([10, 1], 'float32')
-                net = paddle.jit.to_static(Net())  # convert into static graph mode
-
-                copy_net = copy.deepcopy(net)      # deepcopy a new net without @to_static
+                >>> import copy
+                >>> import paddle
+
+                >>> class Net(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...
+                ...     def forward(self, x, flag=True):
+                ...         if flag:
+                ...             out = x + 1
+                ...         else:
+                ...             out = x - 1
+                ...         return out
+                ...
+                >>> x = paddle.randn([10, 1], 'float32')
+                >>> net = paddle.jit.to_static(Net())  # convert into static graph mode
+
+                >>> copy_net = copy.deepcopy(net)      # deepcopy a new net without @to_static
 
         Please attention that original 'net' will unwrap @to_static and rollback into simple Layer.
         """
@@ -1378,11 +1380,11 @@ class ProgramTranslator:
     Examples:
         .. code-block:: python
 
-            import paddle
+            >>> import paddle
 
-            # Two methods get same object because ProgramTranslator is a singleton
-            paddle.jit.ProgramTranslator()
-            paddle.jit.ProgramTranslator.get_instance()
+            >>> # Two methods get same object because ProgramTranslator is a singleton
+            >>> paddle.jit.dy2static.program_translator.ProgramTranslator()
+            >>> paddle.jit.dy2static.program_translator.ProgramTranslator.get_instance()
 
     """
 
@@ -1433,24 +1435,23 @@ class ProgramTranslator:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-
-                @paddle.jit.to_static
-                def func(x):
-                    if paddle.mean(x) > 0:
-                        x_v = x - 1
-                    else:
-                        x_v = x + 1
-                    return x_v
-
-
-                paddle.jit.enable_to_static(False)
-
-                x = paddle.ones([1, 2])
-                # ProgramTranslator is disabled so the func is run in dygraph
-                print(func(x))  # [[0. 0.]]
-
+                >>> # doctest: +SKIP
+                >>> import paddle
+                >>> @paddle.jit.to_static
+                ... def func(x):
+                ...     if paddle.mean(x) > 0:
+                ...         x_v = x - 1
+                ...     else:
+                ...         x_v = x + 1
+                ...     return x_v
+                ...
+                >>> paddle.jit.enable_to_static(False)
+
+                >>> x = paddle.ones([1, 2])
+                >>> # ProgramTranslator is disabled so the func is run in dygraph
+                >>> print(func(x))
+                Tensor(shape=[1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+                [[0., 0.]])
         """
         check_type(
             enable_to_static,
@@ -1477,23 +1478,23 @@ class ProgramTranslator:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-
-                def func(x):
-                    if paddle.mean(x) > 0:
-                        x_v = x - 1
-                    else:
-                        x_v = x + 1
-                    return x_v
-
-
-                prog_trans = paddle.jit.ProgramTranslator()
-
-                x = paddle.ones([1, 2])
-                x_v = prog_trans.get_output(func, x)
-                print(x_v)  # [[0. 0.]]
-
+                >>> # doctest: +SKIP
+                >>> import paddle
+                >>> def func(x):
+                ...     if paddle.mean(x) > 0:
+                ...         x_v = x - 1
+                ...     else:
+                ...         x_v = x + 1
+                ...     return x_v
+                ...
+                ...
+                >>> prog_trans = paddle.jit.dy2static.program_translator.ProgramTranslator()
+
+                >>> x = paddle.ones([1, 2])
+                >>> x_v = prog_trans.get_output(func, x)
+                >>> print(x_v)
+                Tensor(shape=[1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+                [[0., 0.]])
         """
         assert callable(
             dygraph_func
@@ -1560,21 +1561,19 @@ class ProgramTranslator:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-
-                def func(x):
-                    if paddle.mean(x) > 0:
-                        x_v = x - 1
-                    else:
-                        x_v = x + 1
-                    return x_v
-
-
-                prog_trans = paddle.jit.ProgramTranslator()
-                static_func = prog_trans.get_func(func)
-                print(callable(static_func)) # True
-
+                >>> # doctest: +SKIP
+                >>> import paddle
+                >>> def func(x):
+                ...     if paddle.mean(x) > 0:
+                ...         x_v = x - 1
+                ...     else:
+                ...         x_v = x + 1
+                ...     return x_v
+                ...
+                >>> prog_trans = paddle.jit.dy2static.program_translator.ProgramTranslator()
+                >>> static_func = prog_trans.get_func(func)
+                >>> print(callable(static_func))
+                True
         """
         assert callable(
             dygraph_func
@@ -1611,25 +1610,22 @@ class ProgramTranslator:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-
-                def func(x):
-                    if paddle.mean(x) > 0:
-                        x_v = x - 1
-                    else:
-                        x_v = x + 1
-                    return x_v
-
-
-                prog_trans = paddle.jit.ProgramTranslator()
-                x = paddle.ones([1, 2])
-                main_prog, start_prog, inputs, outputs = prog_trans.get_program(func, x)
-                print([i.name for i in inputs])
-                # [u'generated_tensor_0'] the feed input Tensor name representing x
-                print([o.name for o in outputs])
-                # [u'_generated_var_4'] the fetch output Tensor name representing x_v
-
+                >>> # doctest: +SKIP
+                >>> import paddle
+                >>> def func(x):
+                ...     if paddle.mean(x) > 0:
+                ...         x_v = x - 1
+                ...     else:
+                ...         x_v = x + 1
+                ...     return x_v
+                ...
+                >>> prog_trans = paddle.jit.dy2static.program_translator.ProgramTranslator()
+                >>> x = paddle.ones([1, 2])
+                >>> main_prog, start_prog, inputs, outputs = prog_trans.get_program(func, x)
+                >>> print([i.name for i in inputs])
+                >>> # [u'generated_tensor_0'] the feed input Tensor name representing x
+                >>> print([o.name for o in outputs])
+                >>> # [u'_generated_var_4'] the fetch output Tensor name representing x_v
         """
         assert callable(
             dygraph_func
@@ -1681,22 +1677,20 @@ class ProgramTranslator:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-
-                def func(x):
-                    if paddle.mean(x) > 0:
-                        x_v = x - 1
-                    else:
-                        x_v = x + 1
-                    return x_v
-
-
-                prog_trans = paddle.jit.ProgramTranslator()
-
-                code = prog_trans.get_code(func)
-                print(type(code)) # <class 'str'>
-
+                >>> # doctest: +SKIP
+                >>> import paddle
+                >>> def func(x):
+                ...     if paddle.mean(x) > 0:
+                ...         x_v = x - 1
+                ...     else:
+                ...         x_v = x + 1
+                ...     return x_v
+                ...
+                >>> prog_trans = paddle.jit.dy2static.program_translator.ProgramTranslator()
+
+                >>> code = prog_trans.get_code(func)
+                >>> print(type(code))
+                <class 'str'>
         """
         assert callable(
             dygraph_func
@@ -1728,11 +1722,10 @@ class ProgramTranslator:
         Examples:
             .. code-block:: python
 
-                import paddle
-
-                prog_trans = paddle.jit.ProgramTranslator()
-                prog_cache = prog_trans.get_program_cache()
+                >>> import paddle
 
+                >>> prog_trans = paddle.jit.dy2static.program_translator.ProgramTranslator()
+                >>> prog_cache = prog_trans.get_program_cache()
         """
         return self._program_cache
 
@@ -1751,23 +1744,22 @@ def enable_to_static(enable_to_static_bool):
     Examples:
         .. code-block:: python
 
-            import paddle
-
-
-            @paddle.jit.to_static
-            def func(x):
-                if paddle.mean(x) > 0:
-                    x_v = x - 1
-                else:
-                    x_v = x + 1
-                return x_v
-
-
-            paddle.jit.enable_to_static(False)
-
-            x = paddle.ones([1, 2])
-            # ProgramTranslator is disabled so the func is run in dygraph
-            print(func(x))  # [[0. 0.]]
+            >>> import paddle
+            >>> @paddle.jit.to_static
+            ... def func(x):
+            ...     if paddle.mean(x) > 0:
+            ...         x_v = x - 1
+            ...     else:
+            ...         x_v = x + 1
+            ...     return x_v
+            ...
+            >>> paddle.jit.enable_to_static(False)
+
+            >>> x = paddle.ones([1, 2])
+            >>> # ProgramTranslator is disabled so the func is run in dygraph
+            >>> print(func(x))
+            Tensor(shape=[1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[0., 0.]])
 
     """
     check_type(
diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py
index a5f31c9bea3..9fea8023608 100644
--- a/python/paddle/jit/translated_layer.py
+++ b/python/paddle/jit/translated_layer.py
@@ -1312,87 +1312,86 @@ class TranslatedLayer(layers.Layer):
     Examples:
         .. code-block:: python
 
-            import numpy as np
-            import paddle
-            import paddle.nn as nn
-            import paddle.optimizer as opt
-
-            BATCH_SIZE = 16
-            BATCH_NUM = 4
-            EPOCH_NUM = 4
-
-            IMAGE_SIZE = 784
-            CLASS_NUM = 10
-
-            # define a random dataset
-            class RandomDataset(paddle.io.Dataset):
-                def __init__(self, num_samples):
-                    self.num_samples = num_samples
-
-                def __getitem__(self, idx):
-                    image = np.random.random([IMAGE_SIZE]).astype('float32')
-                    label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
-                    return image, label
-
-                def __len__(self):
-                    return self.num_samples
-
-            class LinearNet(nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-                @paddle.jit.to_static
-                def forward(self, x):
-                    return self._linear(x)
-
-            def train(layer, loader, loss_fn, opt):
-                for epoch_id in range(EPOCH_NUM):
-                    for batch_id, (image, label) in enumerate(loader()):
-                        out = layer(image)
-                        loss = loss_fn(out, label)
-                        loss.backward()
-                        opt.step()
-                        opt.clear_grad()
-                        print("Epoch {} batch {}: loss = {}".format(
-                            epoch_id, batch_id, np.mean(loss.numpy())))
-
-            # 1. train & save model.
-
-            # create network
-            layer = LinearNet()
-            loss_fn = nn.CrossEntropyLoss()
-            adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
-
-            # create data loader
-            dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
-            loader = paddle.io.DataLoader(dataset,
-                batch_size=BATCH_SIZE,
-                shuffle=True,
-                drop_last=True,
-                num_workers=2)
-
-            # train
-            train(layer, loader, loss_fn, adam)
-
-            # save
-            model_path = "linear.example.model"
-            paddle.jit.save(layer, model_path)
-
-            # 2. load model as TranslatedLayer
-
-            # load
-            translated_layer = paddle.jit.load(model_path)
-
-            # inference
-            translated_layer.eval()
-            x = paddle.randn([1, IMAGE_SIZE], 'float32')
-            pred = translated_layer(x)
-
-            # fine-tune
-            translated_layer.train()
-            adam = opt.Adam(learning_rate=0.001, parameters=translated_layer.parameters())
-            train(translated_layer, loader, loss_fn, adam)
+            >>> # doctest: +SKIP
+            >>> import numpy as np
+            >>> import paddle
+            >>> import paddle.nn as nn
+            >>> import paddle.optimizer as opt
+
+            >>> BATCH_SIZE = 16
+            >>> BATCH_NUM = 4
+            >>> EPOCH_NUM = 4
+
+            >>> IMAGE_SIZE = 784
+            >>> CLASS_NUM = 10
+
+            >>> # define a random dataset
+            >>> class RandomDataset(paddle.io.Dataset):
+            ...     def __init__(self, num_samples):
+            ...         self.num_samples = num_samples
+            ...
+            ...     def __getitem__(self, idx):
+            ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
+            ...         label = np.random.randint(0, CLASS_NUM, (1, )).astype('int64')
+            ...         return image, label
+            ...
+            ...     def __len__(self):
+            ...         return self.num_samples
+            ...
+            >>> class LinearNet(nn.Layer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
+            ...
+            ...     @paddle.jit.to_static
+            ...     def forward(self, x):
+            ...         return self._linear(x)
+            ...
+            >>> def train(layer, loader, loss_fn, opt):
+            ...     for epoch_id in range(EPOCH_NUM):
+            ...         for batch_id, (image, label) in enumerate(loader()):
+            ...             out = layer(image)
+            ...             loss = loss_fn(out, label)
+            ...             loss.backward()
+            ...             opt.step()
+            ...             opt.clear_grad()
+            ...             print("Epoch {} batch {}: loss = {}".format(
+            ...                 epoch_id, batch_id, np.mean(loss.numpy())))
+            ...
+            >>> # 1. train & save model.
+            >>> # create network
+            >>> layer = LinearNet()
+            >>> loss_fn = nn.CrossEntropyLoss()
+            >>> adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
+
+            >>> # create data loader
+            >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+            >>> loader = paddle.io.DataLoader(dataset,
+            ...     batch_size=BATCH_SIZE,
+            ...     shuffle=True,
+            ...     drop_last=True,
+            ...     num_workers=2
+            ... )
+            >>> # train
+            >>> train(layer, loader, loss_fn, adam)
+
+            >>> # save
+            >>> model_path = "linear.example.model"
+            >>> paddle.jit.save(layer, model_path)
+
+            >>> # 2. load model as TranslatedLayer
+            >>> # load
+            >>> translated_layer = paddle.jit.load(model_path)
+
+            >>> # inference
+            >>> translated_layer.eval()
+            >>> x = paddle.randn([1, IMAGE_SIZE], 'float32')
+            >>> pred = translated_layer(x)
+
+            >>> # fine-tune
+            >>> translated_layer.train()
+            >>> adam = opt.Adam(learning_rate=0.001, parameters=translated_layer.parameters())
+            >>> train(translated_layer, loader, loss_fn, adam)
 
     """
 
@@ -1523,76 +1522,76 @@ class TranslatedLayer(layers.Layer):
         Examples:
             .. code-block:: python
 
-                import numpy as np
-                import paddle
-                import paddle.nn as nn
-                import paddle.optimizer as opt
-
-                BATCH_SIZE = 16
-                BATCH_NUM = 4
-                EPOCH_NUM = 4
-
-                IMAGE_SIZE = 784
-                CLASS_NUM = 10
-
-                # define a random dataset
-                class RandomDataset(paddle.io.Dataset):
-                    def __init__(self, num_samples):
-                        self.num_samples = num_samples
-
-                    def __getitem__(self, idx):
-                        image = np.random.random([IMAGE_SIZE]).astype('float32')
-                        label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
-                        return image, label
-
-                    def __len__(self):
-                        return self.num_samples
-
-                class LinearNet(nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-                        self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
-
-                    @paddle.jit.to_static
-                    def forward(self, x):
-                        return self._linear(x)
-
-                def train(layer, loader, loss_fn, opt):
-                    for epoch_id in range(EPOCH_NUM):
-                        for batch_id, (image, label) in enumerate(loader()):
-                            out = layer(image)
-                            loss = loss_fn(out, label)
-                            loss.backward()
-                            opt.step()
-                            opt.clear_grad()
-                            print("Epoch {} batch {}: loss = {}".format(
-                                epoch_id, batch_id, np.mean(loss.numpy())))
-
-                # create network
-                layer = LinearNet()
-                loss_fn = nn.CrossEntropyLoss()
-                adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
-
-                # create data loader
-                dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
-                loader = paddle.io.DataLoader(dataset,
-                    batch_size=BATCH_SIZE,
-                    shuffle=True,
-                    drop_last=True,
-                    num_workers=2)
-
-                # train
-                train(layer, loader, loss_fn, adam)
-
-                # save
-                model_path = "linear.example.model"
-                paddle.jit.save(layer, model_path)
-
-                # load
-                translated_layer = paddle.jit.load(model_path)
-
-                # get program
-                program = translated_layer.program()
+                >>> # doctest: +SKIP
+                >>> import numpy as np
+                >>> import paddle
+                >>> from paddle import nn
+                >>> import paddle.optimizer as opt
+
+                >>> BATCH_SIZE = 16
+                >>> BATCH_NUM = 4
+                >>> EPOCH_NUM = 4
+
+                >>> IMAGE_SIZE = 784
+                >>> CLASS_NUM = 10
+
+                >>> # define a random dataset
+                >>> class RandomDataset(paddle.io.Dataset):
+                ...     def __init__(self, num_samples):
+                ...         self.num_samples = num_samples
+                ...
+                ...     def __getitem__(self, idx):
+                ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
+                ...         label = np.random.randint(0, CLASS_NUM, (1, )).astype('int64')
+                ...         return image, label
+                ...
+                ...     def __len__(self):
+                ...         return self.num_samples
+                ...
+                >>> class LinearNet(nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...         self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
+                ...
+                ...     @paddle.jit.to_static
+                ...     def forward(self, x):
+                ...         return self._linear(x)
+                ...
+                >>> def train(layer, loader, loss_fn, opt):
+                ...     for epoch_id in range(EPOCH_NUM):
+                ...         for batch_id, (image, label) in enumerate(loader()):
+                ...             out = layer(image)
+                ...             loss = loss_fn(out, label)
+                ...             loss.backward()
+                ...             opt.step()
+                ...             opt.clear_grad()
+                ...             print("Epoch {} batch {}: loss = {}".format(
+                ...                 epoch_id, batch_id, np.mean(loss.numpy())))
+                ...
+                >>> # create network
+                >>> layer = LinearNet()
+                >>> loss_fn = nn.CrossEntropyLoss()
+                >>> adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
+                >>> # create data loader
+                >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+                >>> loader = paddle.io.DataLoader(dataset,
+                ...     batch_size=BATCH_SIZE,
+                ...     shuffle=True,
+                ...     drop_last=True,
+                ...     num_workers=2
+                ... )
+                >>> # train
+                >>> train(layer, loader, loss_fn, adam)
+
+                >>> # save
+                >>> model_path = "linear.example.model"
+                >>> paddle.jit.save(layer, model_path)
+
+                >>> # load
+                >>> translated_layer = paddle.jit.load(model_path)
+
+                >>> # get program
+                >>> program = translated_layer.program()
         """
         # 1. get program holder
         program_holder = self._get_program_holder(method_name)
-- 
GitLab