diff --git a/python/paddle/incubate/autograd/functional.py b/python/paddle/incubate/autograd/functional.py
index 8f6f012ac394f38d5f8991beb687aa3f7f2d466d..116f4e2936fb7ad231ab2959312f5519affcc839 100644
--- a/python/paddle/incubate/autograd/functional.py
+++ b/python/paddle/incubate/autograd/functional.py
@@ -46,24 +46,24 @@ def vjp(func, xs, v=None):
 
         .. code-block:: python
 
-            import paddle
-
-            def func(x):
-                return paddle.matmul(x, x)
-
-            x = paddle.ones(shape=[2, 2], dtype='float32')
-            _, vjp_result = paddle.incubate.autograd.vjp(func, x)
-            print(vjp_result)
-            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [[4., 4.],
-            # [4., 4.]])
-
-            v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
-            _, vjp_result = paddle.incubate.autograd.vjp(func, x, v)
-            print(vjp_result)
-            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [[2., 1.],
-            # [1., 0.]])
+            >>> import paddle
+
+            >>> def func(x):
+            ...     return paddle.matmul(x, x)
+            ...
+            >>> x = paddle.ones(shape=[2, 2], dtype='float32')
+            >>> _, vjp_result = paddle.incubate.autograd.vjp(func, x)
+            >>> print(vjp_result)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[4., 4.],
+            [4., 4.]])
+
+            >>> v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
+            >>> _, vjp_result = paddle.incubate.autograd.vjp(func, x, v)
+            >>> print(vjp_result)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[2., 1.],
+            [1., 0.]])
     """
     _check_inputs(func, xs, v)
 
@@ -106,25 +106,24 @@ def jvp(func, xs, v=None):
 
         .. code-block:: python
 
-            import paddle
-
-
-            def func(x):
-                return paddle.matmul(x, x)
-
-
-            x = paddle.ones(shape=[2, 2], dtype='float32')
-            _, jvp_result = paddle.incubate.autograd.jvp(func, x)
-            print(jvp_result)
-            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [[4., 4.],
-            # [4., 4.]])
-            v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
-            _, jvp_result = paddle.incubate.autograd.jvp(func, x, v)
-            print(jvp_result)
-            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [[2., 1.],
-            # [1., 0.]])
+            >>> import paddle
+
+            >>> def func(x):
+            ...     return paddle.matmul(x, x)
+            ...
+            >>> x = paddle.ones(shape=[2, 2], dtype='float32')
+            >>> _, jvp_result = paddle.incubate.autograd.jvp(func, x)
+            >>> print(jvp_result)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[4., 4.],
+            [4., 4.]])
+
+            >>> v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
+            >>> _, jvp_result = paddle.incubate.autograd.jvp(func, x, v)
+            >>> print(jvp_result)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[2., 1.],
+            [1., 0.]])
     """
     _check_inputs(func, xs, v)
 
@@ -217,28 +216,26 @@ class Jacobian:
 
         .. code-block:: python
 
-            import paddle
-
-
-            def func(x, y):
-                return paddle.matmul(x, y)
-
-
-            x = paddle.to_tensor([[1., 2.], [3., 4.]])
-            J = paddle.incubate.autograd.Jacobian(func, [x, x])
-            print(J[:, :])
-            # Tensor(shape=[4, 8], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [[1., 3., 0., 0., 1., 0., 2., 0.],
-            # [2., 4., 0., 0., 0., 1., 0., 2.],
-            # [0., 0., 1., 3., 3., 0., 4., 0.],
-            # [0., 0., 2., 4., 0., 3., 0., 4.]])
-
-            print(J[0, :])
-            # Tensor(shape=[8], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [1., 3., 0., 0., 1., 0., 2., 0.])
-            print(J[:, 0])
-            # Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [1., 2., 0., 0.])
+            >>> import paddle
+
+            >>> def func(x, y):
+            ...     return paddle.matmul(x, y)
+            ...
+            >>> x = paddle.to_tensor([[1., 2.], [3., 4.]])
+            >>> J = paddle.incubate.autograd.Jacobian(func, [x, x])
+            >>> print(J[:, :])
+            Tensor(shape=[4, 8], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[1., 3., 0., 0., 1., 0., 2., 0.],
+            [2., 4., 0., 0., 0., 1., 0., 2.],
+            [0., 0., 1., 3., 3., 0., 4., 0.],
+            [0., 0., 2., 4., 0., 3., 0., 4.]])
+
+            >>> print(J[0, :])
+            Tensor(shape=[8], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [1., 3., 0., 0., 1., 0., 2., 0.])
+            >>> print(J[:, 0])
+            Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [1., 2., 0., 0.])
     """
 
@@ -287,23 +284,22 @@ class Hessian:
     Examples:
 
-        .. code-block:: python
-
-            import paddle
-
+        .. code-block:: python
 
-            def reducer(x):
-                return paddle.sum(x * x)
+            >>> import paddle
+            >>> def reducer(x):
+            ...     return paddle.sum(x * x)
+            ...
+            >>> x = paddle.rand([2, 2])
+            >>> h = paddle.incubate.autograd.Hessian(reducer, x)
+            >>> print(h[:])
+            Tensor(shape=[4, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[2., 0., 0., 0.],
+            [0., 2., 0., 0.],
+            [0., 0., 2., 0.],
+            [0., 0., 0., 2.]])
-            x = paddle.rand([2, 2])
-            h = paddle.incubate.autograd.Hessian(reducer, x)
-            print(h[:])
-            # Tensor(shape=[4, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            # [[2., 0., 0., 0.],
-            # [0., 2., 0., 0.],
-            # [0., 0., 2., 0.],
-            # [0., 0., 0., 2.]])
+
 
     def __init__(self, func, xs, is_batched=False):
 
@@ -619,27 +615,25 @@ def _separate(xs):
 
         .. code-block:: python
 
-            import paddle
-            from paddle.autograd.functional import _separate
-
-
-            def func(x, y):
-                return x * y
-
-
-            x = paddle.ones((1,))
-            x.stop_gradient = False
-
-            y = func(x, x)
-            print(paddle.grad(y, x))
-            # [Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            # [2.])]
-
-            x1, x2 = _separate((x, x))
-            y = func(x1, x2)
-            print(paddle.grad(y, x1))
-            # [Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            # [1.])]
+            >>> import paddle
+            >>> from paddle.incubate.autograd.functional import _separate
+
+            >>> def func(x, y):
+            ...     return x * y
+            ...
+            >>> x = paddle.ones((1,))
+            >>> x.stop_gradient = False
+
+            >>> y = func(x, x)
+            >>> print(paddle.grad(y, x))
+            [Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [2.])]
+
+            >>> x1, x2 = _separate((x, x))
+            >>> y = func(x1, x2)
+            >>> print(paddle.grad(y, x1))
+            [Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [1.])]
     """
     if isinstance(xs, typing.Sequence):
diff --git a/python/paddle/incubate/autograd/primapi.py b/python/paddle/incubate/autograd/primapi.py
index 1ba95c7f5b28b33382515661c3e839a0d18b552f..ee72cc3bd8f189d2b996147738a03f9f6ae30571 100644
--- a/python/paddle/incubate/autograd/primapi.py
+++ b/python/paddle/incubate/autograd/primapi.py
@@ -42,29 +42,29 @@ def forward_grad(outputs, inputs, grad_inputs=None):
 
         .. code-block:: python
 
-            import numpy as np
-            import paddle
-
-            paddle.enable_static()
-            paddle.incubate.autograd.enable_prim()
-
-            startup_program = paddle.static.Program()
-            main_program = paddle.static.Program()
-
-            with paddle.static.program_guard(main_program, startup_program):
-                x = paddle.static.data('x', shape=[1], dtype='float32')
-                y = x * x
-                y_grad = paddle.incubate.autograd.forward_grad(y, x)
-                paddle.incubate.autograd.prim2orig()
-
-            exe = paddle.static.Executor()
-            exe.run(startup_program)
-            y_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[y_grad])
-            print(y_grad)
-            # [array([4.], dtype=float32)]
-
-            paddle.incubate.autograd.disable_prim()
-            paddle.disable_static()
+            >>> import numpy as np
+            >>> import paddle
+
+            >>> paddle.enable_static()
+            >>> paddle.incubate.autograd.enable_prim()
+
+            >>> startup_program = paddle.static.Program()
+            >>> main_program = paddle.static.Program()
+
+            >>> with paddle.static.program_guard(main_program, startup_program):
+            ...     x = paddle.static.data('x', shape=[1], dtype='float32')
+            ...     y = x * x
+            ...     y_grad = paddle.incubate.autograd.forward_grad(y, x)
+            ...     paddle.incubate.autograd.prim2orig()
+            ...
+            >>> exe = paddle.static.Executor()
+            >>> exe.run(startup_program)
+            >>> y_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[y_grad])
+            >>> print(y_grad)
+            [array([4.], dtype=float32)]
+
+            >>> paddle.incubate.autograd.disable_prim()
+            >>> paddle.disable_static()
     """
     if not utils.prim_enabled():
         raise RuntimeError(
@@ -125,29 +125,29 @@ def grad(outputs, inputs, grad_outputs=None):
 
         .. code-block:: python
 
-            import numpy as np
-            import paddle
-
-            paddle.enable_static()
-            paddle.incubate.autograd.enable_prim()
-
-            startup_program = paddle.static.Program()
-            main_program = paddle.static.Program()
-            with paddle.static.program_guard(main_program, startup_program):
-                x = paddle.static.data('x', shape=[1], dtype='float32')
-                x.stop_gradients = False
-                y = x * x
-                x_grad = paddle.incubate.autograd.grad(y, x)
-                paddle.incubate.autograd.prim2orig()
-
-            exe = paddle.static.Executor()
-            exe.run(startup_program)
-            x_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[x_grad])
-            print(x_grad)
-            # [array([4.], dtype=float32)]
-
-            paddle.incubate.autograd.disable_prim()
-            paddle.disable_static()
+            >>> import numpy as np
+            >>> import paddle
+
+            >>> paddle.enable_static()
+            >>> paddle.incubate.autograd.enable_prim()
+
+            >>> startup_program = paddle.static.Program()
+            >>> main_program = paddle.static.Program()
+            >>> with paddle.static.program_guard(main_program, startup_program):
+            ...     x = paddle.static.data('x', shape=[1], dtype='float32')
+            ...     x.stop_gradient = False
+            ...     y = x * x
+            ...     x_grad = paddle.incubate.autograd.grad(y, x)
+            ...     paddle.incubate.autograd.prim2orig()
+            ...
+            >>> exe = paddle.static.Executor()
+            >>> exe.run(startup_program)
+            >>> x_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[x_grad])
+            >>> print(x_grad)
+            [array([4.], dtype=float32)]
+
+            >>> paddle.incubate.autograd.disable_prim()
+            >>> paddle.disable_static()
     """
     if not utils.prim_enabled():
         grad_inputs = backward.gradients(outputs, inputs, grad_outputs)
diff --git a/python/paddle/incubate/autograd/primreg.py b/python/paddle/incubate/autograd/primreg.py
index 00e6833639bce60fe1771873c994595890a471cd..5e94c220acb4717702278445f5820270e8bf2296 100644
--- a/python/paddle/incubate/autograd/primreg.py
+++ b/python/paddle/incubate/autograd/primreg.py
@@ -77,9 +77,15 @@ def op_position_inputs(op):
     Examples:
         .. code-block:: python
 
-            @REGISTER_FN('div_p', 'X', 'Y', 'Z')
-            def div(x, y, out=None):
-                return _simple_binop(LayerHelper('div_p', **locals()))
+
+            >>> from paddle.incubate.autograd.primops import _simple_binop
+            >>> from paddle.fluid.layer_helper import LayerHelper
+            >>> from paddle.incubate.autograd.primreg import REGISTER_FN
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> @REGISTER_FN('div_p', 'X', 'Y', 'Z')
+            >>> def div(x, y, out=None):
+            ...     return _simple_binop(LayerHelper('div_p', **locals()))
 
     The registered inputs are ['X', 'Y'] for div_p and
     accordingly this function will return inputs in the order of X then Y.
@@ -117,9 +123,15 @@ def op_position_output(op):
     Examples:
         .. code-block:: python
 
-            @REGISTER_FN('div_p', 'X', 'Y', 'Z')
-            def div(x, y, out=None):
-                return _simple_binop(LayerHelper('div_p', **locals()))
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> from paddle.incubate.autograd.primops import _simple_binop
+            >>> from paddle.fluid.layer_helper import LayerHelper
+            >>> from paddle.incubate.autograd.primreg import REGISTER_FN
+
+            >>> @REGISTER_FN('div_p', 'X', 'Y', 'Z')
+            >>> def div(x, y, out=None):
+            ...     return _simple_binop(LayerHelper('div_p', **locals()))
 
     The registered output is ['Z'] for div_p and
     accordingly this function will return output Z.
@@ -154,9 +166,15 @@ def REGISTER_FN(op_type, *position_argnames):
     Examples:
         .. code-block:: python
 
-            @REGISTER_FN('tanh_p', 'X', 'Y')
-            def tanh(x, out=None):
-                return _simple_unop(LayerHelper('tanh_p', **locals()))
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> from paddle.incubate.autograd.primops import _simple_unop
+            >>> from paddle.fluid.layer_helper import LayerHelper
+            >>> from paddle.incubate.autograd.primreg import REGISTER_FN
+
+            >>> @REGISTER_FN('tanh_p', 'X', 'Y')
+            >>> def tanh(x, out=None):
+            ...     return _simple_unop(LayerHelper('tanh_p', **locals()))
 
     """
 
@@ -184,10 +202,17 @@ def REGISTER_ORIG2PRIM(op_type):
     Examples:
         .. code-block:: python
 
-            @REGISTER_ORIG2PRIM('tanh')
-            def tanh_orig2prim(op):
-                x, = get_input_var_list(op)
-                return primops.tanh(x)
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> from paddle.fluid.layer_helper import LayerHelper
+            >>> from paddle.incubate.autograd.utils import get_input_var_list
+            >>> from paddle.incubate.autograd import primops
+            >>> from paddle.incubate.autograd.primreg import REGISTER_ORIG2PRIM
+
+            >>> @REGISTER_ORIG2PRIM('tanh')
+            >>> def tanh_orig2prim(op):
+            ...     x, = get_input_var_list(op)
+            ...     return primops.tanh(x)
     """
 
     if not isinstance(op_type, str):
@@ -217,12 +242,17 @@ def REGISTER_COMPOSITE(op_type):
     Examples:
        .. code-block:: python
 
-            @REGISTER_COMPOSITE('softmax')
-            def softmax_composite(x, axis):
-                molecular = exp(x)
-                denominator = broadcast_to(sum(molecular, axis=axis, keepdim=True), x.shape)
-                res = divide(molecular, denominator)
-                return res
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> import paddle
+            >>> from paddle.incubate.autograd.primreg import REGISTER_COMPOSITE
+
+            >>> @REGISTER_COMPOSITE('softmax')
+            >>> def softmax_composite(x, axis):
+            ...     molecular = paddle.exp(x)
+            ...     denominator = paddle.broadcast_to(paddle.sum(molecular, axis=axis, keepdim=True), x.shape)
+            ...     res = paddle.divide(molecular, denominator)
+            ...     return res
 
     """
     if not isinstance(op_type, str):
@@ -252,11 +282,17 @@ def REGISTER_PRIM2ORIG(op_type):
     Examples:
         .. code-block:: python
 
-            @REGISTER_PRIM2ORIG('tanh_p')
-            def tanh_prim2orig(op):
-                x, = get_input_var_list(op)
-                return paddle.tanh(x)
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> import paddle
+            >>> from paddle.incubate.autograd.primreg import REGISTER_PRIM2ORIG
+            >>> from paddle.incubate.autograd.utils import get_input_var_list
+
+            >>> @REGISTER_PRIM2ORIG('tanh_p')
+            >>> def tanh_prim2orig(op):
+            ...     x, = get_input_var_list(op)
+            ...     return paddle.tanh(x)
+            ...
 
     """
     if not isinstance(op_type, str):
        raise TypeError(f'op_type must be str, but got {type(op_type)}.')
@@ -285,9 +321,14 @@ def REGISTER_JVP(op_type):
     Examples:
         .. code-block:: python
 
-            @REGISTER_JVP('add_p')
-            def add_jvp(op, x_dot, y_dot):
-                return primops.add(x_dot, y_dot)
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> from paddle.incubate.autograd import primops
+            >>> from paddle.incubate.autograd.primreg import REGISTER_JVP
+
+            >>> @REGISTER_JVP('add_p')
+            >>> def add_jvp(op, x_dot, y_dot):
+            ...     return primops.add(x_dot, y_dot)
 
     """
     if not isinstance(op_type, str):
@@ -319,9 +360,13 @@ def REGISTER_TRANSPOSE(op_type):
     Examples:
         .. code-block:: python
 
-            @REGISTER_TRANSPOSE('add_p')
-            def add_transpose(op, z_bar):
-                return z_bar, z_bar
+
+            >>> # doctest: +SKIP('Depends on external code.')
+            >>> from paddle.incubate.autograd.primreg import REGISTER_TRANSPOSE
+
+            >>> @REGISTER_TRANSPOSE('add_p')
+            >>> def add_transpose(op, z_bar):
+            ...     return z_bar, z_bar
 
     """
     if not isinstance(op_type, str):
diff --git a/python/paddle/incubate/autograd/primx.py b/python/paddle/incubate/autograd/primx.py
index f3995a691db61e4b8d09d9b42dbe59c80a56831c..b61076ed0df2288f62c137702139133740645067 100644
--- a/python/paddle/incubate/autograd/primx.py
+++ b/python/paddle/incubate/autograd/primx.py
@@ -810,18 +810,18 @@ def prim2orig(block=None, blacklist=None):
 
         .. code-block:: python
 
-            import paddle
-            from paddle.incubate.autograd import enable_prim, prim_enabled, prim2orig
-
-            paddle.enable_static()
-            enable_prim()
-
-            x = paddle.ones(shape=[2, 2], dtype='float32')
-            x.stop_gradients = False
-            y = x * x
-            dy_dx = paddle.static.gradients(y, x)
-            if prim_enabled():
-                prim2orig()
+            >>> import paddle
+            >>> from paddle.incubate.autograd import enable_prim, prim_enabled, prim2orig
+
+            >>> paddle.enable_static()
+            >>> enable_prim()
+
+            >>> x = paddle.ones(shape=[2, 2], dtype='float32')
+            >>> x.stop_gradient = False
+            >>> y = x * x
+            >>> dy_dx = paddle.static.gradients(y, x)
+            >>> if prim_enabled():
+            ...     prim2orig()
 
     """
     block = default_main_program().current_block() if block is None else block
diff --git a/python/paddle/incubate/autograd/utils.py b/python/paddle/incubate/autograd/utils.py
index 0de52c68bb61b2b42f055cc69ae9cb4a1c11b798..79667cafeaf3e79d73ad9882d110cdce1667a311 100644
--- a/python/paddle/incubate/autograd/utils.py
+++ b/python/paddle/incubate/autograd/utils.py
@@ -51,17 +51,20 @@ def prim_enabled():
 
         .. code-block:: python
 
-            import paddle
-            from paddle.incubate.autograd import enable_prim, disable_prim, prim_enabled
+            >>> import paddle
+            >>> from paddle.incubate.autograd import enable_prim, disable_prim, prim_enabled
 
-            paddle.enable_static()
-            enable_prim()
+            >>> paddle.enable_static()
+            >>> enable_prim()
 
-            print(prim_enabled()) # True
+            >>> print(prim_enabled())
+            True
 
-            disable_prim()
+            >>> disable_prim()
+
+            >>> print(prim_enabled())
+            False
 
-            print(prim_enabled()) # False
     """
     return prim_option.get_status()
@@ -79,13 +82,15 @@ def enable_prim():
 
        .. code-block:: python
 
-            import paddle
-            from paddle.incubate.autograd import enable_prim, prim_enabled
+            >>> import paddle
+            >>> from paddle.incubate.autograd import enable_prim, prim_enabled
+
+            >>> paddle.enable_static()
+            >>> enable_prim()
 
-            paddle.enable_static()
-            enable_prim()
+            >>> print(prim_enabled())
+            True
 
-            print(prim_enabled()) # True
     """
     prim_option.set_status(True)
@@ -103,17 +108,20 @@ def disable_prim():
 
        .. code-block:: python
 
-            import paddle
-            from paddle.incubate.autograd import enable_prim, disable_prim, prim_enabled
+            >>> import paddle
+            >>> from paddle.incubate.autograd import enable_prim, disable_prim, prim_enabled
+
+            >>> paddle.enable_static()
+            >>> enable_prim()
 
-            paddle.enable_static()
-            enable_prim()
+            >>> print(prim_enabled())
+            True
 
-            print(prim_enabled()) # True
+            >>> disable_prim()
 
-            disable_prim()
+            >>> print(prim_enabled())
+            False
 
-            print(prim_enabled()) # False
     """
     prim_option.set_status(False)