diff --git a/python/paddle/distributed/auto_parallel/static/cluster_v2.py b/python/paddle/distributed/auto_parallel/static/cluster_v2.py
index 3f08d240efb057948ac4185d265da4a25dbe5a52..479dbdfb57493c723bb8491772e392b01ee6d11d 100644
--- a/python/paddle/distributed/auto_parallel/static/cluster_v2.py
+++ b/python/paddle/distributed/auto_parallel/static/cluster_v2.py
@@ -58,14 +58,15 @@ class DeviceMesh(core.DeviceMesh):
 
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.distributed as dist
+            >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            mesh = dist.DeviceMesh([[2, 4, 5], [0, 1, 3]])
-            assert mesh.shape == [2, 3]
-            assert mesh.device_ids == [2, 4, 5, 0, 1, 3]
+            >>> mesh = dist.DeviceMesh([[2, 4, 5], [0, 1, 3]])
+            >>> assert mesh.shape == [2, 3]
+            >>> assert mesh.device_ids == [2, 4, 5, 0, 1, 3]
 
     """
diff --git a/python/paddle/distributed/auto_parallel/static/converter.py b/python/paddle/distributed/auto_parallel/static/converter.py
index 68f571857d7cf38effcf838cd245546b9238e08f..710dfb43e877b0697b0ac72e8d4b1ae5b25677fb 100644
--- a/python/paddle/distributed/auto_parallel/static/converter.py
+++ b/python/paddle/distributed/auto_parallel/static/converter.py
@@ -101,28 +101,30 @@ class Converter:
         Examples:
             .. code-block:: python
 
-                import numpy as np
-                complete_tensors = np.arange(4).reshape([2, 2])
-                partitial_tensors = np.split(complete_tensors, 2, axis=0)
-                name = "tmp_0"
-                tensors_dict = {name: partitial_tensors}
-                strategy_1 = {
-                    name: {
-                        "process_shape": [2],
-                        "process_group": [0, 1],
-                        "dims_mapping": [0, -1]
-                    }
-                }
-                strategy_2 = {
-                    name: {
-                        "process_shape": [2],
-                        "process_group": [0, 1],
-                        "dims_mapping": [-1, -1]
-                    }
-                }
-                converter = Converter(tensors_dict, strategy_1, strategy_2)
-                result = converter.convert()
-                # the result's value is equal to `complete_tensors`
+                >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+                >>> import numpy as np
+                >>> from paddle.distributed.auto_parallel.static.converter import Converter
+                >>> complete_tensors = np.arange(4).reshape([2, 2])
+                >>> partial_tensors = np.split(complete_tensors, 2, axis=0)
+                >>> name = "tmp_0"
+                >>> tensors_dict = {name: partial_tensors}
+                >>> strategy_1 = {
+                ...     name: {
+                ...         "process_shape": [2],
+                ...         "process_group": [0, 1],
+                ...         "dims_mapping": [0, -1]
+                ...     }
+                ... }
+                >>> strategy_2 = {
+                ...     name: {
+                ...         "process_shape": [2],
+                ...         "process_group": [0, 1],
+                ...         "dims_mapping": [-1, -1]
+                ...     }
+                ... }
+                >>> converter = Converter(tensors_dict, strategy_1, strategy_2)
+                >>> result = converter.convert()
+                >>> # the result's value is equal to `complete_tensors`
         """
         tensors_dict = {}
         # the name which is in cur_process but not in pre_process
@@ -352,13 +354,18 @@ class Converter:
         Examples:
            .. code-block:: python
 
-                import numpy as np
-                partition_tensor_list = [(np.array([[[1.11, 1.12]]]), [[0,1],[0,1],[0,2]])]
-                tensor = np.array([[[1.13, 1.14]]])
-                partition_index = [[0,1],[0,1],[2,4]]
-
-                _merge_tensor(partition_tensor_list, tensor, partition_index)
-                # partition_tensor_list: [(np.array([[[1.11, 1.12, 1.13, 1.14]]]), [[0,1],[0,1],[0,4]])]
+                >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+                >>> import numpy as np
+                >>> import paddle
+                >>> from paddle.distributed.auto_parallel.static.converter import Converter
+                >>> partition_tensor_list = [(np.array([[[1.11, 1.12]]]), [[0,1],[0,1],[0,2]])]
+                >>> tensor = np.array([[[1.13, 1.14]]])
+                >>> partition_index = [[0,1],[0,1],[2,4]]
+                >>> complete_shape = [1, 1, 4]
+
+                >>> Converter.merge(partition_tensor_list, tensor, partition_index, complete_shape)
+                >>> print(partition_tensor_list)
+                [(array([[[1.11, 1.12, 1.13, 1.14]]]), [[0, 1], [0, 1], [0, 4]])]
 
         """
         from .reshard import Resharder
@@ -416,16 +423,19 @@ class Converter:
         Examples:
            .. code-block:: python
 
-                import numpy as np
-                complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
-                rank = 2
-                complete_shape = [1, 1, 6]
-                dims_mapping = [-1, -1, 0]
-                process_shape = [3]
-                process_group = [0, 1, 2]
-
-                sliced_tensor_list = split(complete_tensor, [[], [], [2, 4]], 3)
-                # [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
+                >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+                >>> import numpy as np
+                >>> from paddle.distributed.auto_parallel.static.converter import Converter
+                >>> complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
+                >>> rank = 2
+                >>> complete_shape = [1, 1, 6]
+                >>> dims_mapping = [-1, -1, 0]
+                >>> process_shape = [3]
+                >>> process_group = [0, 1, 2]
+
+                >>> sliced_tensor_list = Converter.split(complete_tensor, [[], [], [2, 4]], 3)
+                >>> print(sliced_tensor_list)
+                [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
         """
         sliced_tensor_list = []
         axis = len(complete_tensor.shape) - length
@@ -453,15 +463,18 @@ class Converter:
         Examples:
            .. code-block:: python
 
-                import numpy as np
-                complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
-                complete_shape = [1, 1, 6]
-                dims_mapping = [-1, -1, 0]
-                process_shape = [3]
-                process_group = [0, 1, 2]
-
-                index = _get_split_indices(complete_shape, dims_mapping, process_shape, process_group)
-                # index: [[], [], [2, 4]]
+                >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+                >>> import numpy as np
+                >>> from paddle.distributed.auto_parallel.static.utils import _get_split_indices
+                >>> complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
+                >>> complete_shape = [1, 1, 6]
+                >>> dims_mapping = [-1, -1, 0]
+                >>> process_shape = [3]
+                >>> process_group = [0, 1, 2]
+
+                >>> index = _get_split_indices(complete_shape, dims_mapping, process_shape, process_group)
+                >>> print(index)
+                [[], [], [2, 4]]
 
         """
         from .reshard import Resharder
@@ -502,21 +515,20 @@ class Converter:
         Examples:
            .. code-block:: python
 
-                import numpy as np
-                complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
-                rank = 2
-                complete_shape = [1, 1, 6]
-                dims_mapping = [-1, -1, 0]
-                process_shape = [3]
-                process_group = [0, 1, 2]
-
-                slice_tensor = _slice_tensor(complete_tensor, [[], [], [2, 4]], 3)
-                # slice_tensor:
-                # [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
-
-                index = _get_sliced_index(rank, complete_shape, dims_mapping
-                                          process_shape, process_group)
-                # index: 2
+                >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+                >>> import numpy as np
+                >>> from paddle.distributed.auto_parallel.static.converter import Converter
+                >>> complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
+                >>> rank = 2
+                >>> complete_shape = [1, 1, 6]
+                >>> dims_mapping = [-1, -1, 0]
+                >>> process_shape = [3]
+                >>> process_group = [0, 1, 2]
+
+                >>> index = Converter._get_sliced_index(rank, complete_shape, dims_mapping,
+                ...                                     process_shape, process_group)
+                >>> print(index)
+                2
 
         """
         from .reshard import Resharder
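
Review note: the converted examples above are meant to run under the xdoctest harness. As a standalone sanity check, the minimal sketch below exercises the `Converter.merge` and `Converter.split` examples directly. It assumes a local Paddle build where `paddle.distributed.auto_parallel.static.converter` is importable; the values mirror the doctests, and `[1, 1, 4]` is the complete shape implied by the merged result.

```python
# Minimal sanity check for the converted docstring examples (assumes a local
# Paddle build; values mirror the doctests above).
import numpy as np

from paddle.distributed.auto_parallel.static.converter import Converter

# merge(): stitch the [2, 4) partition onto the [0, 2) partition in place.
partition_tensor_list = [(np.array([[[1.11, 1.12]]]), [[0, 1], [0, 1], [0, 2]])]
tensor = np.array([[[1.13, 1.14]]])
partition_index = [[0, 1], [0, 1], [2, 4]]
complete_shape = [1, 1, 4]  # complete shape implied by the merged result
Converter.merge(partition_tensor_list, tensor, partition_index, complete_shape)
assert partition_tensor_list[0][1] == [[0, 1], [0, 1], [0, 4]]
assert partition_tensor_list[0][0].tolist() == [[[1.11, 1.12, 1.13, 1.14]]]

# split(): slice the complete tensor at indices 2 and 4 along the last axis.
complete_tensor = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
sliced_tensor_list = Converter.split(complete_tensor, [[], [], [2, 4]], 3)
assert len(sliced_tensor_list) == 3
assert sliced_tensor_list[2].tolist() == [[[1.15, 1.16]]]

print("merge/split examples behave as documented")
```

Both methods are called as static methods here, matching how the converted doctests invoke them.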