diff --git a/python/paddle/distributed/communication/group.py b/python/paddle/distributed/communication/group.py
index e722d6fed1deeb3b8030f8fc5a30f43a587249c1..dfab85404a1d09d6c3886cad0be2f8f911261a5b 100644
--- a/python/paddle/distributed/communication/group.py
+++ b/python/paddle/distributed/communication/group.py
@@ -144,15 +144,15 @@ def is_initialized():
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle

-            print(paddle.distributed.is_initialized())
-            # False
+            >>> print(paddle.distributed.is_initialized())
+            False

-            paddle.distributed.init_parallel_env()
-            print(paddle.distributed.is_initialized())
-            # True
+            >>> paddle.distributed.init_parallel_env()
+            >>> print(paddle.distributed.is_initialized())
+            True
     """
     return _GroupManager.global_group_id in _GroupManager.group_map_by_id
@@ -175,19 +175,19 @@ def destroy_process_group(group=None):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist

-            dist.init_parallel_env()
-            group = dist.new_group([0, 1])
+            >>> dist.init_parallel_env()
+            >>> group = dist.new_group([0, 1])

-            dist.destroy_process_group(group)
-            print(dist.is_initialized())
-            # True
-            dist.destroy_process_group()
-            print(dist.is_initialized())
-            # False
+            >>> dist.destroy_process_group(group)
+            >>> print(dist.is_initialized())
+            True
+            >>> dist.destroy_process_group()
+            >>> print(dist.is_initialized())
+            False
     """
     group = _get_global_group() if group is None else group
@@ -214,13 +214,13 @@ def get_group(id=0):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist

-            dist.init_parallel_env()
-            gid = paddle.distributed.new_group([2,4,6])
-            paddle.distributed.get_group(gid.id)
+            >>> dist.init_parallel_env()
+            >>> gid = paddle.distributed.new_group([2,4,6])
+            >>> paddle.distributed.get_group(gid.id)

     """
@@ -276,12 +276,13 @@ def wait(tensor, group=None, use_calc_stream=True):
     Examples:
         .. code-block:: python

-            import paddle
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle

-            paddle.distributed.init_parallel_env()
-            tindata = paddle.randn(shape=[2, 3])
-            paddle.distributed.all_reduce(tindata, sync_op=True)
-            paddle.distributed.wait(tindata)
+            >>> paddle.distributed.init_parallel_env()
+            >>> tindata = paddle.randn(shape=[2, 3])
+            >>> paddle.distributed.all_reduce(tindata, sync_op=True)
+            >>> paddle.distributed.wait(tindata)

     """
     if group is not None and not group.is_member():
@@ -308,12 +309,13 @@ def barrier(group=None):
     Examples:
         .. code-block:: python

-            import paddle
-            from paddle.distributed import init_parallel_env
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed import init_parallel_env

-            paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id)
-            init_parallel_env()
-            paddle.distributed.barrier()
+            >>> paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id)
+            >>> init_parallel_env()
+            >>> paddle.distributed.barrier()
     """
     if group is not None and not group.is_member():
         return
@@ -362,11 +364,12 @@ def get_backend(group=None):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle

-            paddle.distributed.init_parallel_env()
-            paddle.distributed.get_backend() # NCCL
+            >>> paddle.distributed.init_parallel_env()
+            >>> paddle.distributed.get_backend()
+            'NCCL'
     """
     if _warn_cur_rank_not_in_group(group):
         raise RuntimeError("Invalid group specified")
diff --git a/python/paddle/distributed/communication/recv.py b/python/paddle/distributed/communication/recv.py
index 9d4c5326abaa65904be6b1c24f58a27b2909ecb9..e7e0315b7dd5116be51149cf74556cf8d59760ce 100644
--- a/python/paddle/distributed/communication/recv.py
+++ b/python/paddle/distributed/communication/recv.py
@@ -32,19 +32,19 @@ def recv(tensor, src=0, group=None, sync_op=True):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data = paddle.to_tensor([7, 8, 9])
-                dist.send(data, dst=1)
-            else:
-                data = paddle.to_tensor([1, 2, 3])
-                dist.recv(data, src=0)
-            print(data)
-            # [7, 8, 9] (2 GPUs)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data = paddle.to_tensor([7, 8, 9])
+            ...     dist.send(data, dst=1)
+            ... else:
+            ...     data = paddle.to_tensor([1, 2, 3])
+            ...     dist.recv(data, src=0)
+            >>> print(data)
+            >>> # [7, 8, 9] (2 GPUs)
     """
     return stream.recv(
         tensor, src=src, group=group, sync_op=sync_op, use_calc_stream=False
@@ -70,19 +70,19 @@ def irecv(tensor, src=None, group=None):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data = paddle.to_tensor([7, 8, 9])
-                task = dist.isend(data, dst=1)
-            else:
-                data = paddle.to_tensor([1, 2, 3])
-                task = dist.irecv(data, src=0)
-            task.wait()
-            print(data)
-            # [7, 8, 9] (2 GPUs)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data = paddle.to_tensor([7, 8, 9])
+            ...     task = dist.isend(data, dst=1)
+            ... else:
+            ...     data = paddle.to_tensor([1, 2, 3])
+            ...     task = dist.irecv(data, src=0)
+            >>> task.wait()
+            >>> print(data)
+            >>> # [7, 8, 9] (2 GPUs)
     """
     return recv(tensor, src, group, sync_op=False)
diff --git a/python/paddle/distributed/communication/reduce.py b/python/paddle/distributed/communication/reduce.py
index a44c3144889b5e7446bb9cdeda4645a1d92fcd4d..e3c8d9bc13aa488b7c67807442c83fdb3ecd1744 100644
--- a/python/paddle/distributed/communication/reduce.py
+++ b/python/paddle/distributed/communication/reduce.py
@@ -34,18 +34,18 @@ class ReduceOp:
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]])
-            else:
-                data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]])
-            dist.all_reduce(data, op=dist.ReduceOp.SUM)
-            print(data)
-            # [[5, 7, 9], [5, 7, 9]] (2 GPUs)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]])
+            ... else:
+            ...     data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]])
+            >>> dist.all_reduce(data, op=dist.ReduceOp.SUM)
+            >>> print(data)
+            >>> # [[5, 7, 9], [5, 7, 9]] (2 GPUs)
     """

     SUM = 0
@@ -106,19 +106,19 @@ def reduce(tensor, dst, op=ReduceOp.SUM, group=None, sync_op=True):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]])
-            else:
-                data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]])
-            dist.reduce(data, dst=0)
-            print(data)
-            # [[5, 7, 9], [5, 7, 9]] (2 GPUs, out for rank 0)
-            # [[1, 2, 3], [1, 2, 3]] (2 GPUs, out for rank 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]])
+            ... else:
+            ...     data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]])
+            >>> dist.reduce(data, dst=0)
+            >>> print(data)
+            >>> # [[5, 7, 9], [5, 7, 9]] (2 GPUs, out for rank 0)
+            >>> # [[1, 2, 3], [1, 2, 3]] (2 GPUs, out for rank 1)
     """
     return stream.reduce(
         tensor,
diff --git a/python/paddle/distributed/communication/reduce_scatter.py b/python/paddle/distributed/communication/reduce_scatter.py
index 734447b3feb1fa3c19917e51e822cfe186371b31..0265e0a0b52c61dae9334732c3dc07e4534360e9 100644
--- a/python/paddle/distributed/communication/reduce_scatter.py
+++ b/python/paddle/distributed/communication/reduce_scatter.py
@@ -44,21 +44,21 @@ def reduce_scatter(
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data1 = paddle.to_tensor([0, 1])
-                data2 = paddle.to_tensor([2, 3])
-            else:
-                data1 = paddle.to_tensor([4, 5])
-                data2 = paddle.to_tensor([6, 7])
-            dist.reduce_scatter(data1, [data1, data2])
-            print(data1)
-            # [4, 6] (2 GPUs, out for rank 0)
-            # [8, 10] (2 GPUs, out for rank 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data1 = paddle.to_tensor([0, 1])
+            ...     data2 = paddle.to_tensor([2, 3])
+            ... else:
+            ...     data1 = paddle.to_tensor([4, 5])
+            ...     data2 = paddle.to_tensor([6, 7])
+            >>> dist.reduce_scatter(data1, [data1, data2])
+            >>> print(data1)
+            >>> # [4, 6] (2 GPUs, out for rank 0)
+            >>> # [8, 10] (2 GPUs, out for rank 1)

     """
     return stream.reduce_scatter(
@@ -93,20 +93,20 @@ def _reduce_scatter_base(
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            rank = dist.get_rank()
-            data = paddle.arange(4) + rank
-            # [0, 1, 2, 3] (2 GPUs, for rank 0)
-            # [1, 2, 3, 4] (2 GPUs, for rank 1)
-            output = paddle.empty(shape=[2], dtype=data.dtype)
-            dist.collective._reduce_scatter_base(output, data)
-            print(output)
-            # [1, 3] (2 GPUs, out for rank 0)
-            # [5, 7] (2 GPUs, out for rank 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> rank = dist.get_rank()
+            >>> data = paddle.arange(4) + rank
+            >>> # [0, 1, 2, 3] (2 GPUs, for rank 0)
+            >>> # [1, 2, 3, 4] (2 GPUs, for rank 1)
+            >>> output = paddle.empty(shape=[2], dtype=data.dtype)
+            >>> dist.collective._reduce_scatter_base(output, data)
+            >>> print(output)
+            >>> # [1, 3] (2 GPUs, out for rank 0)
+            >>> # [5, 7] (2 GPUs, out for rank 1)

     """
     return _reduce_scatter_base_stream(
diff --git a/python/paddle/distributed/communication/scatter.py b/python/paddle/distributed/communication/scatter.py
index f3ae2e358e00fd67d9ed51c872de276084636d9b..18f9c74ce9bd87a7bc57eaf55968e0db2c010306 100644
--- a/python/paddle/distributed/communication/scatter.py
+++ b/python/paddle/distributed/communication/scatter.py
@@ -51,22 +51,22 @@ def scatter(tensor, tensor_list=None, src=0, group=None, sync_op=True):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data1 = paddle.to_tensor([7, 8, 9])
-                data2 = paddle.to_tensor([10, 11, 12])
-                dist.scatter(data1, src=1)
-            else:
-                data1 = paddle.to_tensor([1, 2, 3])
-                data2 = paddle.to_tensor([4, 5, 6])
-                dist.scatter(data1, tensor_list=[data1, data2], src=1)
-            print(data1, data2)
-            # [1, 2, 3] [10, 11, 12] (2 GPUs, out for rank 0)
-            # [4, 5, 6] [4, 5, 6] (2 GPUs, out for rank 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data1 = paddle.to_tensor([7, 8, 9])
+            ...     data2 = paddle.to_tensor([10, 11, 12])
+            ...     dist.scatter(data1, src=1)
+            ... else:
+            ...     data1 = paddle.to_tensor([1, 2, 3])
+            ...     data2 = paddle.to_tensor([4, 5, 6])
+            ...     dist.scatter(data1, tensor_list=[data1, data2], src=1)
+            >>> print(data1, data2)
+            >>> # [1, 2, 3] [10, 11, 12] (2 GPUs, out for rank 0)
+            >>> # [4, 5, 6] [4, 5, 6] (2 GPUs, out for rank 1)

     """
     return stream.scatter(tensor, tensor_list, src, group, sync_op)
@@ -93,19 +93,19 @@ def scatter_object_list(
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            out_object_list = []
-            if dist.get_rank() == 0:
-                in_object_list = [{'foo': [1, 2, 3]}, {'foo': [4, 5, 6]}]
-            else:
-                in_object_list = [{'bar': [1, 2, 3]}, {'bar': [4, 5, 6]}]
-            dist.scatter_object_list(out_object_list, in_object_list, src=1)
-            print(out_object_list)
-            # [{'bar': [1, 2, 3]}] (2 GPUs, out for rank 0)
-            # [{'bar': [4, 5, 6]}] (2 GPUs, out for rank 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> out_object_list = []
+            >>> if dist.get_rank() == 0:
+            ...     in_object_list = [{'foo': [1, 2, 3]}, {'foo': [4, 5, 6]}]
+            ... else:
+            ...     in_object_list = [{'bar': [1, 2, 3]}, {'bar': [4, 5, 6]}]
+            >>> dist.scatter_object_list(out_object_list, in_object_list, src=1)
+            >>> print(out_object_list)
+            >>> # [{'bar': [1, 2, 3]}] (2 GPUs, out for rank 0)
+            >>> # [{'bar': [4, 5, 6]}] (2 GPUs, out for rank 1)
     """
     assert (
         framework.in_dynamic_mode()
diff --git a/python/paddle/distributed/communication/send.py b/python/paddle/distributed/communication/send.py
index 3233255c71e67009752c3e19ef03c98ccff5c651..c1c3e19204a4e30982167e5638df819b4a225fe1 100644
--- a/python/paddle/distributed/communication/send.py
+++ b/python/paddle/distributed/communication/send.py
@@ -32,19 +32,19 @@ def send(tensor, dst=0, group=None, sync_op=True):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data = paddle.to_tensor([7, 8, 9])
-                dist.send(data, dst=1)
-            else:
-                data = paddle.to_tensor([1, 2, 3])
-                dist.recv(data, src=0)
-            print(data)
-            # [7, 8, 9] (2 GPUs)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data = paddle.to_tensor([7, 8, 9])
+            ...     dist.send(data, dst=1)
+            ... else:
+            ...     data = paddle.to_tensor([1, 2, 3])
+            ...     dist.recv(data, src=0)
+            >>> print(data)
+            >>> # [7, 8, 9] (2 GPUs)
     """
     return stream.send(
         tensor, dst=dst, group=group, sync_op=sync_op, use_calc_stream=False
@@ -70,20 +70,20 @@ def isend(tensor, dst, group=None):
     Examples:
         .. code-block:: python

-            # required: distributed
-            import paddle
-            import paddle.distributed as dist
-
-            dist.init_parallel_env()
-            if dist.get_rank() == 0:
-                data = paddle.to_tensor([7, 8, 9])
-                task = dist.isend(data, dst=1)
-            else:
-                data = paddle.to_tensor([1, 2, 3])
-                task = dist.irecv(data, src=0)
-            task.wait()
-            print(data)
-            # [7, 8, 9] (2 GPUs)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> import paddle.distributed as dist
+
+            >>> dist.init_parallel_env()
+            >>> if dist.get_rank() == 0:
+            ...     data = paddle.to_tensor([7, 8, 9])
+            ...     task = dist.isend(data, dst=1)
+            ... else:
+            ...     data = paddle.to_tensor([1, 2, 3])
+            ...     task = dist.irecv(data, src=0)
+            >>> task.wait()
+            >>> print(data)
+            >>> # [7, 8, 9] (2 GPUs)

     """
     return send(tensor, dst, group, sync_op=False)
diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py
index d4d1525cf5222713717c74f2ace4ccc17dcfaede..d98d2c8e7aed3365ba1ec77e7d2f1d79aca88ab2 100644
--- a/python/paddle/utils/cpp_extension/extension_utils.py
+++ b/python/paddle/utils/cpp_extension/extension_utils.py
@@ -906,10 +906,10 @@ def get_build_directory(verbose=False):

         .. code-block:: python

-            from paddle.utils.cpp_extension import get_build_directory
+            >>> from paddle.utils.cpp_extension import get_build_directory

-            build_dir = get_build_directory()
-            print(build_dir)
+            >>> build_dir = get_build_directory()
+            >>> print(build_dir)

     """
     root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR')
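
Note on the prompt convention used throughout the converted examples above: ">>> " starts a new statement and "... " continues the current one, so an else: branch belongs on a "... " line. The sketch below is illustrative only; it is not part of the diff, the helper name _prompt_convention_demo is made up, and it uses the standard-library doctest module rather than the framework's own sample-code checker (which also honors directives such as "# doctest: +REQUIRES(env: DISTRIBUTED)" seen above).

import doctest


def _prompt_convention_demo():
    """
    A tiny example that follows the same ">>> " / "... " layout as the
    docstrings touched by this diff.

    >>> values = [1, 2, 3]
    >>> if len(values) == 3:
    ...     total = sum(values)
    ... else:
    ...     total = 0
    >>> print(total)
    6
    """


if __name__ == "__main__":
    # Runs the example above with the standard doctest runner. Writing the
    # branch as ">>> else:" instead of "... else:" would make doctest treat
    # "else:" as a stand-alone statement and report it as a failing example.
    doctest.run_docstring_examples(_prompt_convention_demo, {}, verbose=True)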