diff --git a/python/paddle/distributed/__init__.py b/python/paddle/distributed/__init__.py
index fdb7a3b2cb447904face736d52be665e3c6c91cc..50e4f7285b1698b3524666c812325f2b12ea1fe7 100644
--- a/python/paddle/distributed/__init__.py
+++ b/python/paddle/distributed/__init__.py
@@ -25,6 +25,7 @@ from .parallel_with_gloo import gloo_release

 from paddle.distributed.fleet.dataset import InMemoryDataset  # noqa: F401
 from paddle.distributed.fleet.dataset import QueueDataset  # noqa: F401
+from paddle.distributed.fleet.base.topology import ParallelMode  # noqa: F401

 from .collective import broadcast  # noqa: F401
 from .collective import all_reduce  # noqa: F401
@@ -86,4 +87,5 @@ __all__ = [  # noqa
     "wait",
     "get_rank",
     "ProbabilityEntry",
+    "ParallelMode",
 ]
diff --git a/python/paddle/distributed/fleet/base/topology.py b/python/paddle/distributed/fleet/base/topology.py
index 5b8d185212c23c0cfd5db8831b9c0667be8741f7..ef34fd144a703b2996b34ca940bfa403b11f257b 100644
--- a/python/paddle/distributed/fleet/base/topology.py
+++ b/python/paddle/distributed/fleet/base/topology.py
@@ -27,6 +27,22 @@ _HYBRID_PARALLEL_GROUP = None


 class ParallelMode(object):
+    """
+    These are all the parallel modes currently supported:
+    - DATA_PARALLEL: Distribute input data to different devices.
+    - TENSOR_PARALLEL: Shard tensors in the network to different devices.
+    - PIPELINE_PARALLEL: Place different layers of the network on different devices.
+    - SHARDING_PARALLEL: Segment the model parameters, parameter gradients and optimizer states
+      corresponding to the parameters to each device.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            parallel_mode = paddle.distributed.ParallelMode
+            print(parallel_mode.DATA_PARALLEL)  # 0
+
+    """
     DATA_PARALLEL = 0
     TENSOR_PARALLEL = 1
     PIPELINE_PARALLEL = 2
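
Note (not part of the patch): a minimal sketch of what this change exposes. With the re-export added to python/paddle/distributed/__init__.py, ParallelMode is reachable directly from paddle.distributed, and, as the new docstring's example suggests, it is a plain namespace of integer constants. SHARDING_PARALLEL's numeric value is not visible in this hunk, so only the attribute name is used below.

    import paddle

    # ParallelMode is re-exported at the package level by this patch.
    parallel_mode = paddle.distributed.ParallelMode

    # A simple namespace of integer constants (values shown in the hunk).
    print(parallel_mode.DATA_PARALLEL)      # 0
    print(parallel_mode.TENSOR_PARALLEL)    # 1
    print(parallel_mode.PIPELINE_PARALLEL)  # 2

    # SHARDING_PARALLEL is documented in the new docstring; its value is not
    # shown in this hunk, so it is only referenced by name here.
    print(parallel_mode.SHARDING_PARALLEL)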