From 24fde2643997f1a48f1620168298f0c424638ecf Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 1 Sep 2020 11:46:20 +0800
Subject: [PATCH] Add paddle.distributed dir and docs (#2482)

* add distributed dir and doc
* remove start_processes & move DataParallel
* add get rank and world size
* add cn docs
* add index to distributed_cn
* add white list api
* add api white list
* add cn to api name
* polish details
* add rst suffix
* add py function
---
 doc/fluid/api/distributed.rst                 |  13 +
 doc/fluid/api/distributed/ParallelEnv.rst     |   5 +
 doc/fluid/api/distributed/get_rank.rst        |  10 +
 doc/fluid/api/distributed/get_world_size.rst  |  10 +
 .../api/distributed/init_parallel_env.rst     |  10 +
 doc/fluid/api/distributed/prepare_context.rst |   5 +
 doc/fluid/api/distributed/spawn.rst           |  10 +
 doc/fluid/api/index_en.rst                    |   1 +
 doc/fluid/api/paddle.rst                      |   1 +
 doc/fluid/api/paddle/DataParallel.rst         |   7 +
 doc/fluid/api_cn/distributed_cn.rst           |   6 +
 .../api_cn/distributed_cn/ParallelEnv_cn.rst  |   5 +
 .../api_cn/distributed_cn/get_rank_cn.rst     |  25 ++
 .../distributed_cn/get_world_size_cn.rst      |  25 ++
 .../distributed_cn/init_parallel_env_cn.rst   |  64 +++++
 .../distributed_cn/prepare_context_cn.rst     |   5 +
 doc/fluid/api_cn/distributed_cn/spawn_cn.rst  | 105 ++++++++
 .../api_cn/dygraph_cn/DataParallel_cn.rst     | 253 +++++++++++-------
 scripts/api_white_list.txt                    |   2 +
 19 files changed, 462 insertions(+), 100 deletions(-)
 create mode 100644 doc/fluid/api/distributed.rst
 create mode 100644 doc/fluid/api/distributed/ParallelEnv.rst
 create mode 100644 doc/fluid/api/distributed/get_rank.rst
 create mode 100644 doc/fluid/api/distributed/get_world_size.rst
 create mode 100644 doc/fluid/api/distributed/init_parallel_env.rst
 create mode 100644 doc/fluid/api/distributed/prepare_context.rst
 create mode 100644 doc/fluid/api/distributed/spawn.rst
 create mode 100644 doc/fluid/api/paddle/DataParallel.rst
 create mode 100644 doc/fluid/api_cn/distributed_cn/ParallelEnv_cn.rst
 create mode 100644 doc/fluid/api_cn/distributed_cn/get_rank_cn.rst
 create mode 100644 doc/fluid/api_cn/distributed_cn/get_world_size_cn.rst
 create mode 100644 doc/fluid/api_cn/distributed_cn/init_parallel_env_cn.rst
 create mode 100644 doc/fluid/api_cn/distributed_cn/prepare_context_cn.rst
 create mode 100644 doc/fluid/api_cn/distributed_cn/spawn_cn.rst

diff --git a/doc/fluid/api/distributed.rst b/doc/fluid/api/distributed.rst
new file mode 100644
index 000000000..fddddbb11
--- /dev/null
+++ b/doc/fluid/api/distributed.rst
@@ -0,0 +1,13 @@
+==================
+paddle.distributed
+==================
+
+.. toctree::
+   :maxdepth: 1
+
+   distributed/get_rank.rst
+   distributed/get_world_size.rst
+   distributed/init_parallel_env.rst
+   distributed/ParallelEnv.rst
+   distributed/prepare_context.rst
+   distributed/spawn.rst
diff --git a/doc/fluid/api/distributed/ParallelEnv.rst b/doc/fluid/api/distributed/ParallelEnv.rst
new file mode 100644
index 000000000..46b07c64f
--- /dev/null
+++ b/doc/fluid/api/distributed/ParallelEnv.rst
@@ -0,0 +1,5 @@
+.. _api_distributed_ParallelEnv:
+
+ParallelEnv
+-------------------------------
+:doc_source: paddle.fluid.dygraph.parallel.ParallelEnv
\ No newline at end of file
diff --git a/doc/fluid/api/distributed/get_rank.rst b/doc/fluid/api/distributed/get_rank.rst
new file mode 100644
index 000000000..98a648314
--- /dev/null
+++ b/doc/fluid/api/distributed/get_rank.rst
@@ -0,0 +1,10 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+    !DO NOT EDIT THIS FILE MANUALLY!
+
+.. _api_distributed_get_rank:
+
+get_rank
+--------
+
+.. autofunction:: paddle.distributed.get_rank
+    :noindex:
\ No newline at end of file
diff --git a/doc/fluid/api/distributed/get_world_size.rst b/doc/fluid/api/distributed/get_world_size.rst
new file mode 100644
index 000000000..2de447e1b
--- /dev/null
+++ b/doc/fluid/api/distributed/get_world_size.rst
@@ -0,0 +1,10 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+    !DO NOT EDIT THIS FILE MANUALLY!
+
+.. _api_distributed_get_world_size:
+
+get_world_size
+--------------
+
+.. autofunction:: paddle.distributed.get_world_size
+    :noindex:
\ No newline at end of file
diff --git a/doc/fluid/api/distributed/init_parallel_env.rst b/doc/fluid/api/distributed/init_parallel_env.rst
new file mode 100644
index 000000000..99473dd34
--- /dev/null
+++ b/doc/fluid/api/distributed/init_parallel_env.rst
@@ -0,0 +1,10 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+    !DO NOT EDIT THIS FILE MANUALLY!
+
+.. _api_distributed_init_parallel_env:
+
+init_parallel_env
+-----------------
+
+.. autofunction:: paddle.distributed.init_parallel_env
+    :noindex:
\ No newline at end of file
diff --git a/doc/fluid/api/distributed/prepare_context.rst b/doc/fluid/api/distributed/prepare_context.rst
new file mode 100644
index 000000000..abe6865f5
--- /dev/null
+++ b/doc/fluid/api/distributed/prepare_context.rst
@@ -0,0 +1,5 @@
+.. _api_distributed_prepare_context:
+
+prepare_context
+-------------------------------
+:doc_source: paddle.fluid.dygraph.parallel.prepare_context
diff --git a/doc/fluid/api/distributed/spawn.rst b/doc/fluid/api/distributed/spawn.rst
new file mode 100644
index 000000000..9c7895932
--- /dev/null
+++ b/doc/fluid/api/distributed/spawn.rst
@@ -0,0 +1,10 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+    !DO NOT EDIT THIS FILE MANUALLY!
+
+.. _api_distributed_spawn:
+
+spawn
+-----
+
+.. autofunction:: paddle.distributed.spawn
+    :noindex:
\ No newline at end of file
diff --git a/doc/fluid/api/index_en.rst b/doc/fluid/api/index_en.rst
index f344d4478..e3ac0dc69 100644
--- a/doc/fluid/api/index_en.rst
+++ b/doc/fluid/api/index_en.rst
@@ -8,6 +8,7 @@ API Reference
     ../api_guides/index_en.rst
     dataset.rst
     declarative.rst
+    distributed.rst
     framework.rst
     imperative.rst
     io.rst
diff --git a/doc/fluid/api/paddle.rst b/doc/fluid/api/paddle.rst
index 9670615d7..45d26734e 100644
--- a/doc/fluid/api/paddle.rst
+++ b/doc/fluid/api/paddle.rst
@@ -37,6 +37,7 @@ paddle
     paddle/CUDAPinnedPlace.rst
     paddle/CUDAPlace.rst
     paddle/cumsum.rst
+    paddle/DataParallel.rst
     paddle/default_main_program.rst
     paddle/default_startup_program.rst
     paddle/diag.rst
diff --git a/doc/fluid/api/paddle/DataParallel.rst b/doc/fluid/api/paddle/DataParallel.rst
new file mode 100644
index 000000000..0c79fbe9b
--- /dev/null
+++ b/doc/fluid/api/paddle/DataParallel.rst
@@ -0,0 +1,7 @@
+.. _api_paddle_DataParallel:
+
+DataParallel
+-------------------------------
+:doc_source: paddle.fluid.dygraph.parallel.DataParallel
+
+
diff --git a/doc/fluid/api_cn/distributed_cn.rst b/doc/fluid/api_cn/distributed_cn.rst
index 1d7b233fa..ff75e9ce3 100644
--- a/doc/fluid/api_cn/distributed_cn.rst
+++ b/doc/fluid/api_cn/distributed_cn.rst
@@ -12,5 +12,11 @@ paddle.distributed
     distributed_cn/all_reduce_cn.rst
     distributed_cn/barrier_cn.rst
     distributed_cn/broadcast_cn.rst
+    distributed_cn/get_rank_cn.rst
+    distributed_cn/get_world_size_cn.rst
+    distributed_cn/init_parallel_env_cn.rst
+    distributed_cn/ParallelEnv_cn.rst
+    distributed_cn/prepare_context_cn.rst
     distributed_cn/reduce_cn.rst
     distributed_cn/scatter_cn.rst
+    distributed_cn/spawn_cn.rst
diff --git a/doc/fluid/api_cn/distributed_cn/ParallelEnv_cn.rst b/doc/fluid/api_cn/distributed_cn/ParallelEnv_cn.rst
new file mode 100644
index 000000000..c56a48346
--- /dev/null
+++ b/doc/fluid/api_cn/distributed_cn/ParallelEnv_cn.rst
@@ -0,0 +1,5 @@
+.. _cn_api_distributed_ParallelEnv:
+
+ParallelEnv
+-------------------------------
+:doc_source: paddle.fluid.dygraph.parallel.ParallelEnv
\ No newline at end of file
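The two ``ParallelEnv`` pages above are bare ``:doc_source:`` stubs with no usage snippet. As a rough sketch of what they point at: the ``dev_id`` attribute is confirmed by the example code removed later in this patch; ``local_rank`` and ``nranks`` are assumptions, taken to mirror the ``PADDLE_TRAINER_ID`` and ``PADDLE_TRAINERS_NUM`` variables documented on the ``get_rank`` and ``get_world_size`` pages below.

.. code-block:: python

    import paddle.fluid as fluid

    env = fluid.dygraph.ParallelEnv()
    # dev_id is used later in this patch as CUDAPlace(ParallelEnv().dev_id)
    place = fluid.CUDAPlace(env.dev_id)
    # assumed attributes: local_rank ~ PADDLE_TRAINER_ID,
    # nranks ~ PADDLE_TRAINERS_NUM
    print("local rank %d of %d trainers" % (env.local_rank, env.nranks))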
diff --git a/doc/fluid/api_cn/distributed_cn/get_rank_cn.rst b/doc/fluid/api_cn/distributed_cn/get_rank_cn.rst
new file mode 100644
index 000000000..75ad8fc79
--- /dev/null
+++ b/doc/fluid/api_cn/distributed_cn/get_rank_cn.rst
@@ -0,0 +1,25 @@
+.. _cn_api_distributed_get_rank:
+
+get_rank
+----------
+
+.. py:function:: paddle.distributed.get_rank()
+
+Returns the rank of the current process.
+
+The rank of the current process equals the value of the environment variable ``PADDLE_TRAINER_ID``; the default value is 0.
+
+Returns
+:::::::::
+(int) The rank of the current process.
+
+Code example
+:::::::::
+.. code-block:: python
+
+    import paddle
+    import paddle.distributed as dist
+
+    # execute this command in terminal: export PADDLE_TRAINER_ID=0
+    print("The rank is %d" % dist.get_rank())
+    # The rank is 0
diff --git a/doc/fluid/api_cn/distributed_cn/get_world_size_cn.rst b/doc/fluid/api_cn/distributed_cn/get_world_size_cn.rst
new file mode 100644
index 000000000..08342de3c
--- /dev/null
+++ b/doc/fluid/api_cn/distributed_cn/get_world_size_cn.rst
@@ -0,0 +1,25 @@
+.. _cn_api_distributed_get_world_size:
+
+get_world_size
+----------------
+
+.. py:function:: paddle.distributed.get_world_size()
+
+Returns the number of processes participating in the current job.
+
+The number of processes equals the value of the environment variable ``PADDLE_TRAINERS_NUM``; the default value is 1.
+
+Returns
+:::::::::
+(int) The number of processes participating in the job.
+
+Code example
+:::::::::
+.. code-block:: python
+
+    import paddle
+    import paddle.distributed as dist
+
+    # execute this command in terminal: export PADDLE_TRAINERS_NUM=4
+    print("The world_size is %d" % dist.get_world_size())
+    # The world_size is 4
diff --git a/doc/fluid/api_cn/distributed_cn/init_parallel_env_cn.rst b/doc/fluid/api_cn/distributed_cn/init_parallel_env_cn.rst
new file mode 100644
index 000000000..eafe9f10f
--- /dev/null
+++ b/doc/fluid/api_cn/distributed_cn/init_parallel_env_cn.rst
@@ -0,0 +1,64 @@
+.. _cn_api_distributed_init_parallel_env:
+
+init_parallel_env
+-----------------
+
+.. py:function:: paddle.distributed.init_parallel_env()
+
+Initializes the parallel training environment in dynamic graph mode.
+
+.. note::
+    Currently, only the GPU training environment can be initialized, and NCCL is used for communication.
+
+Returns
+:::::::::
+None
+
+Code example
+:::::::::
+.. code-block:: python
+
+    import paddle
+    import paddle.nn as nn
+    import paddle.optimizer as opt
+    import paddle.distributed as dist
+
+    class LinearNet(nn.Layer):
+        def __init__(self):
+            super(LinearNet, self).__init__()
+            self._linear1 = nn.Linear(10, 10)
+            self._linear2 = nn.Linear(10, 1)
+
+        def forward(self, x):
+            return self._linear2(self._linear1(x))
+
+    def train():
+        # 1. enable dynamic mode
+        paddle.disable_static()
+
+        # 2. initialize parallel environment
+        dist.init_parallel_env()
+
+        # 3. create data parallel layer & optimizer
+        layer = LinearNet()
+        dp_layer = paddle.DataParallel(layer)
+
+        loss_fn = nn.MSELoss()
+        adam = opt.Adam(
+            learning_rate=0.001, parameters=dp_layer.parameters())
+
+        # 4. run layer
+        inputs = paddle.randn([10, 10], 'float32')
+        outputs = dp_layer(inputs)
+        labels = paddle.randn([10, 1], 'float32')
+        loss = loss_fn(outputs, labels)
+
+        loss = dp_layer.scale_loss(loss)
+        loss.backward()
+        dp_layer.apply_collective_grads()
+
+        adam.step()
+        adam.clear_grad()
+
+    if __name__ == '__main__':
+        dist.spawn(train)
diff --git a/doc/fluid/api_cn/distributed_cn/prepare_context_cn.rst b/doc/fluid/api_cn/distributed_cn/prepare_context_cn.rst
new file mode 100644
index 000000000..656ff3c34
--- /dev/null
+++ b/doc/fluid/api_cn/distributed_cn/prepare_context_cn.rst
@@ -0,0 +1,5 @@
+.. _cn_api_distributed_prepare_context:
+
+prepare_context
+-------------------------------
+:doc_source: paddle.fluid.dygraph.parallel.prepare_context
diff --git a/doc/fluid/api_cn/distributed_cn/spawn_cn.rst b/doc/fluid/api_cn/distributed_cn/spawn_cn.rst
new file mode 100644
index 000000000..21f8f762f
--- /dev/null
+++ b/doc/fluid/api_cn/distributed_cn/spawn_cn.rst
@@ -0,0 +1,105 @@
+.. _cn_api_distributed_spawn:
+
+spawn
+-----
+
+.. py:function:: paddle.distributed.spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options)
+
+Starts a multi-process job using the ``spawn`` method.
+
+Parameters
+:::::::::
+    - func (function) - The target function called by the processes started by ``spawn``. It needs to be picklable (serializable), so it must be defined as a top-level function of a module; it cannot be an inner function or a class method.
+    - args (tuple, optional) - Arguments passed to the target function ``func``.
+    - nprocs (int, optional) - The number of processes to start. The default value is -1. When ``nprocs`` is -1, all currently available devices are obtained from environment variables: if the job runs on GPUs, the available device IDs are read from the environment variable ``CUDA_VISIBLE_DEVICES``; if it runs on CPUs, the number of available CPU devices is read from the environment variable ``CPU_NUM``, which can be configured with, for example, ``export CPU_NUM=4``; if this variable is not set, its value defaults to 1.
+    - join (bool, optional) - Whether to perform a blocking ``join`` on all started processes and wait for them to finish. The default value is True.
+    - daemon (bool, optional) - The ``daemon`` attribute of the started processes. The default value is False.
+    - **options (dict, optional) - Other options for initializing the parallel execution environment. The following options are currently supported: (1) start_method (string) - the method used to start child processes; it can be ``spawn``, ``fork``, or ``forkserver``. Because the CUDA runtime does not support the ``fork`` method, ``spawn`` or ``forkserver`` must be used when CUDA is used in child processes; the default method is ``spawn``. (2) cluster_node_ips (string) - the IPs of the cluster nodes (machines), e.g. "192.168.0.16,192.168.0.17"; the default value is "127.0.0.1". (3) node_ip (string) - the IP of the current node (machine), e.g. "192.168.0.16"; the default value is "127.0.0.1". (4) started_port (int) - the start port of the training processes on one node (machine), e.g. 6170; the default value is None. (5) selected_gpus (string) - the GPU IDs used for training, e.g. "0,1,2,3"; the default value is None. (6) print_config (bool) - whether to print the configuration of the current parallel job; the default value is False. (7) use_paddlecloud (bool) - whether to start the multi-process job via PaddleCloud; the default value is False.
+
+Returns
+:::::::::
+A ``MultiprocessContext`` object that holds the started processes.
+
+Code example
+:::::::::
+.. code-block:: python
+
+    from __future__ import print_function
+
+    import paddle
+    import paddle.nn as nn
+    import paddle.optimizer as opt
+    import paddle.distributed as dist
+
+    class LinearNet(nn.Layer):
+        def __init__(self):
+            super(LinearNet, self).__init__()
+            self._linear1 = nn.Linear(10, 10)
+            self._linear2 = nn.Linear(10, 1)
+
+        def forward(self, x):
+            return self._linear2(self._linear1(x))
+
+    def train(print_result=False):
+        # 1. enable dynamic mode
+        paddle.disable_static()
+
+        # 2. initialize parallel environment
+        dist.init_parallel_env()
+
+        # 3. create data parallel layer & optimizer
+        layer = LinearNet()
+        dp_layer = paddle.DataParallel(layer)
+
+        loss_fn = nn.MSELoss()
+        adam = opt.Adam(
+            learning_rate=0.001, parameters=dp_layer.parameters())
+
+        # 4. run layer
+        inputs = paddle.randn([10, 10], 'float32')
+        outputs = dp_layer(inputs)
+        labels = paddle.randn([10, 1], 'float32')
+        loss = loss_fn(outputs, labels)
+
+        if print_result is True:
+            print("loss:", loss.numpy())
+
+        loss = dp_layer.scale_loss(loss)
+        loss.backward()
+        dp_layer.apply_collective_grads()
+
+        adam.step()
+        adam.clear_grad()
+
+    # Usage 1: only pass function.
+    # If your training method does not need any argument, and
+    # uses all visible devices for parallel training.
+    if __name__ == '__main__':
+        dist.spawn(train)
+
+    # Usage 2: pass function and arguments.
+    # If your training method needs some arguments, and
+    # uses all visible devices for parallel training.
+    if __name__ == '__main__':
+        dist.spawn(train, args=(True,))
+
+    # Usage 3: pass function, arguments and nprocs.
+    # If your training method needs some arguments, and
+    # only uses part of the visible devices for parallel training.
+    # If your machine holds 8 cards {0,1,2,3,4,5,6,7},
+    # this case will use cards {0,1}; if you set
+    # CUDA_VISIBLE_DEVICES=4,5,6,7, this case will use
+    # cards {4,5}.
+    if __name__ == '__main__':
+        dist.spawn(train, args=(True,), nprocs=2)
+
+    # Usage 4: pass function, arguments, nprocs and selected_gpus.
+    # If your training method needs some arguments, and
+    # only uses part of the visible devices for parallel training,
+    # but you can't set your machine's environment variable
+    # CUDA_VISIBLE_DEVICES, e.g. it is None or lists all cards
+    # {0,1,2,3,4,5,6,7}, you can pass `selected_gpus` to
+    # select the GPU cards you want to use. For example,
+    # this case will use cards {4,5} if your machine holds 8 cards.
+    if __name__ == '__main__':
+        dist.spawn(train, args=(True,), nprocs=2, selected_gpus='4,5')
\ No newline at end of file
diff --git a/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
index f9a92c72b..0258a6136 100644
--- a/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
+++ b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
@@ -9,15 +9,23 @@ DataParallel
 
 Runs a dynamic graph model in data parallel mode.
 
-Currently, ``DataParallel`` only supports running dynamic graph models in multi-process mode, as follows:
+Currently, ``DataParallel`` only supports running dynamic graph models in multi-process mode.
 
-``python -m paddle.distributed.launch --selected_gpus=0,1 dynamic_graph_test.py``
+Two launch methods are supported:
 
-The code of the ``dynamic_graph_test.py`` script can be the example code below.
+1. Start with the ``paddle.distributed.spawn`` method, for example:
+
+   ``python demo.py`` (spawn needs to be called in the ``__main__`` block)
+
+2. Start with the ``paddle.distributed.launch`` module, for example:
+
+``python -m paddle.distributed.launch --selected_gpus=0,1 demo.py``
+
+The code of the ``demo.py`` script can be the example code below.
 
 Parameters:
     - **Layer** (Layer) - The model to be run in data parallel mode.
-    - **strategy** (ParallelStrategy) - The data parallel strategy, including the configuration of the parallel execution environment.
+    - **strategy** (ParallelStrategy, optional) - (deprecated) The data parallel strategy, including the configuration of the parallel execution environment. The default value is None.
 
 Returns: a ``Layer`` that supports data parallelism
 
@@ -27,38 +35,53 @@ DataParallel
 
 .. code-block:: python
 
-    import numpy as np
-    import paddle.fluid as fluid
-
-    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
-    with fluid.dygraph.guard(place):
-
-        # prepare the data parallel context
-        strategy = fluid.dygraph.prepare_context()
-
-        linear = fluid.dygraph.Linear(1, 10, act="softmax")
-        adam = fluid.optimizer.AdamOptimizer(
-            learning_rate=0.001, parameter_list=linear.parameters())
-
-        # make the module become the data parallelism module
-        linear = fluid.dygraph.DataParallel(linear, strategy)
-
-        x_data = np.random.random(size=[10, 1]).astype(np.float32)
-        data = fluid.dygraph.to_variable(x_data)
-
-        hidden = linear(data)
-        avg_loss = fluid.layers.mean(hidden)
-
-        # scale the loss according to the number of trainers.
-        avg_loss = linear.scale_loss(avg_loss)
-
-        avg_loss.backward()
-
-        # collect the gradients of trainers.
-        linear.apply_collective_grads()
-
-        adam.minimize(avg_loss)
-        linear.clear_gradients()
+    import paddle
+    import paddle.nn as nn
+    import paddle.optimizer as opt
+    import paddle.distributed as dist
+
+    class LinearNet(nn.Layer):
+        def __init__(self):
+            super(LinearNet, self).__init__()
+            self._linear1 = nn.Linear(10, 10)
+            self._linear2 = nn.Linear(10, 1)
+
+        def forward(self, x):
+            return self._linear2(self._linear1(x))
+
+    def train():
+        # 1. enable dynamic mode
+        paddle.disable_static()
+
+        # 2. initialize parallel environment
+        dist.init_parallel_env()
+
+        # 3. create data parallel layer & optimizer
+        layer = LinearNet()
+        dp_layer = paddle.DataParallel(layer)
+
+        loss_fn = nn.MSELoss()
+        adam = opt.Adam(
+            learning_rate=0.001, parameters=dp_layer.parameters())
+
+        # 4. run layer
+        inputs = paddle.randn([10, 10], 'float32')
+        outputs = dp_layer(inputs)
+        labels = paddle.randn([10, 1], 'float32')
+        loss = loss_fn(outputs, labels)
+
+        loss = dp_layer.scale_loss(loss)
+        loss.backward()
+        dp_layer.apply_collective_grads()
+
+        adam.step()
+        adam.clear_grad()
+
+    if __name__ == '__main__':
+        # 1. start by ``paddle.distributed.spawn`` (default)
+        dist.spawn(train, nprocs=2)
+        # 2. start by ``paddle.distributed.launch``
+        # train()
 
 .. py:method:: scale_loss(loss)
 
@@ -77,38 +100,53 @@ DataParallel
 
 Scales the loss of the model. In data parallel mode, the loss is scaled by a ratio based on the number of parallel training processes; if data parallel mode is not used, the original loss is returned.
 
 Parameters:
     - **loss** (Variable) - The loss of the current model.
 
 Returns: the scaled loss
 
 Return type: Variable
 
 Code example
 
 .. code-block:: python
 
-    import numpy as np
-    import paddle.fluid as fluid
-
-    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
-    with fluid.dygraph.guard(place):
-
-        # prepare the data parallel context
-        strategy = fluid.dygraph.prepare_context()
-
-        linear = fluid.dygraph.Linear(1, 10, act="softmax")
-        adam = fluid.optimizer.AdamOptimizer(
-            learning_rate=0.001, parameter_list=linear.parameters())
-
-        # make the module become the data parallelism module
-        linear = fluid.dygraph.DataParallel(linear, strategy)
-
-        x_data = np.random.random(size=[10, 1]).astype(np.float32)
-        data = fluid.dygraph.to_variable(x_data)
-
-        hidden = linear(data)
-        avg_loss = fluid.layers.mean(hidden)
-
-        # scale the loss according to the number of trainers.
-        avg_loss = linear.scale_loss(avg_loss)
-
-        avg_loss.backward()
-
-        # collect the gradients of trainers.
-        linear.apply_collective_grads()
-
-        adam.minimize(avg_loss)
-        linear.clear_gradients()
+    import paddle
+    import paddle.nn as nn
+    import paddle.optimizer as opt
+    import paddle.distributed as dist
+
+    class LinearNet(nn.Layer):
+        def __init__(self):
+            super(LinearNet, self).__init__()
+            self._linear1 = nn.Linear(10, 10)
+            self._linear2 = nn.Linear(10, 1)
+
+        def forward(self, x):
+            return self._linear2(self._linear1(x))
+
+    def train():
+        # 1. enable dynamic mode
+        paddle.disable_static()
+
+        # 2. initialize parallel environment
+        dist.init_parallel_env()
+
+        # 3. create data parallel layer & optimizer
+        layer = LinearNet()
+        dp_layer = paddle.DataParallel(layer)
+
+        loss_fn = nn.MSELoss()
+        adam = opt.Adam(
+            learning_rate=0.001, parameters=dp_layer.parameters())
+
+        # 4. run layer
+        inputs = paddle.randn([10, 10], 'float32')
+        outputs = dp_layer(inputs)
+        labels = paddle.randn([10, 1], 'float32')
+        loss = loss_fn(outputs, labels)
+
+        loss = dp_layer.scale_loss(loss)
+        loss.backward()
+        dp_layer.apply_collective_grads()
+
+        adam.step()
+        adam.clear_grad()
+
+    if __name__ == '__main__':
+        # 1. start by ``paddle.distributed.spawn`` (default)
+        dist.spawn(train, nprocs=2)
+        # 2. start by ``paddle.distributed.launch``
+        # train()
 
 .. py:method:: apply_collective_grads()
 
@@ -121,35 +159,50 @@ AllReduce (reduce) the gradients of the parameters.
 
 .. code-block:: python
 
-    import numpy as np
-    import paddle.fluid as fluid
-
-    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
-    with fluid.dygraph.guard(place):
-
-        # prepare the data parallel context
-        strategy = fluid.dygraph.prepare_context()
-
-        linear = fluid.dygraph.Linear(1, 10, act="softmax")
-        adam = fluid.optimizer.AdamOptimizer(
-            learning_rate=0.001, parameter_list=linear.parameters())
-
-        # make the module become the data parallelism module
-        linear = fluid.dygraph.DataParallel(linear, strategy)
-
-        x_data = np.random.random(size=[10, 1]).astype(np.float32)
-        data = fluid.dygraph.to_variable(x_data)
-
-        hidden = linear(data)
-        avg_loss = fluid.layers.mean(hidden)
-
-        # scale the loss according to the number of trainers.
-        avg_loss = linear.scale_loss(avg_loss)
-
-        avg_loss.backward()
-
-        # collect the gradients of trainers.
-        linear.apply_collective_grads()
-
-        adam.minimize(avg_loss)
-        linear.clear_gradients()
+    import paddle
+    import paddle.nn as nn
+    import paddle.optimizer as opt
+    import paddle.distributed as dist
+
+    class LinearNet(nn.Layer):
+        def __init__(self):
+            super(LinearNet, self).__init__()
+            self._linear1 = nn.Linear(10, 10)
+            self._linear2 = nn.Linear(10, 1)
+
+        def forward(self, x):
+            return self._linear2(self._linear1(x))
+
+    def train():
+        # 1. enable dynamic mode
+        paddle.disable_static()
+
+        # 2. initialize parallel environment
+        dist.init_parallel_env()
+
+        # 3. create data parallel layer & optimizer
+        layer = LinearNet()
+        dp_layer = paddle.DataParallel(layer)
+
+        loss_fn = nn.MSELoss()
+        adam = opt.Adam(
+            learning_rate=0.001, parameters=dp_layer.parameters())
+
+        # 4. run layer
+        inputs = paddle.randn([10, 10], 'float32')
+        outputs = dp_layer(inputs)
+        labels = paddle.randn([10, 1], 'float32')
+        loss = loss_fn(outputs, labels)
+
+        loss = dp_layer.scale_loss(loss)
+        loss.backward()
+        dp_layer.apply_collective_grads()
+
+        adam.step()
+        adam.clear_grad()
+
+    if __name__ == '__main__':
+        # 1. start by ``paddle.distributed.spawn`` (default)
+        dist.spawn(train, nprocs=2)
+        # 2. start by ``paddle.distributed.launch``
+        # train()
diff --git a/scripts/api_white_list.txt b/scripts/api_white_list.txt
index 699e0f605..46405e392 100644
--- a/scripts/api_white_list.txt
+++ b/scripts/api_white_list.txt
@@ -14,3 +14,5 @@ distributed_cn/barrier_cn.rst
 distributed_cn/broadcast_cn.rst
 distributed_cn/reduce_cn.rst
 distributed_cn/scatter_cn.rst
+distributed_cn/init_parallel_env_cn.rst
+distributed_cn/spawn_cn.rst
-- 
GitLab
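Taken together, the pages added by this patch describe a single workflow. The sketch below strings the documented calls into one script; it reuses only calls that appear in the examples above (``init_parallel_env``, ``get_rank``, ``get_world_size``, ``DataParallel``, ``scale_loss``, ``apply_collective_grads``, ``spawn``), and the ``nprocs=2`` setting assumes a machine with at least two visible GPUs.

.. code-block:: python

    import paddle
    import paddle.nn as nn
    import paddle.optimizer as opt
    import paddle.distributed as dist

    def train():
        # dygraph mode + parallel environment, as in the examples above
        paddle.disable_static()
        dist.init_parallel_env()

        # rank and world size are read from PADDLE_TRAINER_ID and
        # PADDLE_TRAINERS_NUM, which spawn sets for each subprocess
        print("trainer %d of %d" % (dist.get_rank(), dist.get_world_size()))

        # wrap the model for data parallel execution
        dp_layer = paddle.DataParallel(nn.Linear(10, 1))
        adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

        outputs = dp_layer(paddle.randn([4, 10], 'float32'))
        loss = nn.MSELoss()(outputs, paddle.randn([4, 1], 'float32'))

        loss = dp_layer.scale_loss(loss)   # scale loss by trainer count
        loss.backward()
        dp_layer.apply_collective_grads()  # all-reduce gradients
        adam.step()
        adam.clear_grad()

    if __name__ == '__main__':
        dist.spawn(train, nprocs=2)  # assumption: two GPUs are visible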