From c98bfff160dcc095f7ce7a98a71a0bdf36c3bf65 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Wed, 19 Aug 2020 11:41:15 +0800
Subject: [PATCH] Add dygraph.DataParallel cn doc (#2411)

* add DataParallel cn doc
* polish details
* fix method declarative error
* polish format
* fix sample code error
* append to white list
---
 doc/fluid/api_cn/dygraph_cn.rst               |   1 +
 .../api_cn/dygraph_cn/DataParallel_cn.rst     | 155 ++++++++++++++++++
 scripts/api_white_list.txt                    |   1 +
 3 files changed, 157 insertions(+)
 create mode 100644 doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst

diff --git a/doc/fluid/api_cn/dygraph_cn.rst b/doc/fluid/api_cn/dygraph_cn.rst
index 0e2fb1e2c..402460745 100644
--- a/doc/fluid/api_cn/dygraph_cn.rst
+++ b/doc/fluid/api_cn/dygraph_cn.rst
@@ -17,6 +17,7 @@ fluid.dygraph
     dygraph_cn/Conv3DTranspose_cn.rst
     dygraph_cn/CosineAnnealingDecay_cn.rst
     dygraph_cn/CosineDecay_cn.rst
+    dygraph_cn/DataParallel_cn.rst
     dygraph_cn/declarative_cn.rst
     dygraph_cn/Dropout_cn.rst
     dygraph_cn/Embedding_cn.rst

diff --git a/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
new file mode 100644
index 000000000..f9a92c72b
--- /dev/null
+++ b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
@@ -0,0 +1,155 @@
.. _cn_api_fluid_dygraph_DataParallel:

DataParallel
------------

.. py:class:: paddle.fluid.dygraph.DataParallel(layers, strategy)

:api_attr: imperative programming mode (dynamic graph)

Runs a dynamic graph model in data-parallel mode.

Currently, ``DataParallel`` only supports running dynamic graph models with multiple processes, launched as follows:

``python -m paddle.distributed.launch --selected_gpus=0,1 dynamic_graph_test.py``

where the ``dynamic_graph_test.py`` script can contain the sample code shown below.

Parameters:
    - **layers** (Layer) - the model to be run in data-parallel mode.
    - **strategy** (ParallelStrategy) - the data-parallel strategy, including the configuration of the parallel execution environment.

Returns: a ``Layer`` that supports data parallelism

Return type: Layer instance

**Code example**:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
    with fluid.dygraph.guard(place):

        # prepare the data parallel context
        strategy = fluid.dygraph.prepare_context()

        linear = fluid.dygraph.Linear(1, 10, act="softmax")
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001, parameter_list=linear.parameters())

        # make the module become the data parallelism module
        linear = fluid.dygraph.DataParallel(linear, strategy)

        x_data = np.random.random(size=[10, 1]).astype(np.float32)
        data = fluid.dygraph.to_variable(x_data)

        hidden = linear(data)
        avg_loss = fluid.layers.mean(hidden)

        # scale the loss according to the number of trainers.
        avg_loss = linear.scale_loss(avg_loss)

        avg_loss.backward()

        # collect the gradients of trainers.
        linear.apply_collective_grads()

        adam.minimize(avg_loss)
        linear.clear_gradients()

.. py:method:: scale_loss(loss)

Scales the model's loss value ``loss``. In data-parallel mode, the loss needs to be scaled according to the number of parallel training processes (trainers).

If the model is not running in data-parallel mode, the original ``loss`` is returned unchanged.

Parameters:
    - **loss** (Variable) - the loss value of the current model.

Returns: the scaled loss value ``loss``

Return type: Variable

**Code example**

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
    with fluid.dygraph.guard(place):

        # prepare the data parallel context
        strategy = fluid.dygraph.prepare_context()

        linear = fluid.dygraph.Linear(1, 10, act="softmax")
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001, parameter_list=linear.parameters())

        # make the module become the data parallelism module
        linear = fluid.dygraph.DataParallel(linear, strategy)

        x_data = np.random.random(size=[10, 1]).astype(np.float32)
        data = fluid.dygraph.to_variable(x_data)

        hidden = linear(data)
        avg_loss = fluid.layers.mean(hidden)

        # scale the loss according to the number of trainers.
        avg_loss = linear.scale_loss(avg_loss)

        avg_loss.backward()

        # collect the gradients of trainers.
        linear.apply_collective_grads()

        adam.minimize(avg_loss)
        linear.clear_gradients()


.. py:method:: apply_collective_grads()

AllReduces (aggregates) the gradients of the parameters across trainers.

Returns: None

**Code example**

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
    with fluid.dygraph.guard(place):

        # prepare the data parallel context
        strategy = fluid.dygraph.prepare_context()

        linear = fluid.dygraph.Linear(1, 10, act="softmax")
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001, parameter_list=linear.parameters())

        # make the module become the data parallelism module
        linear = fluid.dygraph.DataParallel(linear, strategy)

        x_data = np.random.random(size=[10, 1]).astype(np.float32)
        data = fluid.dygraph.to_variable(x_data)

        hidden = linear(data)
        avg_loss = fluid.layers.mean(hidden)

        # scale the loss according to the number of trainers.
        avg_loss = linear.scale_loss(avg_loss)

        avg_loss.backward()

        # collect the gradients of trainers.
        linear.apply_collective_grads()

        adam.minimize(avg_loss)
        linear.clear_gradients()

diff --git a/scripts/api_white_list.txt b/scripts/api_white_list.txt
index 778fa2be2..b5f42e565 100644
--- a/scripts/api_white_list.txt
+++ b/scripts/api_white_list.txt
@@ -7,3 +7,4 @@ transpiler_cn/release_memory_cn.rst
 transpiler_cn/RoundRobin_cn.rst
 optimizer_cn/Dpsgd_cn.rst
 io_cn/ComposeNotAligned_cn.rst
+dygraph_cn/DataParallel_cn.rst
\ No newline at end of file
--
GitLab