From 25835216f15cb65ba90feac88d04a88d41ea30c2 Mon Sep 17 00:00:00 2001
From: JepsonWong <2013000149@qq.com>
Date: Mon, 9 Mar 2020 19:16:04 +0800
Subject: [PATCH] add dataparallel api, test=develop

---
 doc/fluid/api_cn/dygraph_cn.rst                 |  1 +
 .../api_cn/dygraph_cn/DataParallel_cn.rst       | 73 +++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst

diff --git a/doc/fluid/api_cn/dygraph_cn.rst b/doc/fluid/api_cn/dygraph_cn.rst
index 23b6e1357..ca59114ab 100644
--- a/doc/fluid/api_cn/dygraph_cn.rst
+++ b/doc/fluid/api_cn/dygraph_cn.rst
@@ -16,6 +16,7 @@ fluid.dygraph
     dygraph_cn/Conv3D_cn.rst
     dygraph_cn/Conv3DTranspose_cn.rst
     dygraph_cn/CosineDecay_cn.rst
+    dygraph_cn/DataParallel_cn.rst
     dygraph_cn/Embedding_cn.rst
     dygraph_cn/ExponentialDecay_cn.rst
     dygraph_cn/FC_cn.rst
diff --git a/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
new file mode 100644
index 000000000..17c7ade14
--- /dev/null
+++ b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
@@ -0,0 +1,73 @@
.. _cn_api_fluid_dygraph_DataParallel:

DataParallel
-------------------------------

.. py:class:: paddle.fluid.dygraph.DataParallel(layers, strategy)

This API builds a callable object of the ``DataParallel`` class, which is used to run a model in data-parallel mode; see the **Code example** below for usage.

Currently, ``DataParallel`` only supports running dynamic graph (dygraph) programs with multiple processes, launched as follows (where ``dynamic_graph_test.py`` is the file containing the example code):

``python -m paddle.distributed.launch --selected_gpus=0,1 dynamic_graph_test.py``

Parameters:
    - **layers** (Layer) - The model to be run in data-parallel mode.
    - **strategy** (ParallelStrategy) - The data parallelization strategy.

Returns:
    None

**Code example**

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.dygraph as dygraph
    from paddle.fluid.dygraph.nn import Linear
    from paddle.fluid.dygraph.base import to_variable

    # bind this process to the GPU assigned by paddle.distributed.launch
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place=place):

        # prepare the data-parallel context
        strategy = dygraph.parallel.prepare_context()

        linear = Linear(1, 10, act="softmax")
        adam = fluid.optimizer.AdamOptimizer(parameter_list=linear.parameters())

        # wrap the layer so that it runs in data-parallel mode
        linear = dygraph.parallel.DataParallel(linear, strategy)

        x_data = np.random.random(size=[10, 1]).astype(np.float32)
        data = to_variable(x_data)

        hidden = linear(data)
        avg_loss = fluid.layers.mean(hidden)

        # scale the loss according to the number of trainers
        avg_loss = linear.scale_loss(avg_loss)

        avg_loss.backward()

        # collect (AllReduce) the gradients of all trainers
        linear.apply_collective_grads()

        adam.minimize(avg_loss)
        linear.clear_gradients()

.. py:method:: scale_loss(loss)

Scales the loss value. In data-parallel mode, the loss is scaled by the number of trainers; otherwise, the original loss value is returned unchanged.

Parameters:
    - **loss** (Variable) - The loss value of the current model.

Returns: the scaled loss value

Return type: Variable

.. py:method:: apply_collective_grads()

Uses AllReduce to average the parameter gradients across the model replicas running in data-parallel mode.
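The note below gives a rough intuition for how ``scale_loss`` and ``apply_collective_grads`` fit together. It is a minimal numpy sketch, not PaddlePaddle's implementation: ``trainer_num``, the toy loss ``mean(w * x)``, and the equally sized local batches are assumptions made purely for illustration.

.. code-block:: python

    import numpy as np

    np.random.seed(0)
    trainer_num = 2  # assumed trainer count, e.g. --selected_gpus=0,1

    # each trainer sees its own, equally sized slice of the global batch
    local_batches = [np.random.random(8) for _ in range(trainer_num)]

    # toy model: loss = mean(w * x), so d(loss)/dw = mean(x)
    def grad_of_mean_loss(x):
        return x.mean()

    # data-parallel recipe sketched here: scale each local loss by
    # 1/trainer_num (the role of scale_loss), backprop locally, then combine
    # the per-trainer gradients with a collective reduction (the AllReduce
    # step of apply_collective_grads); a plain sum is used in this sketch
    combined_grad = sum(grad_of_mean_loss(x) / trainer_num for x in local_batches)

    # single-card reference: gradient of the mean loss over the whole global batch
    reference_grad = grad_of_mean_loss(np.concatenate(local_batches))

    assert np.isclose(combined_grad, reference_grad)

Because each per-trainer loss is pre-scaled by the trainer count, the collective reduction of the gradients leaves the update equivalent to single-card training on the combined batch, which is why the code example above calls ``scale_loss`` before ``backward`` and ``apply_collective_grads`` before ``minimize``.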