diff --git a/doc/fluid/api_cn/dygraph_cn.rst b/doc/fluid/api_cn/dygraph_cn.rst
index 23b6e135713c3bbdad323a9cc1d8113d27653377..ca59114ab6f28680ce41eef2cbd6e90b8beaee70 100644
--- a/doc/fluid/api_cn/dygraph_cn.rst
+++ b/doc/fluid/api_cn/dygraph_cn.rst
@@ -16,6 +16,7 @@ fluid.dygraph
     dygraph_cn/Conv3D_cn.rst
     dygraph_cn/Conv3DTranspose_cn.rst
     dygraph_cn/CosineDecay_cn.rst
+    dygraph_cn/DataParallel_cn.rst
     dygraph_cn/Embedding_cn.rst
     dygraph_cn/ExponentialDecay_cn.rst
     dygraph_cn/FC_cn.rst
diff --git a/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..17c7ade141bae85c50cc3b588f1aa7932a6f0f87
--- /dev/null
+++ b/doc/fluid/api_cn/dygraph_cn/DataParallel_cn.rst
@@ -0,0 +1,73 @@
+.. _cn_api_fluid_dygraph_DataParallel:
+
+DataParallel
+-------------------------------
+
+.. py:class:: paddle.fluid.dygraph.DataParallel(layers, strategy)
+
+This API builds a callable object of the ``DataParallel`` class, which is used to run a model in data-parallel mode; see the code example below for detailed usage.
+
+Currently, ``DataParallel`` only supports running dygraph programs with multiple processes, launched as follows (where ``dynamic_graph_test.py`` is the file containing the example code):
+
+``python -m paddle.distributed.launch --selected_gpus=0,1 dynamic_graph_test.py``
+
+Parameters:
+    - **layers** (Layer) - The model to be run in data-parallel mode.
+    - **strategy** (ParallelStrategy) - The data parallelism strategy.
+
+Returns:
+    None
+
+.. code-block:: python
+
+    import numpy as np
+    import paddle.fluid as fluid
+    import paddle.fluid.dygraph as dygraph
+    from paddle.fluid.optimizer import AdamOptimizer
+    from paddle.fluid.dygraph.nn import Linear
+    from paddle.fluid.dygraph.base import to_variable
+
+    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
+    with fluid.dygraph.guard(place=place):
+
+        # prepare the data-parallel context
+        strategy = dygraph.parallel.prepare_context()
+
+        linear = Linear(1, 10, act="softmax")
+        adam = AdamOptimizer(parameter_list=linear.parameters())
+
+        # wrap the model so that it runs in data-parallel mode
+        linear = dygraph.parallel.DataParallel(linear, strategy)
+
+        x_data = np.random.random(size=[10, 1]).astype(np.float32)
+        data = to_variable(x_data)
+
+        hidden = linear(data)
+        avg_loss = fluid.layers.mean(hidden)
+
+        # scale the loss according to the number of trainers
+        avg_loss = linear.scale_loss(avg_loss)
+
+        avg_loss.backward()
+
+        # aggregate the gradients across trainers
+        linear.apply_collective_grads()
+
+        adam.minimize(avg_loss)
+        linear.clear_gradients()
+
+.. py:method:: scale_loss(loss)
+
+Scales the loss value. In data-parallel mode, the loss is scaled in proportion to the number of trainers; otherwise, the original loss value is returned unchanged.
+
+Parameters:
+    - **loss** (Variable) - The loss value of the current model.
+
+Returns: The scaled loss value.
+
+Return type: Variable
+
+.. py:method:: apply_collective_grads()
+
+Uses AllReduce to average the parameter gradients across the model replicas in data-parallel mode.
+
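
The interplay between ``scale_loss`` and ``apply_collective_grads`` rests on a small piece of arithmetic: if each trainer scales its loss down by the number of trainers before calling ``backward``, then a collective sum of the per-trainer gradients equals the mean of the unscaled gradients. The sketch below is a minimal NumPy illustration of that identity only, assuming the collective operation behaves like an AllReduce in sum mode; ``per_trainer_grads`` and ``num_trainers`` are hypothetical stand-ins, and nothing here is a Paddle API or a claim about how ``DataParallel`` implements its collectives internally.

.. code-block:: python

    # Illustrative sketch: scaling each trainer's loss by 1/num_trainers before
    # backward means that summing the resulting gradients across trainers yields
    # the mean of the original, unscaled gradients.
    import numpy as np

    num_trainers = 4

    # Hypothetical per-trainer gradients for one parameter (one row per trainer).
    per_trainer_grads = np.random.random(size=(num_trainers, 3)).astype(np.float32)

    # Gradients are linear in the loss, so scaling the loss by 1/num_trainers
    # scales each trainer's gradient by the same factor.
    scaled_grads = per_trainer_grads / num_trainers

    # A sum-mode collective leaves every trainer with the sum over all trainers;
    # summing over axis 0 plays that role here.
    reduced = scaled_grads.sum(axis=0)

    # The reduced result equals the average of the unscaled gradients.
    assert np.allclose(reduced, per_trainer_grads.mean(axis=0))

This is consistent with the order of calls in the documented example: ``scale_loss`` runs before ``backward`` and ``apply_collective_grads`` runs after it, so the gradients applied by ``adam.minimize`` are averaged over all trainers.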