机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit a7cd61fd (unverified)
Authored Aug 19, 2020 by Chen Weihang; committed via GitHub on Aug 19, 2020
fix DataParallel code samples, test=document_fix (#26423)
Parent: bcf03273
Showing 1 changed file with 57 additions and 49 deletions
python/paddle/fluid/dygraph/parallel.py  +57 -49  (view file @ a7cd61fd)
...
@@ -242,41 +242,38 @@ class DataParallel(layers.Layer):
     Examples:
         .. code-block:: python

             import numpy as np
             import paddle.fluid as fluid
-            import paddle.fluid.dygraph as dygraph
-            from paddle.fluid.optimizer import AdamOptimizer
-            from paddle.fluid.dygraph.nn import Linear
-            from paddle.fluid.dygraph.base import to_variable

-            place = place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
-            with fluid.dygraph.guard(place=place):
+            place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
+            with fluid.dygraph.guard(place):

                 # prepare the data parallel context
-                strategy=dygraph.prepare_context()
+                strategy = fluid.dygraph.prepare_context()

-                linear = Linear(1, 10, act="softmax")
-                adam = fluid.optimizer.AdamOptimizer()
+                linear = fluid.dygraph.Linear(1, 10, act="softmax")
+                adam = fluid.optimizer.AdamOptimizer(
+                    learning_rate=0.001, parameter_list=linear.parameters())

                 # make the module become the data parallelism module
-                linear = dygraph.DataParallel(linear, strategy)
+                linear = fluid.dygraph.DataParallel(linear, strategy)

                 x_data = np.random.random(size=[10, 1]).astype(np.float32)
-                data = to_variable(x_data)
+                data = fluid.dygraph.to_variable(x_data)

                 hidden = linear(data)
                 avg_loss = fluid.layers.mean(hidden)

                 # scale the loss according to the number of trainers.
                 avg_loss = linear.scale_loss(avg_loss)

                 avg_loss.backward()

                 # collect the gradients of trainers.
                 linear.apply_collective_grads()

                 adam.minimize(avg_loss)
                 linear.clear_gradients()
     """

     def __init__(self, layers, strategy):
...
@@ -306,20 +303,23 @@ class DataParallel(layers.Layer):
             import numpy as np
             import paddle.fluid as fluid
-            import paddle.fluid.dygraph as dygraph
-            from paddle.fluid.optimizer import AdamOptimizer
-            from paddle.fluid.dygraph.nn import Linear
-            from paddle.fluid.dygraph.base import to_variable
-            place = place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
-            with fluid.dygraph.guard(place=place):
-                strategy=dygraph.prepare_context()
-                linear = Linear(1, 10, act="softmax")
-                adam = fluid.optimizer.AdamOptimizer()
-                linear = dygraph.DataParallel(linear, strategy)
+            place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
+            with fluid.dygraph.guard(place):
+                # prepare the data parallel context
+                strategy = fluid.dygraph.prepare_context()
+                linear = fluid.dygraph.Linear(1, 10, act="softmax")
+                adam = fluid.optimizer.AdamOptimizer(
+                    learning_rate=0.001, parameter_list=linear.parameters())
+                # make the module become the data parallelism module
+                linear = fluid.dygraph.DataParallel(linear, strategy)
                 x_data = np.random.random(size=[10, 1]).astype(np.float32)
-                data = to_variable(x_data)
+                data = fluid.dygraph.to_variable(x_data)
                 hidden = linear(data)
                 avg_loss = fluid.layers.mean(hidden)
...
@@ -327,6 +327,8 @@ class DataParallel(layers.Layer):
                 avg_loss = linear.scale_loss(avg_loss)
                 avg_loss.backward()
+                # collect the gradients of trainers.
+                linear.apply_collective_grads()
                 adam.minimize(avg_loss)
...
@@ -390,23 +392,29 @@ class DataParallel(layers.Layer):
             import numpy as np
             import paddle.fluid as fluid
-            import paddle.fluid.dygraph as dygraph
-            from paddle.fluid.optimizer import AdamOptimizer
-            from paddle.fluid.dygraph.nn import Linear
-            from paddle.fluid.dygraph.base import to_variable
-            place = place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
-            with fluid.dygraph.guard(place=place):
-                strategy=dygraph.prepare_context()
-                linear = Linear(1, 10, act="softmax")
-                adam = fluid.optimizer.AdamOptimizer()
-                linear = dygraph.DataParallel(linear, strategy)
+            place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
+            with fluid.dygraph.guard(place):
+                # prepare the data parallel context
+                strategy = fluid.dygraph.prepare_context()
+                linear = fluid.dygraph.Linear(1, 10, act="softmax")
+                adam = fluid.optimizer.AdamOptimizer(
+                    learning_rate=0.001, parameter_list=linear.parameters())
+                # make the module become the data parallelism module
+                linear = fluid.dygraph.DataParallel(linear, strategy)
                 x_data = np.random.random(size=[10, 1]).astype(np.float32)
-                data = to_variable(x_data)
+                data = fluid.dygraph.to_variable(x_data)
                 hidden = linear(data)
                 avg_loss = fluid.layers.mean(hidden)
                 # scale the loss according to the number of trainers.
                 avg_loss = linear.scale_loss(avg_loss)
                 avg_loss.backward()
                 # collect the gradients of trainers.
...
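For reference, the corrected docstring sample reads as a standalone script roughly as below. This is a sketch assembled from the lines added in this commit; it assumes a CUDA build of the 1.8-era paddle.fluid dygraph API, and in a real multi-GPU run the script would typically be launched through paddle.distributed.launch so that each process picks up its own dev_id.

    # Sketch of the corrected DataParallel usage from this commit (fluid dygraph API).
    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id)
    with fluid.dygraph.guard(place):

        # prepare the data parallel context
        strategy = fluid.dygraph.prepare_context()

        linear = fluid.dygraph.Linear(1, 10, act="softmax")
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001, parameter_list=linear.parameters())

        # make the module become the data parallelism module
        linear = fluid.dygraph.DataParallel(linear, strategy)

        x_data = np.random.random(size=[10, 1]).astype(np.float32)
        data = fluid.dygraph.to_variable(x_data)

        hidden = linear(data)
        avg_loss = fluid.layers.mean(hidden)

        # scale the loss according to the number of trainers.
        avg_loss = linear.scale_loss(avg_loss)

        avg_loss.backward()

        # collect the gradients of trainers.
        linear.apply_collective_grads()

        adam.minimize(avg_loss)
        linear.clear_gradients()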