Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
cadc6a97
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cadc6a97
编写于
10月 14, 2019
作者:
W
WangXi
提交者:
gongweibao
10月 15, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix dgc test and bug when not set trainers_endpoints_, test=develop (#20617)
上级
46797f53
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
8 addition
and
4 deletion
+8
-4
paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc
...k/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc
+1
-2
python/paddle/fluid/incubate/fleet/collective/__init__.py
python/paddle/fluid/incubate/fleet/collective/__init__.py
+0
-1
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+7
-1
未找到文件。
paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc
浏览文件 @
cadc6a97
...
...
@@ -465,8 +465,7 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(ir::Graph *result,
new
details
::
SparseAllReduceOpHandle
(
result
->
CreateEmptyNode
(
"allreduce"
,
ir
::
Node
::
Type
::
kOperation
),
scopes
,
places
,
multi_nccl_ctxs_
,
is_encoded
,
static_cast
<
int
>
(
strategy_
.
trainers_endpoints_
.
size
())
*
places_
.
size
()));
strategy_
.
num_trainers_
*
places_
.
size
()));
}
else
{
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
details
::
AllReduceOpHandle
(
...
...
python/paddle/fluid/incubate/fleet/collective/__init__.py
浏览文件 @
cadc6a97
...
...
@@ -271,7 +271,6 @@ class CollectiveOptimizer(DistributedOptimizer):
node_num
=
self
.
_node_num
()
assert
node_num
>=
1
,
"nccl2 node_num must >= 1, now:{}"
%
node_num
self
.
_strategy
.
fuse_all_reduce_ops
=
True
exec_strategy
=
self
.
_strategy
.
exec_strategy
if
node_num
<=
1
:
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
cadc6a97
...
...
@@ -291,6 +291,10 @@ class TestDistRunnerBase(object):
build_stra
.
num_trainers
=
1
build_stra
.
trainer_id
=
0
if
args
.
use_dgc
:
# fuse_all_reduce_ops require that gradients should not be sparse types
build_stra
.
fuse_all_reduce_ops
=
False
print_to_err
(
type
(
self
).
__name__
,
"begin to compile with data parallel"
)
binary
=
compiler
.
CompiledProgram
(
trainer_prog
).
with_data_parallel
(
loss_name
=
avg_cost
.
name
,
...
...
@@ -852,7 +856,9 @@ class TestDistBase(unittest.TestCase):
if
check_error_log
:
required_envs
[
"GLOG_vmodule"
]
=
\
"fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10"
"fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,"
\
"alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10,"
\
"sparse_all_reduce_op_handle=10"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
local_losses
\
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录