Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
af17a6ee
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
af17a6ee
编写于
3月 03, 2021
作者:
W
WangXi
提交者:
sandyhouse
3月 22, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimizer pp insert sendrecv from O(n2) to O(n)
上级
5646f710
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
91 additions
and
1 deletion
+91
-1
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+91
-1
未找到文件。
python/paddle/fluid/optimizer.py
浏览文件 @
af17a6ee
...
...
@@ -4515,6 +4515,96 @@ class PipelineOptimizer(object):
})
extra_index
+=
1
def _xx_insert_sendrecv_ops_for_boundaries(self, block):
    """
    Insert a send_v2/recv_v2 op pair wherever an op reads a var whose
    most recent producer ran on a different device.

    For every op (in block order) and every non-data input var of that
    op, the latest earlier op that outputs the var is looked up. When
    that producer's device differs from the consumer's device, a
    ``send_v2`` op (tagged with the producer's device) followed by a
    ``recv_v2`` op (tagged with the consumer's device) is inserted
    directly in front of the consumer. Each (var, consumer device)
    combination gets at most one pair. Ops or producers whose device
    is "gpu:all" are skipped.

    Ops are inserted with ``_insert_op_without_sync`` and the block is
    synced exactly once, after all insertions.

    Args:
        block: the block whose ops are scanned and extended in place.
    """
    # Iterate over a snapshot of the op list; the original code also
    # copied it, presumably because inserting ops mutates block.ops.
    ops_snapshot = list(block.ops)

    # Var name -> [(op, index), ...] for every op that outputs the var,
    # kept in block order so the latest producer is found from the end.
    output_var_to_op = {}
    for index, op in enumerate(ops_snapshot):
        for var_name in op.output_arg_names:
            output_var_to_op.setdefault(var_name, []).append((op, index))

    # Var name -> set of consumer devices that already received the var,
    # avoiding multiple send and recv ops for the same transfer.
    input_var_to_device = {}

    # Number of ops inserted so far; shifts snapshot indices to the
    # matching positions in the growing block.
    extra_index = 0

    for index, op in enumerate(ops_snapshot):
        cur_device = op.attr(self._op_device_key)
        if cur_device == "gpu:all":
            continue

        for var_name in op.input_arg_names:
            if block.var(var_name).is_data:
                continue

            generate_ops = output_var_to_op.get(var_name)
            if generate_ops is None:
                continue

            # Latest producer located strictly before the current op.
            prev_op = next(
                (gen_op for gen_op, gen_idx in reversed(generate_ops)
                 if gen_idx < index),
                None)
            if prev_op is None:
                continue

            prev_device = prev_op.attr(self._op_device_key)
            if prev_device is None or prev_device == 'gpu:all':
                continue
            if prev_device == cur_device:
                continue

            served_devices = input_var_to_device.setdefault(var_name, set())
            if cur_device in served_devices:
                continue
            served_devices.add(cur_device)

            op_role = op.all_attrs()[self._op_role_key]
            boundary_var = block.vars[var_name]
            # Device strings are parsed as "<kind>:<index>".
            prev_device_index = int(prev_device.split(':')[1])
            cur_device_index = int(cur_device.split(':')[1])

            block._insert_op_without_sync(
                index=index + extra_index,
                type='send_v2',
                inputs={'X': boundary_var},
                attrs={
                    self._op_device_key: prev_device,
                    self._op_role_key: op_role,
                    'use_calc_stream': True,
                    'peer': cur_device_index,
                    'ring_id': self.ring_id,
                })
            extra_index += 1

            block._insert_op_without_sync(
                index=index + extra_index,
                type='recv_v2',
                outputs={'Out': [boundary_var]},
                attrs={
                    'out_shape': boundary_var.shape,
                    'dtype': boundary_var.dtype,
                    self._op_device_key: cur_device,
                    self._op_role_key: op_role,
                    'use_calc_stream': True,
                    'peer': prev_device_index,
                    'ring_id': self.ring_id,
                })
            extra_index += 1

    # Single sync after all unsynced insertions.
    block._sync_with_cpp()
def
_clear_gradients
(
self
,
main_block
,
param_names
):
"""
Clear gradients at the begining of each run of a minibatch.
...
...
@@ -4932,7 +5022,7 @@ class PipelineOptimizer(object):
"another in the order of their ids."
)
# Step2: add send and recv ops between section boundaries
self
.
_insert_sendrecv_ops_for_boundaries
(
main_block
)
self
.
_
xx_
insert_sendrecv_ops_for_boundaries
(
main_block
)
# Step3: split program into sections and add pairs of
# send and recv ops for data var.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录