机器未来 / Paddle (forked from PaddlePaddle / Paddle), commit 6a1db204
Unverified commit 6a1db204
Authored on Sep 16, 2019 by tangwei12
Committed by GitHub on Sep 16, 2019
fix sync_with_distributed_lookup_table, test=develop (#19737)
fix wrong place with distributed_lookup_table
Parent 38f1c2fe
Showing 1 changed file with 82 additions and 45 deletions (+82, -45)
python/paddle/fluid/transpiler/distribute_transpiler.py (+82, -45)
@@ -388,49 +388,84 @@ class DistributeTranspiler(object):
                 sparse_update_ops.append(op)
 
         return sparse_update_ops
 
-    def _update_remote_sparse_update_op(self, program, param_varname,
-                                        height_sections, endpoints,
-                                        table_names):
-        ops = []
-        op_type = ""
-
-        for op in self.sparse_update_ops:
-            if param_varname in op.input_arg_names and op_type == "":
-                op_type = op.type
-                ops.append(op)
-            elif param_varname in op.input_arg_names and op_type == op.type:
-                ops.append(op)
-
-        if op_type == "lookup_table":
-            all_ops = program.global_block().ops
-            op_idxs = [all_ops.index(op) for op in ops]
-            inputs = [
-                program.global_block().vars[op.input("Ids")[0]] for op in ops
-            ]
-            w = program.global_block().vars[ops[0].input("W")[0]]
-            padding_idx = ops[0].attr("padding_idx")
-            outputs = [
-                program.global_block().vars[op.output("Out")[0]] for op in ops
-            ]
+    def _update_remote_sparse_update_op(self, program,
+                                        need_sparse_update_params):
+        for param_varname, attrs in need_sparse_update_params.items():
+            height_sections = self.sparse_param_to_height_sections[
+                param_varname]
+            endpoints = attrs[0]
+            table_names = attrs[1]
+
+            ops = []
+            op_type = ""
+            used_ops = []
+
+            for idx, op in enumerate(self.sparse_update_ops):
+                if param_varname in op.input_arg_names and op_type == "":
+                    op_type = op.type
+                    ops.append(op)
+                    used_ops.append(idx)
+                elif param_varname in op.input_arg_names and op_type == op.type:
+                    ops.append(op)
+                    used_ops.append(idx)
+
+            if op_type == "lookup_table":
+                all_ops = program.global_block().ops
+                op_idxs = [all_ops.index(op) for op in ops]
+                inputs = [
+                    program.global_block().vars[op.input("Ids")[0]]
+                    for op in ops
+                ]
+                w = program.global_block().vars[ops[0].input("W")[0]]
+                padding_idx = ops[0].attr("padding_idx")
+                outputs = [
+                    program.global_block().vars[op.output("Out")[0]]
+                    for op in ops
+                ]
-            for idx in op_idxs[::-1]:
-                program.global_block()._remove_op(idx)
+                for idx in op_idxs[::-1]:
+                    program.global_block()._remove_op(idx)
+
+                inputs_idxs = [-1] * len(inputs)
+                outputs_idxs = [-1] * len(outputs)
+
+                for idx, op in enumerate(program.global_block().ops):
+                    for i in range(0, len(op.output_names)):
+                        outs = op.output(op.output_names[i])
+                        for in_id, in_var in enumerate(inputs):
+                            if in_var.name in outs:
+                                inputs_idxs[in_id] = idx
+                    for i in range(0, len(op.input_names)):
+                        ins = op.input(op.input_names[i])
+                        for out_id, out_var in enumerate(outputs):
+                            if out_var.name in ins:
+                                outputs_idxs[out_id] = idx
+
+                if min(outputs_idxs) - max(inputs_idxs) >= 1:
+                    distributed_idx = max(inputs_idxs) + 1
+
+                    program.global_block()._insert_op(
+                        index=distributed_idx,
+                        type="distributed_lookup_table",
+                        inputs={"Ids": inputs,
+                                'W': w},
+                        outputs={"Outputs": outputs},
+                        attrs={
+                            "table_names": table_names,
+                            "height_sections": height_sections,
+                            "endpoints": endpoints,
+                            "padding_idx": padding_idx,
+                            "trainer_id": self.trainer_id
+                        })
+                else:
+                    raise ValueError(
+                        "something wrong with distribute_transpiler, submit a issue is recommended"
+                    )
-            program.global_block()._insert_op(
-                index=op_idxs[0],
-                type="distributed_lookup_table",
-                inputs={"Ids": inputs,
-                        'W': w},
-                outputs={"Outputs": outputs},
-                attrs={
-                    "table_names": table_names,
-                    "height_sections": height_sections,
-                    "endpoints": endpoints,
-                    "padding_idx": padding_idx,
-                    "trainer_id": self.trainer_id
-                })
+                for idx in used_ops[::-1]:
+                    self.sparse_update_ops.pop(idx)
 
     def _is_input_of_remote_sparse_update_op(self, param_name):
         for op in self.sparse_update_ops:
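The heart of the fix is visible in this hunk: the old code put the fused distributed_lookup_table op back in at op_idxs[0], the slot of the first original lookup_table, while the new code records the index of the op that produces each Ids input (inputs_idxs) and the index of the op that consumes each output (outputs_idxs), then inserts the fused op immediately after the last producer, but only when every consumer comes later. A minimal standalone sketch of that index rule follows, using plain Python lists instead of a Paddle Program; the index values are invented for illustration.

# Toy sketch of the insertion-position rule used above (no Paddle needed).
# Positions of the ops that produce each "Ids" input of the lookups:
inputs_idxs = [2, 4, 3]        # hypothetical producer indices
# Positions of the ops that consume each lookup output:
outputs_idxs = [7, 6, 9]       # hypothetical consumer indices

# The fused op must come after every producer and before every consumer,
# so a single safe slot exists only when min(consumers) > max(producers).
if min(outputs_idxs) - max(inputs_idxs) >= 1:
    distributed_idx = max(inputs_idxs) + 1   # right after the last producer
    print("insert distributed_lookup_table at index", distributed_idx)  # -> 5
else:
    # mirrors the ValueError raised by the transpiler when no safe slot exists
    raise ValueError("no single insertion point satisfies all producers/consumers")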
@@ -681,6 +716,8 @@ class DistributeTranspiler(object):
                     recv_vars[i].name)
                 distributed_var.endpoint = ep
 
+        need_sparse_update_params = {}
+
         # step4: Concat the parameters splits together after recv.
         all_recv_outputs = []
         for param_varname, splited_var in six.iteritems(self.param_var_mapping):
@@ -712,10 +749,7 @@ class DistributeTranspiler(object):
                         table_name)
                     distributed_var.vtype = "RemotePrefetch"
-                height_sections = self.sparse_param_to_height_sections[
-                    param_varname]
-                self._update_remote_sparse_update_op(
-                    program, param_varname, height_sections, eps, table_names)
+                need_sparse_update_params[param_varname] = (eps, table_names)
             else:
                 recv_varnames = []
                 if self.config.runtime_split_send_recv:
@@ -764,6 +798,9 @@ class DistributeTranspiler(object):
                     RPC_OP_ROLE_ATTR_NAME: DIST_OP_ROLE_ATTR_VALUE
                 })
 
+        self._update_remote_sparse_update_op(program,
+                                             need_sparse_update_params)
+
         self._get_trainer_startup_program(recv_vars=recv_vars, eplist=eplist)
 
         if self.has_distributed_lookup_table:
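Taken together, the last three hunks appear to replace the old per-parameter rewrite with an accumulate-then-apply pattern: while step 4 walks param_var_mapping, each remote sparse parameter only records its (endpoints, table_names) pair in need_sparse_update_params, and a single _update_remote_sparse_update_op(program, need_sparse_update_params) call rewrites the lookups once the recv ops are all in place. The sketch below shows that pattern in isolation with plain dicts; the parameter name, endpoints, and table names are made up, and the apply step is reduced to a print.

# Standalone sketch of the accumulate-then-apply pattern (toy data only).
need_sparse_update_params = {}

# Phase 1: while the split parameters are being wired up, just record
# what each remote sparse parameter will need.
eps = ["127.0.0.1:6170", "127.0.0.1:6171"]          # hypothetical pserver endpoints
table_names = ["emb.w_0.block0", "emb.w_0.block1"]  # hypothetical shard tables
need_sparse_update_params["emb.w_0"] = (eps, table_names)

# Phase 2: after the rest of the program has been assembled, apply every
# recorded update in one deferred pass.
def apply_remote_sparse_updates(params):
    for param_varname, (endpoints, tables) in params.items():
        print("rewrite lookups for", param_varname, "->", endpoints, tables)

apply_remote_sparse_updates(need_sparse_update_params)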