BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 6c5c547e
Authored September 23, 2020 by mapingshuo
Parent: e3334f3e

correct role_maker usage

1 changed file with 19 additions and 20 deletions (+19 -20):
python/paddle/distributed/fleet/meta_optimizers/zero_optimizer.py
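Every hunk below applies the same mechanical substitution: ZeroOptimizer stops calling the role maker's public accessors (worker_num(), worker_index(), get_trainer_endpoints()) and switches to the underscore-prefixed ones (_worker_num(), _worker_index(), _get_trainer_endpoints()). As a minimal sketch of the interface the optimizer relies on after this commit (the class below is a hypothetical stand-in, not Paddle's real role maker):

    # Hypothetical stand-in showing only the three accessors this commit renames.
    class SketchRoleMaker:
        def __init__(self, endpoints, index):
            self._endpoints = endpoints  # all trainer endpoints, e.g. ["10.0.0.1:6174"]
            self._index = index          # this trainer's rank in that list

        def _worker_num(self):             # replaces worker_num()
            return len(self._endpoints)

        def _worker_index(self):           # replaces worker_index()
            return self._index

        def _get_trainer_endpoints(self):  # replaces get_trainer_endpoints()
            return self._endpoints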
@@ -243,7 +243,7 @@ class ZeroOptimizer(MetaOptimizerBase):
             param2mem.append((param.name, mem))
         # print(param.name, mem)
         # print("total_param_mem: ", total_param_mem)
-        device_num = self.role_maker.worker_num()
+        device_num = self.role_maker._worker_num()
         # print("device_num: ", device_num)
         device2params = {x: [] for x in range(device_num)}
         device_idx = 0
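This first hunk sits just after the optimizer has recorded each parameter's memory footprint in param2mem and asks the role maker for the device count; device2params then maps each device id to the parameters it will own. A toy sketch of that sharding idea follows (the fill strategy is illustrative only; the file's actual placement logic is elided above):

    # Illustrative ZeRO-style sharding: walk parameters in order and advance to
    # the next device once the current one holds its share of total memory.
    def shard_params(param2mem, device_num):
        total = sum(mem for _, mem in param2mem)
        per_device = total / device_num
        device2params = {x: [] for x in range(device_num)}  # as in the hunk above
        device_idx, acc = 0, 0.0
        for name, mem in param2mem:
            device2params[device_idx].append(name)
            acc += mem
            if acc >= per_device * (device_idx + 1) and device_idx + 1 < device_num:
                device_idx += 1
        return device2params

    print(shard_params([("w0", 4.0), ("w1", 4.0), ("b0", 1.0), ("b1", 1.0)], 2))
    # {0: ['w0', 'w1'], 1: ['b0', 'b1']}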
@@ -327,7 +327,7 @@ class ZeroOptimizer(MetaOptimizerBase):
                     if input_name != broadcast_name:
                         op._rename_input(input_name, broadcast_name)
                     continue
-                if root_device == self.role_maker.worker_index():
+                if root_device == self.role_maker._worker_index():
                     broadcast_var_name = input_name
                 else:
                     broadcast_var_name = unique_name.generate(input_name +
@@ -357,7 +357,7 @@ class ZeroOptimizer(MetaOptimizerBase):
                 fp32_param = op.desc.input_arg_names()[0]
                 fp16_param = op.desc.output_arg_names()[0]
                 if self._param2device[
-                        fp32_param] == self.role_maker.worker_index():
+                        fp32_param] == self.role_maker._worker_index():
                     sub_prog._cast_ops[fp16_param] = fp32_param
             if sub_prog._param_mem > 0:
@@ -406,7 +406,7 @@ class ZeroOptimizer(MetaOptimizerBase):
         params = []
         for var_name, _ in block.vars.items():
             if self._is_opti_var(var_name) and \
-                    self._var_device_id(var_name) != self.role_maker.worker_index():
+                    self._var_device_id(var_name) != self.role_maker._worker_index():
                 params.append(var_name)
         program_deps = ProgramDeps(block, reduced_grads, params)
@@ -428,7 +428,7 @@ class ZeroOptimizer(MetaOptimizerBase):
                     reduce_var = var_to_reduce_var[input_name]
                     param_name = self._reduced_grads_to_param[reduce_var]
                     if self._param2device[
-                            param_name] != self.role_maker.worker_index():
+                            param_name] != self.role_maker._worker_index():
                         program_deps.crop_input_var_from_op(idx, input_name)
                     else:
                         reversed_input_vars.append(input_name)
@@ -726,20 +726,20 @@ class ZeroOptimizer(MetaOptimizerBase):
         for idx, op in reversed(list(enumerate(block.ops))):
             for output_name in op.desc.output_arg_names():
                 var_device_id = self._var_device_id(output_name)
-                if var_device_id == -1 or var_device_id == self.role_maker.worker_index(
+                if var_device_id == -1 or var_device_id == self.role_maker._worker_index(
                 ):
                     continue
                 print("%d: startup_block remove op %s" %
-                      (self.role_maker.worker_index(), op.type))
+                      (self.role_maker._worker_index(), op.type))
                 block._remove_op(idx)
                 break
         for var_name, _ in block.vars.items():
             var_device_id = self._var_device_id(var_name)
-            if var_device_id == -1 or var_device_id == self.role_maker.worker_index(
+            if var_device_id == -1 or var_device_id == self.role_maker._worker_index(
             ):
                 continue
             print("%d: startup_block remove var %s" %
-                  (self.role_maker.worker_index(), var_name))
+                  (self.role_maker._worker_index(), var_name))
             block._remove_var(var_name)
         block._sync_with_cpp()
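Independent of the rename, this hunk's filter is worth spelling out: an op or var survives the startup-block pruning only if its device id is -1 (replicated on every worker) or equal to this worker's index. A self-contained sketch of the same filter over a plain dict (names are hypothetical):

    # Toy version of the pruning filter above: keep replicated vars (device -1)
    # and vars owned by this worker; drop and report everything else.
    def prune_for_worker(var_device, worker_idx):
        kept = {}
        for var_name, device_id in var_device.items():
            if device_id == -1 or device_id == worker_idx:
                kept[var_name] = device_id
                continue
            print("%d: startup_block remove var %s" % (worker_idx, var_name))
        return kept

    print(prune_for_worker({"w0": 0, "w1": 1, "lr": -1}, worker_idx=0))
    # prints "0: startup_block remove var w1", returns {'w0': 0, 'lr': -1}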
@@ -775,15 +775,14 @@ class ZeroOptimizer(MetaOptimizerBase):
     def _set_up(self, params_grads):
         # step 1: initialize nccl
-        # TODO(mapingshuo) fix get_trainer_endpoints
-        print("work idx: ", self.role_maker.worker_index())
-        endpoints = self.role_maker.get_trainer_endpoints()
-        current_endpoint = endpoints[self.role_maker.worker_index()]
+        print("work idx: ", self.role_maker._worker_index())
+        endpoints = self.role_maker._get_trainer_endpoints()
+        current_endpoint = endpoints[self.role_maker._worker_index()]
         collective_helper = CollectiveHelper(self.role_maker, self._nrings)
         for ring_id in range(self._nrings):
             collective_helper._init_communicator(
                 self._startup_program, current_endpoint, endpoints,
-                self.role_maker.worker_index(), ring_id, '6174')
+                self.role_maker._worker_index(), ring_id, '6174')
         startup_block = self._startup_program.global_block()
         startup_block._sync_with_cpp()
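_set_up initializes one communicator per ring: every ring_id receives the same endpoint list and the same rank, so all rings span the same worker group. A stub sketch of that call pattern (StubHelper and its signature are hypothetical; Paddle's CollectiveHelper performs the real NCCL setup):

    # Stub mirroring the per-ring init loop: same endpoints and rank each time,
    # one communicator per ring id, with the fixed port string '6174'.
    class StubHelper:
        def _init_communicator(self, program, current_endpoint, endpoints,
                               rank, ring_id, port):
            print("ring %d: rank %d at %s among %d peers" %
                  (ring_id, rank, current_endpoint, len(endpoints)))

    endpoints = ["10.0.0.1:6174", "10.0.0.2:6174"]
    rank, nrings = 0, 2  # stand-ins for _worker_index() and self._nrings
    helper = StubHelper()
    for ring_id in range(nrings):
        helper._init_communicator(None, endpoints[rank], endpoints,
                                  rank, ring_id, '6174')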
@@ -846,7 +845,7 @@ class ZeroOptimizer(MetaOptimizerBase):
         # step4: insert reduce_sum for grad
         self._insert_scale_loss_grad_ops(
-            main_block, scale=1.0 / self.role_maker.worker_num())
+            main_block, scale=1.0 / self.role_maker._worker_num())
         main_block._sync_with_cpp()

         # step5: remove unneeded ops and vars from block
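The scale argument in this hunk carries the numerical reasoning: the step4 comment says gradients get a reduce_sum across workers, and pre-scaling the loss gradient by 1.0 / worker_num turns that sum into a mean. A short check in plain Python:

    # Why scale = 1.0 / worker_num: summing grads that were pre-scaled by 1/N
    # equals averaging the raw grads.
    raw_grads = [3.0, 5.0]  # one gradient value per worker, N = 2
    scaled = [g * (1.0 / len(raw_grads)) for g in raw_grads]
    assert sum(scaled) == sum(raw_grads) / len(raw_grads)  # both are 4.0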
@@ -1194,21 +1193,21 @@ class ZeroOptimizer(MetaOptimizerBase):
         if startup_program is None:
             startup_program = default_startup_program()
-        print("work idx: ", self.role_maker.worker_index())
-        endpoints = self.role_maker.get_trainer_endpoints()
-        current_endpoint = endpoints[self.role_maker.worker_index()]
+        print("work idx: ", self.role_maker._worker_index())
+        endpoints = self.role_maker._get_trainer_endpoints()
+        current_endpoint = endpoints[self.role_maker._worker_index()]
         collective_helper = CollectiveHelper(self.role_maker, self._nrings)
         for ring_id in range(self._nrings):
             collective_helper._init_communicator(
                 startup_program, current_endpoint, endpoints,
-                self.role_maker.worker_index(), ring_id, '6174')
+                self.role_maker._worker_index(), ring_id, '6174')
         main_block = loss.block
         startup_block = startup_program.global_block()
         self._broadcast_params(startup_block)
         self._insert_scale_loss_grad_ops(
-            main_block, scale=1.0 / self.role_maker.worker_num())
+            main_block, scale=1.0 / self.role_maker._worker_num())
         self._insert_allreduce_ops_tmp(main_block)
         print("insert allreduce done")
         return optimize_ops, params_grads