机器未来 / Paddle, forked from PaddlePaddle / Paddle (in sync with the fork source)
Commit 767422ee
Authored on Mar 16, 2021 by sandyhouse

Commit message: update

Parent: c4d789af
Showing 3 changed files with 23 additions and 21 deletions (+23 -21)
python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py        +11 -10
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py     +2  -1
python/paddle/fluid/optimizer.py                                          +10 -10
python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py

```diff
@@ -231,20 +231,21 @@ def get_valid_op_role(block, insert_idx):
     return OpRole.Forward or OpRole.Backward
     """
     op_role = block.ops[insert_idx].attr('op_role')
-    # if (insert_idx >= len(block.ops)) or (
-    #         op_role in [int(OpRole.Backward), int(OpRole.Optimize)]):
-    #     return OpRole.Backward
-    # if op_role in [int(OpRole.Forward), int(OpRole.Loss)]:
-    #     return OpRole.Forward
-    # return get_valid_op_role(block, insert_idx + 1)
-    if insert_idx >= len(block.ops): return OpRole.Optimize
-    if op_role == int(OpRole.Backward): return OpRole.Backward
-    if op_role == int(OpRole.Optimize): return OpRole.Optimize
+    if (insert_idx >= len(block.ops)) or (
+            op_role in [int(OpRole.Backward), int(OpRole.Optimize)]):
+        return OpRole.Backward
     if op_role in [int(OpRole.Forward), int(OpRole.Loss)]:
         return OpRole.Forward
     return get_valid_op_role(block, insert_idx + 1)
+    # if insert_idx >= len(block.ops): return OpRole.Optimize
+    # if op_role == int(OpRole.Backward): return OpRole.Backward
+    # if op_role == int(OpRole.Optimize): return OpRole.Optimize
+    # if op_role in [int(OpRole.Forward), int(OpRole.Loss)]:
+    #     return OpRole.Forward
+    # return get_valid_op_role(block, insert_idx + 1)
 
 
 def insert_sync_calc_op(block, insert_idx, calc_dep_vars):
     """
```
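Both variants of `get_valid_op_role` encode the same rule: scan forward from the insertion index until an op with a decisive role is found, mapping Backward/Optimize roles to `OpRole.Backward` and Forward/Loss roles to `OpRole.Forward`. They disagree only on the out-of-range case (the active variant returns `Backward`, the commented one `Optimize`). A minimal runnable sketch of that rule, using toy stand-ins for `OpRole`, the block, and its ops (not Paddle's real classes), following the commented variant's `Optimize` fallback and hoisting the bounds check above the indexing so an out-of-range `insert_idx` cannot raise:

```python
from enum import IntEnum


class OpRole(IntEnum):
    # Toy stand-in mirroring Paddle's op_role attribute values.
    Forward = 0x0000
    Backward = 0x0001
    Optimize = 0x0002
    RPC = 0x0004
    Loss = 0x0100


class ToyOp:
    """Minimal op exposing attr('op_role') the way a Paddle operator does."""

    def __init__(self, role):
        self._role = int(role)

    def attr(self, name):
        assert name == 'op_role'
        return self._role


class ToyBlock:
    def __init__(self, roles):
        self.ops = [ToyOp(r) for r in roles]


def get_valid_op_role(block, insert_idx):
    """Return the effective role of the op at insert_idx for insertion purposes."""
    # Bounds check first; the snippet above indexes before checking.
    if insert_idx >= len(block.ops):
        return OpRole.Optimize
    op_role = block.ops[insert_idx].attr('op_role')
    if op_role in [int(OpRole.Backward), int(OpRole.Optimize)]:
        return OpRole.Backward
    if op_role in [int(OpRole.Forward), int(OpRole.Loss)]:
        return OpRole.Forward
    # Role is not decisive (e.g. an RPC op): defer to the next op.
    return get_valid_op_role(block, insert_idx + 1)


block = ToyBlock([OpRole.Forward, OpRole.Loss, OpRole.Backward])
assert get_valid_op_role(block, 1) == OpRole.Forward
assert get_valid_op_role(block, 2) == OpRole.Backward
assert get_valid_op_role(ToyBlock([OpRole.RPC, OpRole.Optimize]), 0) == OpRole.Backward
```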
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py

```diff
@@ -153,7 +153,6 @@ class ShardingOptimizer(MetaOptimizerBase):
         if self.use_pipeline:
             pp_optimizer._rename_gradient_var_name(main_block)
-            pp_optimizer._accumulate_gradients(main_block)
             with open("main_%d" % self.role_maker._worker_index(), 'w') as f:
                 f.writelines(str(main_program))
 
@@ -201,6 +200,8 @@ class ShardingOptimizer(MetaOptimizerBase):
                 #if self._shard.has_param(param_name): continue
                 if in_name not in main_block.vars:
                     main_block._remove_op(idx)
+            accumulated_grad_names = pp_optimizer._accumulate_gradients(main_block)
+            # accumulated_grad_names = sorted(accumulated_grad_names)
             if self.pp_allreduce_in_optimize:
                 print("persistable FP32 grad: ")
```
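The net effect in this file is a reordering: `_accumulate_gradients` no longer runs right after the gradient rename, but only after ops whose inputs have disappeared from `main_block` are removed, and its return value (the merged gradient names) is now captured for later use. A schematic of that prune-then-accumulate ordering, using hypothetical dict-based ops rather than Paddle blocks:

```python
# Illustrative stand-in helpers, not Paddle APIs.

def prune_then_accumulate(block_ops, known_vars):
    # 1. Prune ops that read variables no longer present in the block.
    #    Iterate in reverse so removals do not shift unvisited indices.
    for idx in reversed(range(len(block_ops))):
        op = block_ops[idx]
        if any(name not in known_vars for name in op["inputs"]):
            del block_ops[idx]

    # 2. Only then accumulate gradients, so pruned ops cannot contribute,
    #    and keep the merged names for a later allreduce pass.
    accumulated_grad_names = [
        op["outputs"][0] for op in block_ops if op["type"] == "grad_merge"
    ]
    return accumulated_grad_names


ops = [
    {"type": "grad_merge", "inputs": ["x@GRAD"], "outputs": ["x@GRAD@MERGED"]},
    {"type": "grad_merge", "inputs": ["gone@GRAD"], "outputs": ["gone@GRAD@MERGED"]},
]
print(prune_then_accumulate(ops, known_vars={"x@GRAD"}))  # ['x@GRAD@MERGED']
```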
python/paddle/fluid/optimizer.py
```diff
@@ -4836,6 +4836,7 @@ class PipelineOptimizer(object):
             input_names = op.input_arg_names
             output_names = op.output_arg_names
             in_out_names = input_names + output_names
+            if op.type == 'cast': continue
             # append "MERGED" to the names of parameter gradients,
             # and mofify the op_role_var attribute (by rename_arg func).
             for name in in_out_names:
```
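This hunk makes the `@MERGED` renaming loop skip `cast` ops, so FP16 cast inputs and outputs keep their original names. A self-contained sketch of rename-with-skip, with dict-based toy ops and a hypothetical `is_param_grad` predicate standing in for the optimizer's internal checks:

```python
def append_merged_suffix(ops, is_param_grad):
    """Rename every parameter-gradient argument to '<name>@MERGED',
    skipping cast ops as the patched loop does."""
    for op in ops:
        if op["type"] == "cast":
            continue  # leave cast inputs/outputs untouched
        for key in ("inputs", "outputs"):
            op[key] = [
                name + "@MERGED" if is_param_grad(name) else name
                for name in op[key]
            ]
    return ops


ops = [
    {"type": "sum", "inputs": ["w@GRAD"], "outputs": ["w@GRAD"]},
    {"type": "cast", "inputs": ["w@GRAD"], "outputs": ["w@GRAD.cast_fp16"]},
]
append_merged_suffix(ops, is_param_grad=lambda n: n == "w@GRAD")
print(ops[0]["outputs"], ops[1]["inputs"])  # ['w@GRAD@MERGED'] ['w@GRAD']
```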
```diff
@@ -4857,13 +4858,16 @@ class PipelineOptimizer(object):
             if self._is_optimize_op(op) and op.type == 'cast':
                 in_name = op.input_arg_names[0]
                 out_name = op.output_arg_names[0]
-                if out_name.strip('@GRAD@MERGED') in self._param_device_map:
+                if out_name.strip('@GRAD') in self._param_device_map:
                     assert in_name.replace('.cast_fp16', '') == out_name
                     block._remove_op(index)
                 continue
 
             if self._is_backward_op(op) and not first_opt_op_idx:
                 first_opt_op_idx = index + 1
+                if block.ops[first_opt_op_idx].type == "c_sync_comm_stream":
+                    #block.ops[first_opt_op_idx]._set_attr(self._op_role_key, self._op_role.Backward)
+                    first_opt_op_idx += 1
 
             if self._is_backward_op(op) and (
                     self._op_role_var_key in op.attr_names):
```
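Here the backward/optimize boundary is adjusted: once `first_opt_op_idx` is set to the position after the last backward op, it is bumped one further if that position holds a `c_sync_comm_stream` op, so later insertions land after the communication sync. Schematically, under the assumption of a simple op list with an `is_backward` flag (the real code walks the block's ops with role attributes):

```python
def find_first_opt_op_idx(ops):
    """Index right after the last backward op, skipping a trailing comm sync."""
    first_opt_op_idx = None
    for index in reversed(range(len(ops))):
        if ops[index]["is_backward"] and first_opt_op_idx is None:
            first_opt_op_idx = index + 1
            if (first_opt_op_idx < len(ops)
                    and ops[first_opt_op_idx]["type"] == "c_sync_comm_stream"):
                first_opt_op_idx += 1  # insert after the stream sync, not before
    return first_opt_op_idx


ops = [
    {"type": "mul_grad", "is_backward": True},
    {"type": "c_sync_comm_stream", "is_backward": False},
    {"type": "sgd", "is_backward": False},
]
print(find_first_opt_op_idx(ops))  # 2: past the sync, at the sgd op
```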
```diff
@@ -4872,17 +4876,13 @@ class PipelineOptimizer(object):
                 if len(op_role_var) == 0:
                     continue
                 assert len(op_role_var) % 2 == 0
-                op._remove_attr(self._op_role_var_key)
+                # op._remove_attr(self._op_role_var_key)
                 for i in range(0, len(op_role_var), 2):
                     offset = 0
                     param_name = op_role_var[i]
-                    assert block.has_var(param_name), (
-                        "parameter {} not in "
-                        "current block.".format(param_name))
-                    # clear gradient
-                    assert param_name in self.origin_main_block.vars, "[{}] not in original main block".format(param_name)
-                    param_grad_name = self._append_grad_suffix(param_name)
+                    if not block.has_var(param_name): continue
+                    if '@BroadCast' in param_name: continue
                     param_grad_name = param_name + core.grad_var_suffix()
                     merged_param_grad_name = param_grad_name + '@MERGED'
                     if not block.has_var(merged_param_grad_name):
                         self._create_var(block, block.vars[param_name],
```
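This hunk swaps hard asserts for skip-guards and derives gradient names straight from the parameter name: the `@GRAD` suffix (what `core.grad_var_suffix()` returns in Paddle) followed by `@MERGED` for the accumulated copy. The naming rule in isolation, with the suffix hard-coded as an assumption:

```python
GRAD_VAR_SUFFIX = "@GRAD"  # assumed value of core.grad_var_suffix()


def merged_grad_name(param_name, block_vars):
    """Return the accumulated-gradient name for a parameter, or None to skip."""
    if param_name not in block_vars:   # skip-guard replaces the old assert
        return None
    if "@BroadCast" in param_name:     # broadcast copies carry no merged grad
        return None
    param_grad_name = param_name + GRAD_VAR_SUFFIX
    return param_grad_name + "@MERGED"


print(merged_grad_name("fc_0.w_0", {"fc_0.w_0"}))        # fc_0.w_0@GRAD@MERGED
print(merged_grad_name("x@BroadCast", {"x@BroadCast"}))  # None
```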
```diff
@@ -4944,7 +4944,7 @@ class PipelineOptimizer(object):
                         attrs={
                             # self._op_device_key: device,
                             self._op_role_key: self._op_role.Backward,
-                            self._op_role_var_key: op_role_var
+                            # self._op_role_var_key: op_role_var
                         })
                     offset += 1
                     merged_gradient_names.append(merged_param_grad_name)
```