机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit 6655077e
Authored on Mar 07, 2021 by sandyhouse

fix: set @Bcast@Grad to non persistable

Parent: a6879219
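
Background on the flag this commit toggles: in `paddle.fluid`'s static graph, a `Variable` with `persistable=True` lives in the global scope and survives across `Executor.run` calls, while non-persistable variables are temporaries that can be reclaimed after each run. That matches the pattern in the diff below, where the merged gradient (`param_grad_var`) is kept persistable and the temporary cast variable is not. A minimal sketch of the flag itself, with purely illustrative program and variable names:

```python
import paddle
import paddle.fluid as fluid

paddle.enable_static()

prog = fluid.Program()
block = prog.global_block()

# Accumulated mini-batch gradient: persistable, so it is kept in the global
# scope across executor runs instead of being freed as a temporary.
acc_grad = block.create_var(
    name='fc_0.w_0@GRAD', shape=[128, 128], dtype='float32', persistable=True)

# Per-micro-batch temporary (e.g. an fp16 gradient cast to fp32):
# non-persistable, so it can be garbage-collected after each run.
tmp_grad = block.create_var(
    name='fc_0.w_0@GRAD@TMP', shape=[128, 128], dtype='float32',
    persistable=False)

print(acc_grad.persistable, tmp_grad.persistable)  # True False
```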
Showing 1 changed file with 21 additions and 18 deletions.
python/paddle/fluid/optimizer.py @ 6655077e  (+21 -18)
```diff
@@ -4843,8 +4843,8 @@ class PipelineOptimizer(object):
         Accumulate the gradients generated in microbatch to the one in mini-batch.
         """
         first_optimize_op_index = None
         accumulated_grad_names = []
         for index, op in reversed(tuple(enumerate(list(block.ops)))):
             # device = op.attr(self._op_device_key)
             # remove the cast op of fp16 grad to fp32 grad
             if self._is_optimize_op(op) and op.type == 'cast':
                 in_name = op.input_arg_names[0]
```
```diff
@@ -4872,13 +4872,15 @@ class PipelineOptimizer(object):
                 for i in range(0, len(op_role_var), 2):
                     offset = 0
                     param_name = op_role_var[i]
                     if not block.has_var(param_name):
                         continue
                     # if not block.has_var(param_name): continue
                     if '@BroadCast' in param_name:
                         param_name = param_name[0:param_name.find('@BroadCast')]
                     # clear gradient
                     param_grad_name = self._append_grad_suffix(param_name)
                     # if not main_block.has_var(grad_name): continue
                     if not block.has_var(param_grad_name):
                         self._create_var(block, block.vars[param_name],
                                          param_grad_name)
                     accumulated_grad_names.append(param_grad_name)
                     # if not block.has_var(param_grad_name):
                     #     self._create_var(block, block.vars[param_name],
                     #                      param_grad_name)
                     assert block.has_var(param_grad_name)
                     param_grad_var = block.var(param_grad_name)
                     param_grad_var.persistable = True
```
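
The hunk above derives the merged gradient's name from the parameter name: a `@BroadCast` decoration, if present, is stripped, and the gradient suffix is then appended via `self._append_grad_suffix`. In plain Python the string handling amounts to the hypothetical helper below (`'@GRAD'` is the value `core.grad_var_suffix()` returns in fluid; the helper itself is only an illustration, not part of the optimizer):

```python
GRAD_SUFFIX = '@GRAD'  # value of core.grad_var_suffix() in paddle.fluid

def merged_grad_name(param_name: str) -> str:
    # Strip a broadcast decoration such as 'fc_0.w_0@BroadCast' back to the
    # plain parameter name, then append the gradient suffix.
    if '@BroadCast' in param_name:
        param_name = param_name[:param_name.find('@BroadCast')]
    return param_name + GRAD_SUFFIX

assert merged_grad_name('fc_0.w_0@BroadCast') == 'fc_0.w_0@GRAD'
assert merged_grad_name('fc_0.w_0') == 'fc_0.w_0@GRAD'
```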
```diff
@@ -4898,10 +4900,10 @@ class PipelineOptimizer(object):
                         #offset += 1
                         grad_name = op_role_var[i + 1]  # with _0 suffix
                         grad_var = block.vars[grad_name]
                         real_grad_name = grad_name[0:grad_name.find(
                             '@GRAD')] + '@GRAD'  # without _0 suffix
                         real_grad_var = block.vars[real_grad_name]  # without _0 suffix
                         # real_grad_name = grad_name[0:grad_name.find(
                         #     '@GRAD')] + '@GRAD' # without _0 suffix
                         # real_grad_var = block.vars[
                         #     real_grad_name] # without _0 suffix
                         # new_grad_var_name = unique_name.generate(grad_name)
                         # new_var = self._create_var(block, grad_var,
                         #                            new_grad_var_name)
```
```diff
@@ -4911,7 +4913,7 @@ class PipelineOptimizer(object):
                         block._insert_op(
                             index=index + 1,
                             type='sum',
-                            inputs={'X': [grad_var, real_grad_var]},
+                            inputs={'X': [grad_var, param_grad_var]},
                             outputs={'Out': real_grad_var},
                             attrs={
                                 #self._op_device_key: device,
```
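
The `sum` op inserted above adds the per-micro-batch gradient into the accumulated gradient in place. A self-contained sketch of the same pattern through the public `Block.append_op` interface (shapes and variable names are made up; the optimizer itself uses the private `block._insert_op` at a specific index, as in the hunk):

```python
import paddle
import paddle.fluid as fluid

paddle.enable_static()

prog = fluid.Program()
block = prog.global_block()

# Per-micro-batch gradient (temporary) and the running mini-batch accumulator.
micro_grad = block.create_var(
    name='w@GRAD@MICRO', shape=[8, 8], dtype='float32', persistable=False)
acc_grad = block.create_var(
    name='w@GRAD', shape=[8, 8], dtype='float32', persistable=True)

# acc_grad <- micro_grad + acc_grad, written back into acc_grad in place.
block.append_op(
    type='sum',
    inputs={'X': [micro_grad, acc_grad]},
    outputs={'Out': acc_grad})
```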
```diff
@@ -4922,13 +4924,13 @@ class PipelineOptimizer(object):
                     else:
                         grad_name = op_role_var[i + 1]  # with _0 suffix
                         grad_var = block.vars[grad_name]
                         fp32_grad_var_name = param_name + core.grad_var_suffix(
                         )  # without _0 suffix
                         fp32_grad_var = block.vars[fp32_grad_var_name]
                         fp32_grad_var.persistable = True
                         # fp32_grad_var_name = param_name + core.grad_var_suffix(
                         # ) # without _0 suffix
                         # fp32_grad_var = block.vars[fp32_grad_var_name]
                         # fp32_grad_var.persistable = True
                         cast_grad_var_name = unique_name.generate(fp32_grad_var_name)
-                        cast_grad_var = self._create_var(block, fp32_grad_var, param_grad_name)
+                        cast_grad_var = self._create_var(block, param_grad_var, cast_grad_var_name)
                         cast_grad_var.persistable = False
                         block._insert_op(
```
```diff
@@ -4947,7 +4949,7 @@ class PipelineOptimizer(object):
                         block._insert_op(
                             index=index + 2,
                             type='sum',
-                            inputs={'X': [fp32_grad_var, cast_grad_var]},
+                            inputs={'X': [param_grad_var, cast_grad_var]},
                             outputs={'Out': fp32_grad_var},
                             attrs={
                                 # self._op_device_key: device,
```
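
The previous two hunks handle fp16 gradients: a non-persistable fp32 temporary (`cast_grad_var`) is created from the micro-batch gradient and then summed into the persistable fp32 gradient. A standalone sketch of that cast-then-accumulate pattern, again with made-up names and the public `append_op` interface; the cast-op attributes follow the usual fluid convention:

```python
import paddle
import paddle.fluid as fluid
from paddle.fluid import core, unique_name

paddle.enable_static()

prog = fluid.Program()
block = prog.global_block()

# fp16 micro-batch gradient and the persistable fp32 accumulator.
fp16_grad = block.create_var(
    name='w@GRAD@MICRO', shape=[8, 8], dtype='float16', persistable=False)
fp32_acc_grad = block.create_var(
    name='w@GRAD', shape=[8, 8], dtype='float32', persistable=True)

# Cast the fp16 gradient into a throw-away fp32 temporary ...
cast_grad = block.create_var(
    name=unique_name.generate('w@GRAD@CAST'), shape=[8, 8], dtype='float32',
    persistable=False)
block.append_op(
    type='cast',
    inputs={'X': fp16_grad},
    outputs={'Out': cast_grad},
    attrs={'in_dtype': core.VarDesc.VarType.FP16,
           'out_dtype': core.VarDesc.VarType.FP32})

# ... and accumulate it into the persistable fp32 gradient.
block.append_op(
    type='sum',
    inputs={'X': [cast_grad, fp32_acc_grad]},
    outputs={'Out': fp32_acc_grad})
```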
```diff
@@ -4995,6 +4997,7 @@ class PipelineOptimizer(object):
                 #     self._op_role_key: self._op_role.Backward,
                 #     # self._op_role_var_key: op_role_var
                 # })
+        return first_optimize_op_index, accumulated_grad_names

     def _add_sub_blocks(self, main_block, program_list):
         main_program = main_block.program
```