s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection)
Commit 2835d5ad (unverified)
Authored by Yang Zhang on Feb 13, 2020
Committed by GitHub on Feb 13, 2020
Upgrade paddle API used in mixed precision training (#227)
Parent 59b70495
Showing 1 changed file with 32 additions and 38 deletions (+32 / -38)
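In outline, the diff swaps fluid's imperative layers.Switch / switch.case blocks for the functional layers.cond API and folds one two-branch clamp into layers.elementwise_max. Below is a minimal sketch of the old and new control-flow styles, with toy values; it assumes PaddlePaddle 1.x's fluid static-graph API and is not code from this repository:

import paddle.fluid as fluid
import paddle.fluid.layers as layers

prog = fluid.Program()
with fluid.program_guard(prog):
    x = layers.fill_constant(shape=[1], dtype='float32', value=3.0)
    floor = layers.fill_constant(shape=[1], dtype='float32', value=5.0)
    below = layers.less_than(x, floor)

    def raise_to_floor():
        layers.assign(floor, x)

    # Old style (what the diff removes): imperative Switch blocks
    #     with layers.Switch() as switch:
    #         with switch.case(below):
    #             layers.assign(floor, x)
    #         with switch.default():
    #             pass
    # New style (what the diff adds): cond takes branch callables;
    # false_fn may be omitted when the false branch does nothing
    layers.cond(below, raise_to_floor)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(prog, fetch_list=[x])
print(out)  # [5.]: the true branch ran and assigned into x

cond builds both branch sub-blocks at graph-construction time, which is why the branches are written as callables rather than inline statements.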
ppdet/experimental/mixed_precision.py (+32 / -38)
@@ -129,30 +129,27 @@ class DynamicLossScale(LossScale):
     def increment(self):
         enough_steps = layers.less_than(self.increment_every,
                                         self.good_steps + 1)
-        with layers.Switch() as switch:
-            with switch.case(enough_steps):
-                new_scale = self.scale * self.factor
-                scale_valid = layers.isfinite(new_scale)
-                with layers.Switch() as switch2:
-                    with switch2.case(scale_valid):
-                        layers.assign(new_scale, self.scale)
-                        layers.assign(layers.zeros_like(self.good_steps),
-                                      self.good_steps)
-                    with switch2.default():
-                        layers.increment(self.good_steps)
-            with switch.default():
-                layers.increment(self.good_steps)
+
+        def increment_step():
+            layers.increment(self.good_steps)
+
+        def maybe_update():
+            new_scale = self.scale * self.factor
+            scale_valid = layers.isfinite(new_scale)
+
+            def update_scale_and_step():
+                layers.assign(new_scale, self.scale)
+                layers.assign(
+                    layers.zeros_like(self.good_steps), self.good_steps)
+
+            layers.cond(scale_valid, update_scale_and_step)
+
+        layers.cond(enough_steps, maybe_update, increment_step)
 
     def decrement(self):
         new_scale = self.scale / self.factor
         one = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
-        less_than_one = layers.less_than(new_scale, one)
-        with layers.Switch() as switch:
-            with switch.case(less_than_one):
-                layers.assign(one, self.scale)
-            with switch.default():
-                layers.assign(new_scale, self.scale)
+        layers.assign(layers.elementwise_max(new_scale, one), self.scale)
         layers.assign(layers.zeros_like(self.good_steps),
                       self.good_steps)
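The decrement path above also drops a Switch in favor of a single elementwise_max that clamps the shrunken scale at 1.0. A runnable toy version of that clamp, assuming the same fluid API (values are illustrative):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

prog = fluid.Program()
with fluid.program_guard(prog):
    scale = layers.fill_constant(shape=[1], dtype='float32', value=1.5)
    new_scale = scale / 2.0  # 0.75, which falls below the floor of 1.0
    one = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
    # One op replaces the old case/default pair: clamp from below at 1.0
    layers.assign(layers.elementwise_max(new_scale, one), scale)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(prog, fetch_list=[scale])
print(out)  # [1.]: max(0.75, 1.0)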
@@ -275,12 +272,13 @@ def scale_gradient(block, context):
         fwd_var = block._var_recursive(context[name])
         if not isinstance(fwd_var, Parameter):
             continue  # TODO verify all use cases
-        clip_op_desc = block.desc.append_op()
-        clip_op_desc.set_type("elementwise_div")
-        clip_op_desc.set_input("X", [name])
-        clip_op_desc.set_input("Y", [scale.name])
-        clip_op_desc.set_output("Out", [name])
-        clip_op_desc._set_attr(op_role_attr_name, bwd_role)
+        scale_op_desc = block.desc.append_op()
+        scale_op_desc.set_type("elementwise_div")
+        scale_op_desc.set_input("X", [name])
+        scale_op_desc.set_input("Y", [scale.name])
+        scale_op_desc.set_output("Out", [name])
+        scale_op_desc._set_attr("axis", -1)
+        scale_op_desc._set_attr(op_role_attr_name, bwd_role)
 
 
 def update_loss_scale(grads):
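Besides renaming clip_op_desc to scale_op_desc, the hunk pins axis=-1 on the raw elementwise_div op desc it appends; that op divides a gradient by the loss scale, writing the result back over the input. A toy equivalent through the public layer, with hypothetical values (assuming fluid):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

prog = fluid.Program()
with fluid.program_guard(prog):
    grad = layers.fill_constant(shape=[4], dtype='float32', value=512.0)
    loss_scale = layers.fill_constant(shape=[1], dtype='float32', value=128.0)
    # axis=-1 broadcasts the [1]-shaped scale across the gradient,
    # mirroring the "axis" attribute the hunk sets on the raw op desc
    unscaled = layers.elementwise_div(grad, loss_scale, axis=-1)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(prog, fetch_list=[unscaled])
print(out)  # [4. 4. 4. 4.]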
@@ -289,12 +287,8 @@ def update_loss_scale(grads):
         return
 
     per_grad_check = layers.stack([layers.reduce_sum(g) for g in grads])
     grad_valid = layers.isfinite(per_grad_check)
-    with layers.Switch() as switch:
-        with switch.case(grad_valid):
-            state.increment()
-        with switch.default():
-            state.decrement()
+    layers.cond(grad_valid, lambda: state.increment(),
+                lambda: state.decrement())
     return grad_valid
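The surviving check above stacks one reduce_sum per gradient so that a single isfinite call covers them all, and the new layers.cond dispatches on the result with inline lambdas. A self-contained sketch of that pattern, with toy gradients standing in for real ones (assuming fluid; when both branches return a tensor, cond returns the chosen one):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

prog = fluid.Program()
with fluid.program_guard(prog):
    grads = [layers.fill_constant(shape=[3], dtype='float32', value=v)
             for v in (1.0, 2.0)]
    # One reduce_sum per gradient, stacked: any inf/nan poisons the stack
    per_grad_check = layers.stack([layers.reduce_sum(g) for g in grads])
    grad_valid = layers.isfinite(per_grad_check)
    step = layers.cond(grad_valid,
                       lambda: layers.fill_constant([1], 'float32', 1.0),
                       lambda: layers.fill_constant([1], 'float32', -1.0))

exe = fluid.Executor(fluid.CPUPlace())
valid, s = exe.run(prog, fetch_list=[grad_valid, step])
print(valid, s)  # [True] [1.]: both toy gradients are finite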
@@ -309,15 +303,15 @@ def backward(self, loss, **kwargs):
         else:
             kwargs['callbacks'] = callbacks
         param_grads = self._backward(loss, **kwargs)
+
+        def zero_grad():
+            for _, g in param_grads:
+                layers.assign(layers.zeros_like(g), g)
+
         if state is not None:
             grad_valid = update_loss_scale(v for k, v in param_grads)
             if state.dynamic_scaling:
-                with layers.Switch() as switch:
-                    with switch.case(grad_valid):
-                        pass
-                    with switch.default():
-                        for _, g in param_grads:
-                            layers.assign(layers.zeros_like(g), g)
+                layers.cond(grad_valid, None, zero_grad)
 
         return param_grads
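zero_grad is wired only into the false branch: layers.cond accepts None for a branch that should do nothing, so valid steps pass through untouched while overflowed steps zero every gradient by side effect. A toy version with a hard-coded False predicate standing in for a failed finiteness check (assuming fluid):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

prog = fluid.Program()
with fluid.program_guard(prog):
    g = layers.fill_constant(shape=[2], dtype='float32', value=3.0)
    zero = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
    one = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
    grad_valid = layers.less_than(one, zero)  # always False: 1 < 0

    def zero_grad():
        layers.assign(layers.zeros_like(g), g)

    # true_fn=None: leave gradients alone when valid, zero them otherwise
    layers.cond(grad_valid, None, zero_grad)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(prog, fetch_list=[g])
print(out)  # [0. 0.]: the false branch zeroed g in place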