PaddlePaddle / Paddle
Unverified commit be2e3e67
Authored by Zhen Wang on Nov 26, 2019
Committed by GitHub on Nov 26, 2019
Fix some typos in AMP. (#21354)
* fix some typos in AMP. test=develop
* delete useless codes. test=develop
Parent: afb13484
Showing 4 changed files with 17 additions and 32 deletions (+17 -32)
python/paddle/fluid/contrib/mixed_precision/decorator.py    +7  -5
python/paddle/fluid/contrib/mixed_precision/fp16_lists.py   +3  -3
python/paddle/fluid/contrib/mixed_precision/fp16_utils.py   +5  -22
python/paddle/fluid/incubate/fleet/base/fleet_base.py       +2  -2
python/paddle/fluid/contrib/mixed_precision/decorator.py
@@ -24,10 +24,10 @@ from .fp16_lists import AutoMixedPrecisionLists
 __all__ = ["decorate"]
 
 
-class OptimizerWithMixedPrecison(object):
+class OptimizerWithMixedPrecision(object):
     """
     Optimizer with mixed-precision (MP) training. This is a wrapper of a common
-    optimizer, plus the support of mixed-precision pretraining. The object
+    optimizer, plus the support of mixed-precision pre-training. The object
     of this class almost has the same behavior as the common optimizer, with the
     methods `minimize()`, `backward()`, `apply_gradients()` implemented.
     Additionally, it enables the MP training automatically, i.e, the creation
@@ -116,7 +116,7 @@ class OptimizerWithMixedPrecison(object):
                  no_grad_set=None,
                  callbacks=None):
         """
-        Backward propogation or auto differentiation for gradients' computation.
+        Backward propagation or auto differentiation for gradients' computation.
 
         Args:
             loss (Variable): The loss Variable to minimize.
@@ -124,7 +124,7 @@ class OptimizerWithMixedPrecison(object):
                 parameters in `parameter_list`.
             parameter_list (list|None): A list of Variables to update.
             no_grad_set (set|None): A set of Variables should be ignored.
-            callbacks (list|None): A list of callables to run when appending
+            callbacks (list|None): A list of callable objects to run when appending
                 backward operator for one parameter.
 
         Returns:
@@ -136,6 +136,8 @@ class OptimizerWithMixedPrecison(object):
         self._params_grads = self._optimizer.backward(
             self._scaled_loss, startup_program, parameter_list, no_grad_set,
             callbacks)
+        # Change the op_role_var attr for some ops, so that gradients
+        # transferred across GPUs can be FP16.
         update_role_var_grad(self._train_program, self._params_grads)
         scaled_params_grads = []
         for p, g in self._params_grads:
@@ -257,7 +259,7 @@ def decorate(optimizer,
     """
     if amp_lists is None:
         amp_lists = AutoMixedPrecisionLists()
-    mp_optimizer = OptimizerWithMixedPrecison(
+    mp_optimizer = OptimizerWithMixedPrecision(
         optimizer, amp_lists, init_loss_scaling, use_dynamic_loss_scaling,
         incr_every_n_steps, decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
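The class renamed above is the public entry point for AMP training in Fluid, exposed through `decorate()`. Below is a minimal usage sketch, not part of this commit, assuming the Fluid 1.6-era layer and optimizer APIs and the keyword defaults of `decorate()`; the toy network is purely illustrative.

    import paddle.fluid as fluid
    from paddle.fluid.contrib.mixed_precision import decorate

    # Toy FP32 network; decorate() takes care of the FP16 casts and loss scaling.
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    logits = fluid.layers.fc(input=image, size=10)
    loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(logits, label))

    optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
    # decorate() wraps the optimizer in OptimizerWithMixedPrecision, which scales
    # the loss, rewrites the program for FP16, and manages dynamic loss scaling.
    mp_optimizer = decorate(optimizer,
                            init_loss_scaling=128.0,
                            use_dynamic_loss_scaling=True)
    mp_optimizer.minimize(loss)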
python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
@@ -22,7 +22,7 @@ class AutoMixedPrecisionLists(object):
     AutoMixedPrecisionLists is a class for black/white list. It can update
     pre-defined black list and white list according to users' custom black
     white lists. The lists are used for an algorithm which determines op's
-    exectuion mode (fp32 or fp16).
+    execution mode (fp32 or fp16).
 
     Args:
         custom_white_list (set): Users' custom white list.
@@ -95,7 +95,7 @@ black_list = {
 # This set contains two types of ops. All ops supported fp16 calculation. One
 # of two types is considered numerically-safe, but may be made unsafe by an
-# updtream blacklist op. Another type do not have numerically-significant
+# upstream blacklist op. Another type do not have numerically-significant
 # effects, like stack, flatten2.
 gray_list = {
     'elementwise_add',
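As the docstring says, the pre-defined lists can be overridden per op. A hedged sketch of wiring a custom white list into `decorate()`; the op names are chosen only for illustration and the keyword name `amp_lists` follows the `decorate()` signature shown in decorator.py.

    import paddle.fluid as fluid
    from paddle.fluid.contrib.mixed_precision import decorate
    from paddle.fluid.contrib.mixed_precision.fp16_lists import AutoMixedPrecisionLists

    # Force these ops to run in FP16 even where the default lists are more conservative.
    amp_lists = AutoMixedPrecisionLists(custom_white_list={'pool2d', 'elementwise_add'})
    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    mp_optimizer = decorate(optimizer, amp_lists=amp_lists)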
python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
@@ -16,24 +16,6 @@ from __future__ import print_function
 from ... import core
 from ... import layers
 from ... import framework
-
-
-def append_cast_op(i, o, prog):
-    """
-    Append a cast op in a given Program to cast input `i` to data type `o.dtype`.
-
-    Args:
-        i (Variable): The input Variable.
-        o (Variable): The output Variable.
-        prog (Program): The Program to append cast op.
-    """
-    prog.global_block().append_op(
-        type="cast",
-        inputs={"X": i},
-        outputs={"Out": o},
-        attrs={"in_dtype": i.dtype,
-               "out_dtype": o.dtype})
 
 
 def _rename_arg(op, old_name, new_name):
@@ -75,7 +57,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype):
         op (Operator): The operator to insert cast op.
         idx (int): The index of current operator.
         src_dtype (VarType): The input variable dtype of cast op.
-        desr_dtype (VarType): The output variable dtype of cast op.
+        dest_dtype (VarType): The output variable dtype of cast op.
 
     Returns:
         num_cast_op (int): The number of cast ops that have been inserted.
@@ -261,7 +243,7 @@ def rewrite_program(main_prog, amp_lists):
 def update_role_var_grad(main_prog, params_grads):
     """
     Update op_role_var attr for some ops to make sure the gradients
-    transfered across gpus is FP16.
+    transferred across GPUs is FP16.
     1. Check whether the op that outputs gradient is cast or not.
     2. If op is cast and gradient is FP32, remove the op_role_var
        and find the prev op which outputs FP16 gradient
@@ -293,7 +275,8 @@ def update_role_var_grad(main_prog, params_grads):
                 attr_val.extend(op_for_fp16_grad.attr(op_role_var_attr_name))
             op_for_fp16_grad._set_attr(op_role_var_attr_name, attr_val)
-            # maximize the allreduce overlap
+            # Maximize the all_reduce overlap, and perform the cast
+            # operation after gradients transfer.
             op._set_attr('op_role', OPTIMIZE)
@@ -303,7 +286,7 @@ def update_loss_scaling(is_overall_finite, prev_loss_scaling, num_good_steps,
     """
     Update loss scaling according to overall gradients. If all gradients is
     finite after incr_every_n_steps, loss scaling will increase by incr_ratio.
-    Otherwisw, loss scaling will decrease by decr_ratio after
+    Otherwise, loss scaling will decrease by decr_ratio after
     decr_every_n_nan_or_inf steps and each step some gradients are infinite.
 
     Args:
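The docstring above fully specifies the dynamic loss-scaling policy. The following is a plain-Python illustration of that rule, not the Paddle kernel (which operates on Variables inside the program); the floor of 1.0 when scaling down is an assumption made for this sketch.

    def update_loss_scaling_sketch(is_overall_finite, loss_scaling,
                                   num_good_steps, num_bad_steps,
                                   incr_every_n_steps, decr_every_n_nan_or_inf,
                                   incr_ratio, decr_ratio):
        """Pure-Python sketch of the dynamic loss-scaling update rule."""
        if is_overall_finite:
            num_good_steps += 1
            num_bad_steps = 0
            if num_good_steps == incr_every_n_steps:
                # Enough consecutive finite steps: scale up.
                loss_scaling *= incr_ratio
                num_good_steps = 0
        else:
            num_good_steps = 0
            num_bad_steps += 1
            if num_bad_steps == decr_every_n_nan_or_inf:
                # Too many steps with inf/nan gradients: scale down
                # (floored at 1.0, an assumption in this sketch).
                loss_scaling = max(loss_scaling * decr_ratio, 1.0)
                num_bad_steps = 0
        return loss_scaling, num_good_steps, num_bad_steps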
python/paddle/fluid/incubate/fleet/base/fleet_base.py
@@ -23,7 +23,7 @@ from paddle.fluid.optimizer import SGD
 from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
 from paddle.fluid.incubate.fleet.base.role_maker import RoleMakerBase
 from paddle.fluid.incubate.fleet.base.role_maker import UserDefinedRoleMaker
-from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecison
+from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecision
 
 
 class Mode:
@@ -259,7 +259,7 @@ class DistributedOptimizer(object):
     def __init__(self, optimizer, strategy=None):
         if not isinstance(optimizer, SGD.__bases__) \
-                and not isinstance(optimizer, OptimizerWithMixedPrecison):
+                and not isinstance(optimizer, OptimizerWithMixedPrecision):
             raise TypeError("optimizer must be an instance of Optimizer")
 
         self._optimizer = optimizer
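The renamed import matters because `DistributedOptimizer.__init__` accepts either a plain Optimizer or the AMP wrapper. A hedged end-to-end sketch with collective Fleet follows; the role-maker and collective fleet module paths are assumptions based on the Fluid 1.6-era incubate API and are not part of this commit.

    import paddle.fluid as fluid
    from paddle.fluid.contrib.mixed_precision import decorate
    from paddle.fluid.incubate.fleet.base import role_maker
    from paddle.fluid.incubate.fleet.collective import fleet

    # Collective role maker; normally launched via paddle.distributed.launch.
    fleet.init(role_maker.PaddleCloudRoleMaker(is_collective=True))

    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(
            fluid.layers.fc(input=image, size=10), label))

    # The decorated optimizer passes the isinstance check patched above.
    mp_optimizer = decorate(
        fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9))
    fleet.distributed_optimizer(mp_optimizer).minimize(loss)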