Unverified commit be2e3e67, authored by Zhen Wang, committed by GitHub

Fix some typos in AMP. (#21354)

* fix some typos in AMP. test=develop

* delete useless codes. test=develop
Parent afb13484
@@ -24,10 +24,10 @@ from .fp16_lists import AutoMixedPrecisionLists
__all__ = ["decorate"]
- class OptimizerWithMixedPrecison(object):
+ class OptimizerWithMixedPrecision(object):
"""
Optimizer with mixed-precision (MP) training. This is a wrapper of a common
- optimizer, plus the support of mixed-precision pretraining. The object
+ optimizer, plus the support of mixed-precision pre-training. The object
of this class almost has the same behavior as the common optimizer, with the
methods `minimize()`, `backward()`, `apply_gradients()` implemented.
Additionally, it enables the MP training automatically, i.e, the creation
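The class described by this docstring is obtained through `decorate()` (shown further down in this diff) rather than instantiated directly. A minimal usage sketch, assuming `decorate` is re-exported at the `mixed_precision` package level and that the loss-scaling keyword names match the constructor arguments visible later in the diff:

```python
import paddle.fluid as fluid
from paddle.fluid.contrib import mixed_precision

# A tiny illustrative network: one FC layer and a mean loss.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(y)

sgd = fluid.optimizer.SGD(learning_rate=0.001)
# decorate() wraps the plain optimizer into an OptimizerWithMixedPrecision;
# the loss-scaling keyword names are assumed from the constructor call below.
mp_sgd = mixed_precision.decorate(
    sgd,
    init_loss_scaling=128.0,
    use_dynamic_loss_scaling=True)

# Behaves like the plain optimizer's minimize(): the loss is scaled, gradients
# are computed (FP16 where the op lists allow it) and unscaled before update.
mp_sgd.minimize(loss)
```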
@@ -116,7 +116,7 @@ class OptimizerWithMixedPrecison(object):
no_grad_set=None,
callbacks=None):
"""
- Backward propogation or auto differentiation for gradients' computation.
+ Backward propagation or auto differentiation for gradients' computation.
Args:
loss (Variable): The loss Variable to minimize.
@@ -124,7 +124,7 @@ class OptimizerWithMixedPrecison(object):
parameters in `parameter_list`.
parameter_list (list|None): A list of Variables to update.
no_grad_set (set|None): A set of Variables should be ignored.
- callbacks (list|None): A list of callables to run when appending
+ callbacks (list|None): A list of callable objects to run when appending
backward operator for one parameter.
Returns:
@@ -136,6 +136,8 @@ class OptimizerWithMixedPrecison(object):
self._params_grads = self._optimizer.backward(
self._scaled_loss, startup_program, parameter_list, no_grad_set,
callbacks)
+ # Change the op_role_var attr for some ops, so that gradients
+ # transferred across GPUs can be FP16.
update_role_var_grad(self._train_program, self._params_grads)
scaled_params_grads = []
for p, g in self._params_grads:
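The truncated loop above collects the (param, grad) pairs produced from the scaled loss. Since `backward()` ran on `self._scaled_loss`, those gradients still carry the scaling factor; a hedged sketch of the unscaling idea follows (the function name and the exact arithmetic are assumptions, not the file's code):

```python
def unscale_gradients(params_grads, loss_scaling):
    """Divide each gradient by the loss-scaling factor before it is applied.

    params_grads: list of (param, grad) pairs as returned by backward();
    loss_scaling: the current scaling factor (a float here for illustration).
    """
    scaled_params_grads = []
    for p, g in params_grads:
        # Undo the loss scaling so the parameter update uses true gradients.
        scaled_params_grads.append([p, g / loss_scaling])
    return scaled_params_grads
```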
@@ -257,7 +259,7 @@ def decorate(optimizer,
"""
if amp_lists is None:
amp_lists = AutoMixedPrecisionLists()
- mp_optimizer = OptimizerWithMixedPrecison(
+ mp_optimizer = OptimizerWithMixedPrecision(
optimizer, amp_lists, init_loss_scaling, use_dynamic_loss_scaling,
incr_every_n_steps, decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
......
@@ -22,7 +22,7 @@ class AutoMixedPrecisionLists(object):
AutoMixedPrecisionLists is a class for black/white list. It can update
pre-defined black list and white list according to users' custom black
white lists. The lists are used for an algorithm which determines op's
- exectuion mode (fp32 or fp16).
+ execution mode (fp32 or fp16).
Args:
custom_white_list (set): Users' custom white list.
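A short sketch of how custom lists might be supplied, assuming the class is importable from the `mixed_precision` package; the `custom_black_list` keyword is inferred from the description above and the op names are arbitrary examples:

```python
from paddle.fluid.contrib import mixed_precision

# Move 'tanh' into the white list (run in FP16) and force 'softmax' to stay
# in FP32; the custom_black_list keyword is assumed from the class description.
amp_lists = mixed_precision.AutoMixedPrecisionLists(
    custom_white_list={'tanh'},
    custom_black_list={'softmax'})

# The resulting lists object is then passed to decorate() via its amp_lists argument.
```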
@@ -95,7 +95,7 @@ black_list = {
# This set contains two types of ops. All ops supported fp16 calculation. One
# of two types is considered numerically-safe, but may be made unsafe by an
- # updtream blacklist op. Another type do not have numerically-significant
+ # upstream blacklist op. Another type do not have numerically-significant
# effects, like stack, flatten2.
gray_list = {
'elementwise_add',
......
@@ -16,24 +16,6 @@ from __future__ import print_function
from ... import core
from ... import layers
from ... import framework
- def append_cast_op(i, o, prog):
-     """
-     Append a cast op in a given Program to cast input `i` to data type `o.dtype`.
-     Args:
-         i (Variable): The input Variable.
-         o (Variable): The output Variable.
-         prog (Program): The Program to append cast op.
-     """
-     prog.global_block().append_op(
-         type="cast",
-         inputs={"X": i},
-         outputs={"Out": o},
-         attrs={"in_dtype": i.dtype,
-                "out_dtype": o.dtype})
def _rename_arg(op, old_name, new_name):
@@ -75,7 +57,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype):
op (Operator): The operator to insert cast op.
idx (int): The index of current operator.
src_dtype (VarType): The input variable dtype of cast op.
- desr_dtype (VarType): The output variable dtype of cast op.
+ dest_dtype (VarType): The output variable dtype of cast op.
Returns:
num_cast_op (int): The number of cast ops that have been inserted.
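For reference, the cast itself follows the same pattern as the `append_cast_op` helper removed above, except that the new op is inserted at a specific index of a block. A rough sketch, assuming `Block.create_var` and the private `Block._insert_op` API of this framework version; it is not the body of `_insert_cast_op`:

```python
def insert_cast(block, idx, in_var, dest_dtype):
    # Create an output variable of the target dtype, then insert a cast op at
    # position idx so the consuming operator reads the casted value.
    out_var = block.create_var(
        name=in_var.name + '.cast',
        dtype=dest_dtype,
        persistable=False)
    block._insert_op(
        idx,
        type="cast",
        inputs={"X": in_var},
        outputs={"Out": out_var},
        attrs={"in_dtype": in_var.dtype,
               "out_dtype": out_var.dtype})
    return out_var
```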
@@ -261,7 +243,7 @@ def rewrite_program(main_prog, amp_lists):
def update_role_var_grad(main_prog, params_grads):
"""
Update op_role_var attr for some ops to make sure the gradients
- transfered across gpus is FP16.
+ transferred across GPUs is FP16.
1. Check whether the op that outputs gradient is cast or not.
2. If op is cast and gradient is FP32, remove the op_role_var
and find the prev op which outputs FP16 gradient
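A heavily condensed sketch of those two steps; `find_producer_op()` is a hypothetical helper, and the real function keeps more bookkeeping (per-parameter gradient names, already-visited ops) than shown here:

```python
from paddle.fluid import core

OP_ROLE_VAR = core.op_proto_and_checker_maker.kOpRoleVarAttrName()
OPTIMIZE = core.op_proto_and_checker_maker.OpRole.Optimize

def move_role_var_to_fp16_producer(block, grad_op):
    # Step 1: only a cast op whose output is the FP32 gradient needs rewriting.
    if grad_op.type != "cast":
        return
    # Step 2: hand the op_role_var attribute to the upstream op that produced
    # the FP16 gradient, so the cross-GPU allreduce transfers FP16 data, and
    # run the cast in the optimize stage, i.e. after the transfer.
    fp16_grad_name = grad_op.input("X")[0]
    producer = find_producer_op(block, fp16_grad_name)  # hypothetical helper
    producer._set_attr(OP_ROLE_VAR, grad_op.attr(OP_ROLE_VAR))
    grad_op._set_attr('op_role', OPTIMIZE)
```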
@@ -293,7 +275,8 @@ def update_role_var_grad(main_prog, params_grads):
attr_val.extend(op_for_fp16_grad.attr(op_role_var_attr_name))
op_for_fp16_grad._set_attr(op_role_var_attr_name, attr_val)
- # maximize the allreduce overlap
+ # Maximize the all_reduce overlap, and perform the cast
+ # operation after gradients transfer.
op._set_attr('op_role', OPTIMIZE)
@@ -303,7 +286,7 @@ def update_loss_scaling(is_overall_finite, prev_loss_scaling, num_good_steps,
"""
Update loss scaling according to overall gradients. If all gradients is
finite after incr_every_n_steps, loss scaling will increase by incr_ratio.
- Otherwisw, loss scaling will decrease by decr_ratio after
+ Otherwise, loss scaling will decrease by decr_ratio after
decr_every_n_nan_or_inf steps and each step some gradients are infinite.
Args:
......
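The docstring above fully specifies the update rule, so a plain-Python restatement may help; the real `update_loss_scaling` expresses the same policy with fluid control-flow ops operating on Variables, and `num_bad_steps` is an assumed name for the inf/nan counter:

```python
def loss_scaling_policy(is_overall_finite, prev_loss_scaling,
                        num_good_steps, num_bad_steps,
                        incr_every_n_steps, decr_every_n_nan_or_inf,
                        incr_ratio, decr_ratio):
    if is_overall_finite:
        num_good_steps += 1
        num_bad_steps = 0
        # Enough consecutive finite steps: grow the scaling factor.
        if num_good_steps == incr_every_n_steps:
            prev_loss_scaling *= incr_ratio
            num_good_steps = 0
    else:
        num_good_steps = 0
        num_bad_steps += 1
        # Enough consecutive steps with inf/nan gradients: shrink it.
        if num_bad_steps == decr_every_n_nan_or_inf:
            prev_loss_scaling *= decr_ratio
            num_bad_steps = 0
    return prev_loss_scaling, num_good_steps, num_bad_steps
```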
@@ -23,7 +23,7 @@ from paddle.fluid.optimizer import SGD
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
from paddle.fluid.incubate.fleet.base.role_maker import RoleMakerBase
from paddle.fluid.incubate.fleet.base.role_maker import UserDefinedRoleMaker
- from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecison
+ from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecision
class Mode:
@@ -259,7 +259,7 @@ class DistributedOptimizer(object):
def __init__(self, optimizer, strategy=None):
if not isinstance(optimizer, SGD.__bases__) \
- and not isinstance(optimizer, OptimizerWithMixedPrecison):
+ and not isinstance(optimizer, OptimizerWithMixedPrecision):
raise TypeError("optimizer must be an instance of Optimizer")
self._optimizer = optimizer
......