From be2e3e67d938f956f82034fd32b359df6dca48a5 Mon Sep 17 00:00:00 2001
From: Zhen Wang
Date: Tue, 26 Nov 2019 18:56:22 +0800
Subject: [PATCH] Fix some typos in AMP. (#21354)

* fix some typos in AMP. test=develop

* delete useless codes. test=develop
---
 .../contrib/mixed_precision/decorator.py     | 12 +++++----
 .../contrib/mixed_precision/fp16_lists.py    |  6 ++---
 .../contrib/mixed_precision/fp16_utils.py    | 27 ++++---------------
 .../fluid/incubate/fleet/base/fleet_base.py  |  4 +--
 4 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/python/paddle/fluid/contrib/mixed_precision/decorator.py b/python/paddle/fluid/contrib/mixed_precision/decorator.py
index 1a6725b075..bf3d60ae45 100644
--- a/python/paddle/fluid/contrib/mixed_precision/decorator.py
+++ b/python/paddle/fluid/contrib/mixed_precision/decorator.py
@@ -24,10 +24,10 @@ from .fp16_lists import AutoMixedPrecisionLists
 __all__ = ["decorate"]
 
 
-class OptimizerWithMixedPrecison(object):
+class OptimizerWithMixedPrecision(object):
     """
     Optimizer with mixed-precision (MP) training. This is a wrapper of a common
-    optimizer, plus the support of mixed-precision pretraining. The object
+    optimizer, plus the support of mixed-precision pre-training. The object
     of this class almost has the same behavior as the common optimizer, with the
     methods `minimize()`, `backward()`, `apply_gradients()` implemented.
     Additionally, it enables the MP training automatically, i.e, the creation
@@ -116,7 +116,7 @@ class OptimizerWithMixedPrecison(object):
                  no_grad_set=None,
                  callbacks=None):
         """
-        Backward propogation or auto differentiation for gradients' computation.
+        Backward propagation or auto differentiation for gradients' computation.
 
         Args:
             loss (Variable): The loss Variable to minimize.
@@ -124,7 +124,7 @@ class OptimizerWithMixedPrecison(object):
                 parameters in `parameter_list`.
             parameter_list (list|None): A list of Variables to update.
             no_grad_set (set|None): A set of Variables should be ignored.
-            callbacks (list|None): A list of callables to run when appending
+            callbacks (list|None): A list of callable objects to run when appending
                                    backward operator for one parameter.
 
         Returns:
@@ -136,6 +136,8 @@ class OptimizerWithMixedPrecison(object):
         self._params_grads = self._optimizer.backward(
             self._scaled_loss, startup_program, parameter_list, no_grad_set,
             callbacks)
+        # Change the op_role_var attr for some ops, so that gradients
+        # transferred across GPUs can be FP16.
         update_role_var_grad(self._train_program, self._params_grads)
         scaled_params_grads = []
         for p, g in self._params_grads:
@@ -257,7 +259,7 @@ def decorate(optimizer,
     """
     if amp_lists is None:
         amp_lists = AutoMixedPrecisionLists()
-    mp_optimizer = OptimizerWithMixedPrecison(
+    mp_optimizer = OptimizerWithMixedPrecision(
         optimizer, amp_lists, init_loss_scaling, use_dynamic_loss_scaling,
         incr_every_n_steps, decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
 
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
index 0c7e623d46..1f301b7148 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
@@ -22,7 +22,7 @@ class AutoMixedPrecisionLists(object):
     AutoMixedPrecisionLists is a class for black/white list. It can update
     pre-defined black list and white list according to users' custom black
     white lists. The lists are used for an algorithm which determines op's
-    exectuion mode (fp32 or fp16).
+    execution mode (fp32 or fp16).
 
     Args:
         custom_white_list (set): Users' custom white list.
@@ -95,7 +95,7 @@ black_list = {
 
 # This set contains two types of ops. All ops supported fp16 calculation. One
 # of two types is considered numerically-safe, but may be made unsafe by an
-# updtream blacklist op. Another type do not have numerically-significant
+# upstream blacklist op. Another type do not have numerically-significant
 # effects, like stack, flatten2.
 gray_list = {
     'elementwise_add',
@@ -139,7 +139,7 @@ gray_list = {
 '''
 # The set of ops that don't support fp16 calculation
 unsupported_fp16_list = {
-    # from python/paddle/fluid/layers/io.py
+    # from python/paddle/fluid/layers/io.py
     'send',
     'send_barrier',
     'recv',
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
index 1a4eae3e61..78f16c39db 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
@@ -16,24 +16,6 @@ from __future__ import print_function
 
 from ... import core
 from ... import layers
-from ... import framework
-
-
-def append_cast_op(i, o, prog):
-    """
-    Append a cast op in a given Program to cast input `i` to data type `o.dtype`.
-
-    Args:
-        i (Variable): The input Variable.
-        o (Variable): The output Variable.
-        prog (Program): The Program to append cast op.
-    """
-    prog.global_block().append_op(
-        type="cast",
-        inputs={"X": i},
-        outputs={"Out": o},
-        attrs={"in_dtype": i.dtype,
-               "out_dtype": o.dtype})
 
 
 def _rename_arg(op, old_name, new_name):
@@ -75,7 +57,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype):
         op (Operator): The operator to insert cast op.
         idx (int): The index of current operator.
         src_dtype (VarType): The input variable dtype of cast op.
-        desr_dtype (VarType): The output variable dtype of cast op.
+        dest_dtype (VarType): The output variable dtype of cast op.
 
     Returns:
         num_cast_op (int): The number of cast ops that have been inserted.
@@ -261,7 +243,7 @@ def rewrite_program(main_prog, amp_lists):
 def update_role_var_grad(main_prog, params_grads):
     """
     Update op_role_var attr for some ops to make sure the gradients
-    transfered across gpus is FP16.
+    transferred across GPUs is FP16.
     1. Check whether the op that outputs gradient is cast or not.
     2. If op is cast and gradient is FP32, remove the op_role_var
        and find the prev op which outputs FP16 gradient
@@ -293,7 +275,8 @@ def update_role_var_grad(main_prog, params_grads):
                 attr_val.extend(op_for_fp16_grad.attr(op_role_var_attr_name))
                 op_for_fp16_grad._set_attr(op_role_var_attr_name, attr_val)
 
-            # maximize the allreduce overlap
+            # Maximize the all_reduce overlap, and perform the cast
+            # operation after gradients transfer.
             op._set_attr('op_role', OPTIMIZE)
 
 
@@ -303,7 +286,7 @@ def update_loss_scaling(is_overall_finite, prev_loss_scaling, num_good_steps,
     """
     Update loss scaling according to overall gradients. If all gradients is
     finite after incr_every_n_steps, loss scaling will increase by incr_ratio.
-    Otherwisw, loss scaling will decrease by decr_ratio after
+    Otherwise, loss scaling will decrease by decr_ratio after
     decr_every_n_nan_or_inf steps and each step some gradients are infinite.
 
     Args:
diff --git a/python/paddle/fluid/incubate/fleet/base/fleet_base.py b/python/paddle/fluid/incubate/fleet/base/fleet_base.py
index 8e7cee1fb6..93d30d3d74 100644
--- a/python/paddle/fluid/incubate/fleet/base/fleet_base.py
+++ b/python/paddle/fluid/incubate/fleet/base/fleet_base.py
@@ -23,7 +23,7 @@ from paddle.fluid.optimizer import SGD
 from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
 from paddle.fluid.incubate.fleet.base.role_maker import RoleMakerBase
 from paddle.fluid.incubate.fleet.base.role_maker import UserDefinedRoleMaker
-from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecison
+from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecision
 
 
 class Mode:
@@ -259,7 +259,7 @@ class DistributedOptimizer(object):
 
     def __init__(self, optimizer, strategy=None):
         if not isinstance(optimizer, SGD.__bases__) \
-                and not isinstance(optimizer, OptimizerWithMixedPrecison):
+                and not isinstance(optimizer, OptimizerWithMixedPrecision):
             raise TypeError("optimizer must be an instance of Optimizer")
 
         self._optimizer = optimizer
-- 
GitLab
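
For context, the public entry point touched by this patch is `decorate()`, which wraps a regular optimizer in the renamed `OptimizerWithMixedPrecision` class. Below is a minimal usage sketch; the toy fc/cross-entropy network, the feed names, and the hyper-parameter values are illustrative assumptions and not part of the patch:

    import paddle.fluid as fluid

    # Build any fp32 network; the AMP rewrite later decides per-op fp16/fp32
    # execution from the black/white/gray lists in fp16_lists.py.
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    fc = fluid.layers.fc(input=image, size=10, act='softmax')
    loss = fluid.layers.mean(fluid.layers.cross_entropy(input=fc, label=label))

    optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
    # decorate() returns an OptimizerWithMixedPrecision; its minimize() adds
    # the cast ops and (dynamic) loss scaling described in the docstrings above.
    mp_optimizer = fluid.contrib.mixed_precision.decorate(
        optimizer=optimizer,
        init_loss_scaling=128.0,
        use_dynamic_loss_scaling=True)
    mp_optimizer.minimize(loss)

    # Train as usual, e.g. with fluid.Executor(fluid.CUDAPlace(0)) running
    # fluid.default_main_program(); fp16 kernels require a CUDA place.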