diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 793a421cedeca993891b1cbc28fded2ca6d5ca74..edaacfdf97d30aa3aaef463a7df36ce47773d010 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -426,7 +426,8 @@ paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'loca paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a')) paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a')) paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4')) -paddle.fluid.contrib.mixed_precision.decorate (ArgSpec(args=['optimizer', 'init_loss_scaling', 'incr_every_n_steps', 'decr_every_n_nan_or_inf', 'incr_ratio', 'decr_ratio', 'use_dynamic_loss_scaling'], varargs=None, keywords=None, defaults=(1.0, 1000, 2, 2.0, 0.8, False)), ('document', 'bdb8f9dbb0d94b3957272c53eeee9818')) +paddle.fluid.contrib.mixed_precision.decorate (ArgSpec(args=['optimizer', 'amp_lists', 'init_loss_scaling', 'incr_every_n_steps', 'decr_every_n_nan_or_inf', 'incr_ratio', 'decr_ratio', 'use_dynamic_loss_scaling'], varargs=None, keywords=None, defaults=(None, 1.0, 1000, 2, 2.0, 0.8, False)), ('document', 'd05e71f5b0bd6d92bb94e70e00b3f9cf')) +paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists.__init__ (ArgSpec(args=['self', 'custom_white_list', 'custom_black_list'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.fused_elemwise_activation (ArgSpec(args=['x', 'y', 'functor_list', 'axis', 'scale', 'save_intermediate_out'], 
varargs=None, keywords=None, defaults=(-1, 0.0, True)), ('document', '1c4b247a2858cea8d9d8750693688270')) paddle.fluid.contrib.BasicGRUUnit.__init__ (ArgSpec(args=['self', 'name_scope', 'hidden_size', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype'], varargs=None, keywords=None, defaults=(None, None, None, None, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.BasicGRUUnit.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1')) diff --git a/python/paddle/fluid/contrib/mixed_precision/__init__.py b/python/paddle/fluid/contrib/mixed_precision/__init__.py index c2c3dc284f519abc183e90a12f45a7ad8b04d14f..c6296bcac93015c5f6c55861575a45a3a33b3628 100644 --- a/python/paddle/fluid/contrib/mixed_precision/__init__.py +++ b/python/paddle/fluid/contrib/mixed_precision/__init__.py @@ -15,5 +15,7 @@ from __future__ import print_function from . import decorator from .decorator import * +from .fp16_lists import AutoMixedPrecisionLists __all__ = decorator.__all__ +__all__ += fp16_lists.__all__ diff --git a/python/paddle/fluid/contrib/mixed_precision/decorator.py b/python/paddle/fluid/contrib/mixed_precision/decorator.py index 5a4b94a8d41b6b5953a50919037d033430e908c5..abca8c52a454891db35de20d03679774a9861c5b 100644 --- a/python/paddle/fluid/contrib/mixed_precision/decorator.py +++ b/python/paddle/fluid/contrib/mixed_precision/decorator.py @@ -19,6 +19,7 @@ from ... import unique_name from . import fp16_utils from .fp16_utils import create_master_params_grads, master_param_to_train_param from .fp16_utils import update_loss_scaling, rewrite_program +from .fp16_lists import AutoMixedPrecisionLists __all__ = ["decorate"] @@ -34,6 +35,7 @@ class OptimizerWithMixedPrecison(object): Args: optimizer (Optimizer): A common Optimizer object. + amp_lists (AutoMixedPrecisionLists): An AutoMixedPrecisionLists object. 
init_loss_scaling (float): The initial loss scaling factor. use_dynamic_loss_scaling (bool): Whether to use dynamic loss scaling. incr_every_n_steps(int): Increases loss scaling every n consecutive @@ -48,10 +50,11 @@ class OptimizerWithMixedPrecison(object): """ - def __init__(self, optimizer, init_loss_scaling, use_dynamic_loss_scaling, - incr_every_n_steps, decr_every_n_nan_or_inf, incr_ratio, - decr_ratio): + def __init__(self, optimizer, amp_lists, init_loss_scaling, + use_dynamic_loss_scaling, incr_every_n_steps, + decr_every_n_nan_or_inf, incr_ratio, decr_ratio): self._optimizer = optimizer + self._amp_lists = amp_lists self._param_grads = None self._train_program = default_main_program() self._startup_prog = default_startup_program() @@ -120,7 +123,7 @@ class OptimizerWithMixedPrecison(object): A list of (param, grad), which is a tuple of a parameter and its gradient respectively, and the scaled loss. """ - rewrite_program(self._train_program) + rewrite_program(self._train_program, self._amp_lists) scaled_loss = loss * self._loss_scaling self._param_grads = self._optimizer.backward( scaled_loss, startup_program, parameter_list, no_grad_set, @@ -189,6 +192,7 @@ class OptimizerWithMixedPrecison(object): def decorate(optimizer, + amp_lists=None, init_loss_scaling=1.0, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, @@ -200,6 +204,7 @@ def decorate(optimizer, Args: optimizer(Optimizer): A common Optimizer. + amp_lists (AutoMixedPrecisionLists): An AutoMixedPrecisionLists object. init_loss_scaling(float): The initial loss scaling factor. incr_every_n_steps(int): Increases loss scaling every n consecutive steps with finite gradients. 
@@ -227,9 +232,10 @@ def decorate(optimizer, scaled_loss, _, _ = mp_optimizer.minimize(loss) """ - + if amp_lists is None: + amp_lists = AutoMixedPrecisionLists() mp_optimizer = OptimizerWithMixedPrecison( - optimizer, init_loss_scaling, use_dynamic_loss_scaling, + optimizer, amp_lists, init_loss_scaling, use_dynamic_loss_scaling, incr_every_n_steps, decr_every_n_nan_or_inf, incr_ratio, decr_ratio) return mp_optimizer diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index 59bc0dc4be6c0f191fb8d3cbf519bd298e31d687..d3641b646f32ea9d581603e2bc5e9c56dd21909b 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -12,6 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy + +__all__ = ["AutoMixedPrecisionLists"] + + +class AutoMixedPrecisionLists(object): + """ + AutoMixedPrecisionLists is a class for black/white list. It can update + pre-defined black list and white list according to users' custom black and + white lists. The lists are used for an algorithm which determines op's + execution mode (fp32 or fp16). + + Args: + custom_white_list (set): Users' custom white list. + custom_black_list (set): Users' custom black list. + """ + + def __init__(self, custom_white_list=None, custom_black_list=None): + self._custom_white_list = custom_white_list + self._custom_black_list = custom_black_list + self.white_list = copy.copy(white_list) + self.black_list = copy.copy(black_list) + self.gray_list = copy.copy(gray_list) + self._update_list() + + def _update_list(self): + """ + Update black and white list according to users' custom list. 
+ """ + if self._custom_white_list: + for op_name in self._custom_white_list: + if op_name in self.black_list: + self.black_list.remove(op_name) + self.white_list.add(op_name) + if self._custom_black_list: + for op_name in self._custom_black_list: + if op_name in self.white_list: + self.white_list.remove(op_name) + self.black_list.add(op_name) + + # The three sets listed below are changed dynamiclly. They don't contain all # paddle ops currently. diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index a3ca946cf4c66e275ea314e9d5988a3ddc93a627..51c67cf0017fb54d21f6402bcec64b07f75c1025 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -17,7 +17,6 @@ from __future__ import print_function from ... import core from ... import layers from ... import framework -from .fp16_lists import black_list, white_list, gray_list def append_cast_op(i, o, prog): @@ -218,7 +217,7 @@ def find_true_prev_op(ops, var_name): return op -def rewrite_program(main_prog): +def rewrite_program(main_prog, amp_lists): """ Traverse all ops in current block and insert cast op according to which set current op belongs to. 
@@ -244,11 +243,11 @@ def rewrite_program(main_prog): black_op_set = set() for i in range(len(ops)): op = ops[i] - if op.type in black_list: + if op.type in amp_lists.black_list: black_op_set.add(op) - elif op.type in white_list: + elif op.type in amp_lists.white_list: white_op_set.add(op) - elif op.type in op.type in gray_list: + elif op.type in amp_lists.gray_list: is_black_op = False is_white_op = False for in_name in op.input_names: @@ -265,10 +264,10 @@ def rewrite_program(main_prog): prev_op = in_var.op # if it's one of inputs if prev_op in black_op_set or \ - prev_op.type in black_list: + prev_op.type in amp_lists.black_list: is_black_op = True if prev_op in white_op_set or \ - prev_op.type in white_list: + prev_op.type in amp_lists.white_list: is_white_op = True if is_black_op: black_op_set.add(op)