未验证 提交 1402ef9e 编写于 作者: G gongweibao 提交者: GitHub

Add an interface so users can get the scaled loss when they use a customized loss (#20613)

上级 ed79287a
...@@ -557,7 +557,7 @@ paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'loca ...@@ -557,7 +557,7 @@ paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'loca
paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a')) paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a')) paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4')) paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4'))
paddle.fluid.contrib.mixed_precision.decorate (ArgSpec(args=['optimizer', 'amp_lists', 'init_loss_scaling', 'incr_every_n_steps', 'decr_every_n_nan_or_inf', 'incr_ratio', 'decr_ratio', 'use_dynamic_loss_scaling'], varargs=None, keywords=None, defaults=(None, 1.0, 1000, 2, 2.0, 0.8, True)), ('document', '5f118631fc8632afb981b3a26daae731')) paddle.fluid.contrib.mixed_precision.decorate (ArgSpec(args=['optimizer', 'amp_lists', 'init_loss_scaling', 'incr_every_n_steps', 'decr_every_n_nan_or_inf', 'incr_ratio', 'decr_ratio', 'use_dynamic_loss_scaling'], varargs=None, keywords=None, defaults=(None, 1.0, 1000, 2, 2.0, 0.8, True)), ('document', '6b0a44eb05c8707c1eff2e786f673edb'))
paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists ('paddle.fluid.contrib.mixed_precision.fp16_lists.AutoMixedPrecisionLists', ('document', 'c116ec6bb5d30998792daea8db21ee40')) paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists ('paddle.fluid.contrib.mixed_precision.fp16_lists.AutoMixedPrecisionLists', ('document', 'c116ec6bb5d30998792daea8db21ee40'))
paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists.__init__ (ArgSpec(args=['self', 'custom_white_list', 'custom_black_list'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists.__init__ (ArgSpec(args=['self', 'custom_white_list', 'custom_black_list'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.fused_elemwise_activation (ArgSpec(args=['x', 'y', 'functor_list', 'axis', 'scale', 'save_intermediate_out'], varargs=None, keywords=None, defaults=(-1, 0.0, True)), ('document', '1c4b247a2858cea8d9d8750693688270')) paddle.fluid.contrib.fused_elemwise_activation (ArgSpec(args=['x', 'y', 'functor_list', 'axis', 'scale', 'save_intermediate_out'], varargs=None, keywords=None, defaults=(-1, 0.0, True)), ('document', '1c4b247a2858cea8d9d8750693688270'))
......
...@@ -58,6 +58,7 @@ class OptimizerWithMixedPrecison(object): ...@@ -58,6 +58,7 @@ class OptimizerWithMixedPrecison(object):
self._param_grads = None self._param_grads = None
self._train_program = default_main_program() self._train_program = default_main_program()
self._startup_prog = default_startup_program() self._startup_prog = default_startup_program()
self._scaled_loss = None
self._loss_scaling = layers.create_global_var( self._loss_scaling = layers.create_global_var(
name=unique_name.generate("loss_scaling"), name=unique_name.generate("loss_scaling"),
shape=[1], shape=[1],
...@@ -101,6 +102,13 @@ class OptimizerWithMixedPrecison(object): ...@@ -101,6 +102,13 @@ class OptimizerWithMixedPrecison(object):
""" """
return self._loss_scaling return self._loss_scaling
def get_scaled_loss(self):
"""Return the scaled loss held in ``self._scaled_loss``.

This is useful when you feed a customized loss into the executor.

Returns:
The value of ``self._scaled_loss``: ``None`` as initialized, or
``loss * self._loss_scaling`` once ``backward`` has assigned it.
"""
return self._scaled_loss
def backward(self, def backward(self,
loss, loss,
startup_program=None, startup_program=None,
...@@ -124,9 +132,9 @@ class OptimizerWithMixedPrecison(object): ...@@ -124,9 +132,9 @@ class OptimizerWithMixedPrecison(object):
gradient respectively, and the scaled loss. gradient respectively, and the scaled loss.
""" """
rewrite_program(self._train_program, self._amp_lists) rewrite_program(self._train_program, self._amp_lists)
scaled_loss = loss * self._loss_scaling self._scaled_loss = loss * self._loss_scaling
self._params_grads = self._optimizer.backward( self._params_grads = self._optimizer.backward(
scaled_loss, startup_program, parameter_list, no_grad_set, self._scaled_loss, startup_program, parameter_list, no_grad_set,
callbacks) callbacks)
update_role_var_grad(self._train_program, self._params_grads) update_role_var_grad(self._train_program, self._params_grads)
scaled_params_grads = [] scaled_params_grads = []
...@@ -245,7 +253,7 @@ def decorate(optimizer, ...@@ -245,7 +253,7 @@ def decorate(optimizer,
optimizer=optimizer, init_loss_scaling=8.0) optimizer=optimizer, init_loss_scaling=8.0)
ops, param_grads = mp_optimizer.minimize(loss) ops, param_grads = mp_optimizer.minimize(loss)
scaled_loss = mp_optimizer.get_loss_scaling() scaled_loss = mp_optimizer.get_scaled_loss()
""" """
if amp_lists is None: if amp_lists is None:
amp_lists = AutoMixedPrecisionLists() amp_lists = AutoMixedPrecisionLists()
......
...@@ -140,7 +140,8 @@ def train(net_type, use_cuda, save_dirname, is_local): ...@@ -140,7 +140,8 @@ def train(net_type, use_cuda, save_dirname, is_local):
use_dynamic_loss_scaling=True) use_dynamic_loss_scaling=True)
mp_optimizer.minimize(avg_cost) mp_optimizer.minimize(avg_cost)
scaled_loss = mp_optimizer.get_loss_scaling() loss_scaling = mp_optimizer.get_loss_scaling()
scaled_loss = mp_optimizer.get_scaled_loss()
BATCH_SIZE = 128 BATCH_SIZE = 128
PASS_NUM = 1 PASS_NUM = 1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册