From 856873482aecafae0cca727051fe512b1f0c0fd7 Mon Sep 17 00:00:00 2001 From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com> Date: Thu, 1 Jul 2021 10:35:03 +0800 Subject: [PATCH] [AMP] add get() and set() for Grad_scaler (#33835) * add get and set for Grad_scaler * refine some API name and comments * refine API name and comments * refine some comments --- python/paddle/amp/grad_scaler.py | 287 ++++++++++++++++++ .../paddle/fluid/dygraph/amp/loss_scaler.py | 112 +++++++ .../test_imperative_auto_mixed_precision.py | 28 ++ 3 files changed, 427 insertions(+) diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py index 770b660a9e..827a320b2c 100644 --- a/python/paddle/amp/grad_scaler.py +++ b/python/paddle/amp/grad_scaler.py @@ -145,3 +145,290 @@ class GradScaler(AmpScaler): optimizer.clear_grad() """ return super(GradScaler, self).minimize(optimizer, *args, **kwargs) + + def is_enable(self): + """ + Enable loss scaling or not. + + Returns: + bool: enable loss scaling return True else return False. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + enable = scaler.is_enable() + print(enable) # True + """ + return super(GradScaler, self).is_enable() + + def is_use_dynamic_loss_scaling(self): + """ + Whether to use dynamic loss scaling. + + Returns: + bool: if fixed loss_scaling is used return False, if the loss scaling is updated dynamically return True. + + Examples: + ..
code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + use_dynamic_loss_scaling = scaler.is_use_dynamic_loss_scaling() + print(use_dynamic_loss_scaling) # True + """ + return super(GradScaler, self).is_use_dynamic_loss_scaling() + + def get_init_loss_scaling(self): + """ + Return the initial loss scaling factor. + + Returns: + float: the initial loss scaling factor. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + init_loss_scaling = scaler.get_init_loss_scaling() + print(init_loss_scaling) # 1024 + """ + return super(GradScaler, self).get_init_loss_scaling() + + def set_init_loss_scaling(self, new_init_loss_scaling): + """ + Set the initial loss scaling factor by `new_init_loss_scaling`. + + Args: + new_init_loss_scaling(int): The new_init_loss_scaling used to update initial loss scaling factor. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + print(scaler.get_init_loss_scaling()) # 1024 + new_init_loss_scaling = 1000 + scaler.set_init_loss_scaling(new_init_loss_scaling) + print(scaler.get_init_loss_scaling()) # 1000 + """ + super(GradScaler, self).set_init_loss_scaling(new_init_loss_scaling) + + def get_incr_ratio(self): + """ + Return the multiplier to use when increasing the loss scaling. + + Returns: + float: the multiplier to use when increasing the loss scaling. + + Examples: + ..
code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + incr_ratio = scaler.get_incr_ratio() + print(incr_ratio) # 2.0 + """ + return super(GradScaler, self).get_incr_ratio() + + def set_incr_ratio(self, new_incr_ratio): + """ + Set the multiplier to use when increasing the loss scaling by `new_incr_ratio`, `new_incr_ratio` should be > 1.0. + + Args: + new_incr_ratio(float): The new_incr_ratio used to update the multiplier to use when increasing the loss scaling. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + print(scaler.get_incr_ratio()) # 2.0 + new_incr_ratio = 3.0 + scaler.set_incr_ratio(new_incr_ratio) + print(scaler.get_incr_ratio()) # 3.0 + """ + super(GradScaler, self).set_incr_ratio(new_incr_ratio) + + def get_decr_ratio(self): + """ + Get the less-than-one-multiplier to use when decreasing the loss scaling. + + Returns: + float: the less-than-one-multiplier to use when decreasing the loss scaling. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + decr_ratio = scaler.get_decr_ratio() + print(decr_ratio) # 0.5 + """ + return super(GradScaler, self).get_decr_ratio() + + def set_decr_ratio(self, new_decr_ratio): + """ + Set the less-than-one-multiplier to use when decreasing the loss scaling by `new_decr_ratio`, `new_decr_ratio` should be < 1.0. + + Args: + new_decr_ratio(float): The new_decr_ratio used to update the less-than-one-multiplier to use when decreasing the loss scaling.
+ + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + print(scaler.get_decr_ratio()) # 0.5 + new_decr_ratio = 0.1 + scaler.set_decr_ratio(new_decr_ratio) + print(scaler.get_decr_ratio()) # 0.1 + """ + super(GradScaler, self).set_decr_ratio(new_decr_ratio) + + def get_incr_every_n_steps(self): + """ + Return the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + + Returns: + int: the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + incr_every_n_steps = scaler.get_incr_every_n_steps() + print(incr_every_n_steps) # 1000 + """ + return super(GradScaler, self).get_incr_every_n_steps() + + def set_incr_every_n_steps(self, new_incr_every_n_steps): + """ + Set the num `n` by `new_incr_every_n_steps`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + + Args: + new_incr_every_n_steps(int): The new_incr_every_n_steps used to update the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + + Examples: + ..
code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + print(scaler.get_incr_every_n_steps()) # 1000 + new_incr_every_n_steps = 2000 + scaler.set_incr_every_n_steps(new_incr_every_n_steps) + print(scaler.get_incr_every_n_steps()) # 2000 + """ + super(GradScaler, self).set_incr_every_n_steps(new_incr_every_n_steps) + + def get_decr_every_n_nan_or_inf(self): + """ + Return the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + + Returns: + int: the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + + Examples: + .. code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + decr_every_n_nan_or_inf = scaler.get_decr_every_n_nan_or_inf() + print(decr_every_n_nan_or_inf) # 2 + """ + return super(GradScaler, self).get_decr_every_n_nan_or_inf() + + def set_decr_every_n_nan_or_inf(self, new_decr_every_n_nan_or_inf): + """ + Set the num `n` by `new_decr_every_n_nan_or_inf`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + + Args: + new_decr_every_n_nan_or_inf(int): The new_decr_every_n_nan_or_inf used to update the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + + Examples: + ..
code-block:: python + + import paddle + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + print(scaler.get_decr_every_n_nan_or_inf()) # 2 + new_decr_every_n_nan_or_inf = 3 + scaler.set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf) + print(scaler.get_decr_every_n_nan_or_inf()) # 3 + """ + super(GradScaler, + self).set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf) diff --git a/python/paddle/fluid/dygraph/amp/loss_scaler.py b/python/paddle/fluid/dygraph/amp/loss_scaler.py index e0bd60fbeb..1817b78b60 100644 --- a/python/paddle/fluid/dygraph/amp/loss_scaler.py +++ b/python/paddle/fluid/dygraph/amp/loss_scaler.py @@ -244,3 +244,115 @@ class AmpScaler(object): self._incr_count = 0 return + + def is_enable(self): + """ + Enable loss scaling or not. + + Returns: + bool: enable loss scaling return True else return False. + """ + return self._enable + + def is_use_dynamic_loss_scaling(self): + """ + Whether to use dynamic loss scaling. + + Returns: + bool: if fixed loss_scaling is used return False, if the loss scaling is updated dynamically return True. + """ + return self._use_dynamic_loss_scaling + + def get_init_loss_scaling(self): + """ + Return the initial loss scaling factor. + + Returns: + float: the initial loss scaling factor. + """ + return self._init_loss_scaling + + def set_init_loss_scaling(self, new_init_loss_scaling): + """ + Set the initial loss scaling factor by `new_init_loss_scaling`. + + Args: + new_init_loss_scaling(int): The new_init_loss_scaling used to update initial loss scaling factor. + """ + self._init_loss_scaling = new_init_loss_scaling + self._scale = to_variable( + np.array([self._init_loss_scaling]).astype(np.float32)) + + def get_incr_ratio(self): + """ + Return the multiplier to use when increasing the loss scaling.
+ + Returns: + float: the multiplier to use when increasing the loss scaling. + """ + return self._incr_ratio + + def set_incr_ratio(self, new_incr_ratio): + """ + Set the multiplier to use when increasing the loss scaling by `new_incr_ratio`, `new_incr_ratio` should be > 1.0. + + Args: + new_incr_ratio(float): The new_incr_ratio used to update the multiplier to use when increasing the loss scaling. + """ + assert new_incr_ratio > 1.0, "The new_incr_ratio must be > 1.0." + self._incr_ratio = new_incr_ratio + + def get_decr_ratio(self): + """ + Get the less-than-one-multiplier to use when decreasing the loss scaling. + + Returns: + float: the less-than-one-multiplier to use when decreasing the loss scaling. + """ + return self._decr_ratio + + def set_decr_ratio(self, new_decr_ratio): + """ + Set the less-than-one-multiplier to use when decreasing the loss scaling by `new_decr_ratio`, `new_decr_ratio` should be < 1.0. + + Args: + new_decr_ratio(float): The new_decr_ratio used to update the less-than-one-multiplier to use when decreasing the loss scaling. + """ + assert new_decr_ratio < 1.0, "The new_decr_ratio must be < 1.0." + self._decr_ratio = new_decr_ratio + + def get_incr_every_n_steps(self): + """ + Return the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + + Returns: + int: the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + """ + return self._incr_every_n_steps + + def set_incr_every_n_steps(self, new_incr_every_n_steps): + """ + Set the num `n` by `new_incr_every_n_steps`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients. + + Args: + new_incr_every_n_steps(int): The new_incr_every_n_steps used to update the num `n`, `n` represent increases loss scaling every `n` consecutive steps with finite gradients.
+ """ + self._incr_every_n_steps = new_incr_every_n_steps + + def get_decr_every_n_nan_or_inf(self): + """ + Return the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + + Returns: + int: the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + """ + return self._decr_every_n_nan_or_inf + + def set_decr_every_n_nan_or_inf(self, new_decr_every_n_nan_or_inf): + """ + Set the num `n` by `new_decr_every_n_nan_or_inf`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + + Args: + new_decr_every_n_nan_or_inf(int): The new_decr_every_n_nan_or_inf used to update the num `n`, `n` represent decreases loss scaling every `n` accumulated steps with nan or inf gradients. + """ + self._decr_every_n_nan_or_inf = new_decr_every_n_nan_or_inf diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py index a56797971b..e3d2bda892 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py @@ -209,6 +209,34 @@ class TestAmpScaler(unittest.TestCase): self.assertTrue( np.array_equal(param.numpy(), params_init[param.name])) + def test_get_and_set(self): + with fluid.dygraph.guard(): + scaler = paddle.amp.GradScaler( + enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) + self.assertEqual(scaler.is_enable() == True, True) + self.assertEqual(scaler.get_init_loss_scaling() == 1024, True) + self.assertEqual(scaler.get_incr_ratio() == 2.0, True) + self.assertEqual(scaler.get_decr_ratio() == 0.5, True) + self.assertEqual(scaler.get_incr_every_n_steps() == 1000, True) + self.assertEqual(scaler.get_decr_every_n_nan_or_inf() == 2,
True) + self.assertEqual(scaler.is_use_dynamic_loss_scaling() == True, True) + scaler.set_decr_every_n_nan_or_inf(4) + self.assertEqual(scaler.get_decr_every_n_nan_or_inf() == 4, True) + scaler.set_decr_ratio(0.1) + self.assertEqual(scaler.get_decr_ratio() == 0.1, True) + scaler.set_incr_every_n_steps(200) + self.assertEqual(scaler.get_incr_every_n_steps() == 200, True) + scaler.set_incr_ratio(3.0) + self.assertEqual(scaler.get_incr_ratio() == 3.0, True) + scaler.set_init_loss_scaling(100) + self.assertEqual(scaler.get_init_loss_scaling() == 100, True) + def reader_decorator(reader): def __reader__(): -- GitLab