@@ -28,6 +28,7 @@ from deepspeed.utils import logger
# String names of the dynamic loss-scaler settings.
# NOTE(review): presumably used as keys of the loss-scale arguments dict read
# by DynamicLossScaler -- confirm against the code that consumes them.
# Name of the initial loss-scale setting.
INITIAL_LOSS_SCALE='init_scale'
# Name of the setting for the number of consecutive overflow-free iterations
# to wait before the loss scale is increased.
SCALE_WINDOW='scale_window'
# Name of the delayed-shift setting.
# NOTE(review): appears to be the value hysteresis is restored to -- confirm.
DELAYED_SHIFT='delayed_shift'
# Name of the flag selecting whether hysteresis is refilled on an iteration
# that does not overflow.
CONSECUTIVE_HYSTERESIS='consecutive_hysteresis'
# Name of the minimum loss-scale setting.
# NOTE(review): presumably a lower bound on the loss scale -- confirm.
MIN_LOSS_SCALE='min_scale'
...
...
@@ -111,6 +112,7 @@ class DynamicLossScaler(LossScalerBase):
init_scale (float, optional, default=2**32): Initial loss scale attempted by :class:`DynamicLossScaler`.
scale_factor (float, optional, default=2.0): Factor used when adjusting the loss scale. If an overflow is encountered, the loss scale is readjusted to loss scale/``scale_factor``. If ``scale_window`` consecutive iterations take place without an overflow, the loss scale is readjusted to loss_scale*``scale_factor``.
scale_window (int, optional, default=1000): Number of consecutive iterations without an overflow to wait before increasing the loss scale.
consecutive_hysteresis (bool, optional, default=False): Whether to refill the hysteresis when an iteration completes without an overflow.
"""
def__init__(self,
...
...
@@ -190,6 +192,9 @@ class DynamicLossScaler(LossScalerBase):
self.last_overflow_iter=self.cur_iter
else:
ifself.consecutive_hysteresis:
ifdist.get_rank()==0:
hysteresis_msg=f"Consecutive hysteresis is enabled. Restoring hysteresis to {self.delayed_shift}"
| <i>**consecutive_hysteresis**</i> is an **fp16** parameter that controls whether the hysteresis is refilled when an iteration completes without an overflow | `false` |