Commit 0a3825c5 authored by littletomatodonkey

support warmup for cosine and expo decay

Parent ffff9080
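
This commit replaces the hand-rolled warmup logic (a Switch over the global step counter) in CosineWarmup and ExponentialWarmup with Paddle's built-in decay ops wrapped by fluid.layers.linear_lr_warmup. As a reading aid, here is a minimal pure-Python sketch of the schedule shape the cosine branch produces; the function and argument names are made up for illustration and it simply assumes the documented formulas of fluid.layers.cosine_decay and fluid.layers.linear_lr_warmup, it is not part of the patch.

    import math

    def cosine_warmup_lr(global_step, lr, step_each_epoch, epochs, warmup_epoch):
        # Hypothetical reference only: mirrors linear_lr_warmup(start_lr=0.0,
        # end_lr=lr) wrapped around cosine_decay, per their documented behaviour.
        warmup_steps = warmup_epoch * step_each_epoch
        if global_step < warmup_steps:
            # linear ramp from 0 to the base learning rate during warmup
            return lr * global_step / warmup_steps
        # cosine decay computed per epoch over the whole run
        cur_epoch = math.floor(global_step / step_each_epoch)
        return lr * 0.5 * (math.cos(cur_epoch * math.pi / epochs) + 1)
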
@@ -112,35 +112,19 @@ class CosineWarmup(object):
         self.lr = lr
         self.step_each_epoch = step_each_epoch
         self.epochs = epochs
-        self.warmup_epoch = fluid.layers.fill_constant(
-            shape=[1],
-            value=float(warmup_epoch),
-            dtype='float32',
-            force_cpu=True)
+        self.warmup_epoch = warmup_epoch
 
     def __call__(self):
-        global_step = _decay_step_counter()
-        learning_rate = fluid.layers.tensor.create_global_var(
-            shape=[1],
-            value=0.0,
-            dtype='float32',
-            persistable=True,
-            name="learning_rate")
-        epoch = ops.floor(global_step / self.step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < self.warmup_epoch):
-                decayed_lr = self.lr * \
-                    (global_step / (self.step_each_epoch * self.warmup_epoch))
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
-            with switch.default():
-                current_step = global_step - self.warmup_epoch * self.step_each_epoch
-                total_step = (
-                    self.epochs - self.warmup_epoch) * self.step_each_epoch
-                decayed_lr = self.lr * \
-                    (ops.cos(current_step * math.pi / total_step) + 1) / 2
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
+        learning_rate = fluid.layers.cosine_decay(
+            learning_rate=self.lr,
+            step_each_epoch=self.step_each_epoch,
+            epochs=self.epochs)
+
+        learning_rate = fluid.layers.linear_lr_warmup(
+            learning_rate,
+            warmup_steps=self.warmup_epoch * self.step_each_epoch,
+            start_lr=0.0,
+            end_lr=self.lr)
+
         return learning_rate
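
For reference, a usage sketch of the rewritten CosineWarmup under Paddle's fluid static-graph API. The keyword names below are inferred from the attribute assignments in the hunk above, and all values are hypothetical; the actual constructor signature and training setup may differ.

    import paddle.fluid as fluid

    # Hypothetical configuration; lr / step_each_epoch / epochs / warmup_epoch
    # are assumed from the fields set in __init__ above.
    lr_builder = CosineWarmup(lr=0.1, step_each_epoch=1252, epochs=120, warmup_epoch=5)

    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        lr_var = lr_builder()  # builds the warmup + cosine schedule in the graph
        opt = fluid.optimizer.Momentum(learning_rate=lr_var, momentum=0.9)
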
@@ -169,37 +153,22 @@ class ExponentialWarmup(object):
         super(ExponentialWarmup, self).__init__()
         self.lr = lr
         self.step_each_epoch = step_each_epoch
-        self.decay_epochs = decay_epochs * self.step_each_epoch
+        self.decay_epochs = decay_epochs
         self.decay_rate = decay_rate
-        self.warmup_epoch = fluid.layers.fill_constant(
-            shape=[1],
-            value=float(warmup_epoch),
-            dtype='float32',
-            force_cpu=True)
+        self.warmup_epoch = warmup_epoch
 
     def __call__(self):
-        global_step = _decay_step_counter()
-        learning_rate = fluid.layers.tensor.create_global_var(
-            shape=[1],
-            value=0.0,
-            dtype='float32',
-            persistable=True,
-            name="learning_rate")
-
-        epoch = ops.floor(global_step / self.step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < self.warmup_epoch):
-                decayed_lr = self.lr * \
-                    (global_step / (self.step_each_epoch * self.warmup_epoch))
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
-            with switch.default():
-                rest_step = global_step - self.warmup_epoch * self.step_each_epoch
-                div_res = ops.floor(rest_step / self.decay_epochs)
-                decayed_lr = self.lr * (self.decay_rate**div_res)
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
+        learning_rate = fluid.layers.exponential_decay(
+            learning_rate=self.lr,
+            decay_steps=self.decay_epochs * self.step_each_epoch,
+            decay_rate=self.decay_rate,
+            staircase=False)
+
+        learning_rate = fluid.layers.linear_lr_warmup(
+            learning_rate,
+            warmup_steps=self.warmup_epoch * self.step_each_epoch,
+            start_lr=0.0,
+            end_lr=self.lr)
+
         return learning_rate
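
The exponential branch follows the same pattern. A pure-Python sketch of the schedule it produces, again a hypothetical reference that assumes the documented behaviour of exponential_decay with staircase=False and of linear_lr_warmup:

    def exponential_warmup_lr(global_step, lr, step_each_epoch, decay_epochs,
                              decay_rate, warmup_epoch):
        # Hypothetical reference only, not part of the patch.
        warmup_steps = warmup_epoch * step_each_epoch
        if global_step < warmup_steps:
            # linear_lr_warmup: ramp from start_lr=0.0 to end_lr=lr
            return lr * global_step / warmup_steps
        # exponential_decay with staircase=False: smooth decay by decay_rate
        # every decay_epochs * step_each_epoch steps, counted from step 0
        decay_steps = decay_epochs * step_each_epoch
        return lr * decay_rate ** (global_step / decay_steps)
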