未验证 提交 f9702922 编写于 作者: littletomatodonkey's avatar littletomatodonkey 提交者: GitHub

add cosine decay strategy (#387)

Add cosine decay lr strategy.
上级 b70501ab
...@@ -58,6 +58,26 @@ class PiecewiseDecay(object): ...@@ -58,6 +58,26 @@ class PiecewiseDecay(object):
return fluid.layers.piecewise_decay(self.milestones, values) return fluid.layers.piecewise_decay(self.milestones, values)
@serializable
class CosineDecay(object):
    """
    Cosine learning rate decay

    Args:
        max_iters (int): max iterations for the training process.
            If you combine cosine decay with warmup, it is recommended
            that max_iters is much larger than the warmup iterations.
    """

    def __init__(self, max_iters=180000):
        self.max_iters = max_iters

    def __call__(self, base_lr=None, learning_rate=None):
        """
        Build the cosine-decayed learning rate.

        Args:
            base_lr: initial learning rate; must not be None.
            learning_rate: accepted in the signature but not used by
                this method.

        Returns:
            Whatever `fluid.layers.cosine_decay` returns for the given
            base learning rate and `max_iters`.

        Raises:
            AssertionError: if `base_lr` is None.
        """
        # Cosine decay has no alternative "values" input, so base_lr is
        # the only thing the caller can (and must) supply.
        assert base_lr is not None, "base LR should be provided"
        # NOTE(review): per the Paddle API the positional arguments are
        # (learning_rate, step_each_epoch, epochs); passing 1 and
        # max_iters presumably makes the schedule advance once per
        # iteration over max_iters iterations -- confirm against the
        # installed Paddle version.
        return fluid.layers.cosine_decay(base_lr, 1, self.max_iters)
@serializable @serializable
class LinearWarmup(object): class LinearWarmup(object):
""" """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册