未验证 提交 72efef63 编写于 作者: R ruri 提交者: GitHub

Merge pull request #15887 from shippingwang/cosine_decay_op

add cosine decay op, test=develop
......@@ -337,6 +337,7 @@ paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps
paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.cosine_decay ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32'))
paddle.fluid.contrib.StateCell.__init__ ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.StateCell.compute_state ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None)
......
......@@ -28,10 +28,12 @@ from . import ops
from . import tensor
from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter, unique_name, name_scope
import math
# Names exported by `from <this module> import *`.
# Each entry must be followed by a comma: adjacent string literals are
# implicitly concatenated, which would silently produce a bogus name.
__all__ = [
    'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
    'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
    'cosine_decay'
]
......@@ -307,6 +309,41 @@ def piecewise_decay(boundaries, values):
return lr
def cosine_decay(learning_rate, step_each_epoch, epochs):
    """
    Decay the learning rate along a cosine curve.

    When training a model, it is often recommended to lower the learning
    rate as training progresses. The current epoch is derived from the
    global step counter as ``floor(global_step / step_each_epoch)`` and the
    learning rate is then computed as:

        decayed_lr = learning_rate * 0.5 * (cos(epoch * pi / epochs) + 1)

    Args:
        learning_rate(Variable|float): The initial learning rate.
        step_each_epoch(int): The number of steps in an epoch.
        epochs(int): The number of epochs.

    Returns:
        Variable: The decayed learning rate.

    Examples:
        .. code-block:: python

            base_lr = 0.1
            lr = fluid.layers.cosine_decay(
                learning_rate=base_lr, step_each_epoch=10000, epochs=120)
    """
    with default_main_program()._lr_schedule_guard():
        step = _decay_step_counter()
        epoch_index = ops.floor(step / step_each_epoch)
        # Scale the base rate first, then build the cosine factor, so the
        # operations are issued in the same order as the formula reads.
        half_lr = learning_rate * 0.5
        cosine_factor = ops.cos(epoch_index * math.pi / epochs) + 1
        return half_lr * cosine_factor
def append_LARS(params_grads, learning_rate, weight_decay):
"""
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
......
......@@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values):
return values[len(values) - 1]
def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
    """Pure-Python reference for the cosine learning-rate decay.

    Args:
        global_step: Number of steps taken so far.
        learning_rate: The initial learning rate.
        step_each_epoch: The number of steps in an epoch.
        epochs: The number of epochs.

    Returns:
        learning_rate * 0.5 * (cos(epoch * pi / epochs) + 1), where
        epoch = floor(global_step / step_each_epoch).
    """
    epoch_index = math.floor(global_step / step_each_epoch)
    phase = epoch_index * math.pi / epochs
    return learning_rate * 0.5 * (math.cos(phase) + 1)
class TestLearningRateDecay(unittest.TestCase):
def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
places = [fluid.CPUPlace()]
......@@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase):
"boundaries": [3, 6, 9],
"values": [0.1, 0.2, 0.3, 0.4]
}),
(cosine_decay, layers.cosine_decay, {
"learning_rate": 0.1,
"step_each_epoch": 100,
"epochs": 120
}),
]
for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册