提交 13e89151 编写于 作者: S shippingwang 提交者: ceci3

add cosine decay op, test=develop

上级 b2ce8320
...@@ -336,6 +336,7 @@ paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_step ...@@ -336,6 +336,7 @@ paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_step
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)) paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False))
paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.cosine_decay ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')) paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32'))
......
...@@ -28,10 +28,12 @@ from . import ops ...@@ -28,10 +28,12 @@ from . import ops
from . import tensor from . import tensor
from ..initializer import init_on_cpu from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter, unique_name, name_scope from ..framework import default_main_program, Parameter, unique_name, name_scope
import math
__all__ = [ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS' 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
'cosine_decay'
] ]
...@@ -307,6 +309,39 @@ def piecewise_decay(boundaries, values): ...@@ -307,6 +309,39 @@ def piecewise_decay(boundaries, values):
return lr return lr
def cosine_decay(learning_rate, step_each_epoch, epochs):
    """
    Decay the learning rate along a cosine curve, stepping once per epoch.

    When training a model, it is often recommended to lower the learning
    rate as the training progresses. This function computes

        learning_rate * 0.5 * (cos(cur_epoch * pi / epochs) + 1)

    where cur_epoch = floor(global_step / step_each_epoch), so the value
    only changes when the step counter crosses an epoch boundary.

    Args:
        learning_rate(Variable|float): The initial learning rate.
        step_each_epoch(int): The number of steps in an epoch.
        epochs(int): The number of epochs.

    Returns:
        Variable: The decayed learning rate.

    Examples:
        .. code-block:: python

            base_lr = 0.1
            lr = fluid.layers.cosine_decay(
                learning_rate=base_lr, step_each_epoch=10000, epochs=120)
    """
    with default_main_program()._lr_schedule_guard():
        step_counter = _decay_step_counter()
        # Quantize the running step count to a whole epoch index.
        epoch_index = ops.floor(step_counter / step_each_epoch)
        cosine_term = ops.cos(epoch_index * math.pi / epochs)
        # Shift cos from [-1, 1] to [0, 2], then halve to scale the base rate.
        return learning_rate * 0.5 * (cosine_term + 1)
def append_LARS(params_grads, learning_rate, weight_decay): def append_LARS(params_grads, learning_rate, weight_decay):
""" """
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
......
...@@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values): ...@@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values):
return values[len(values) - 1] return values[len(values) - 1]
def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
    """Pure-Python reference implementation of epoch-wise cosine decay.

    Args:
        global_step: Current training step count.
        learning_rate: The initial learning rate.
        step_each_epoch: The number of steps in an epoch.
        epochs: The number of epochs.

    Returns:
        learning_rate * 0.5 * (cos(cur_epoch * pi / epochs) + 1), where
        cur_epoch = floor(global_step / step_each_epoch).
    """
    # Every step inside the same epoch maps to the same epoch index.
    epoch_index = math.floor(global_step / step_each_epoch)
    cosine_term = math.cos(epoch_index * math.pi / epochs)
    return learning_rate * 0.5 * (cosine_term + 1)
class TestLearningRateDecay(unittest.TestCase): class TestLearningRateDecay(unittest.TestCase):
def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs): def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
places = [fluid.CPUPlace()] places = [fluid.CPUPlace()]
...@@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase): ...@@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase):
"boundaries": [3, 6, 9], "boundaries": [3, 6, 9],
"values": [0.1, 0.2, 0.3, 0.4] "values": [0.1, 0.2, 0.3, 0.4]
}), }),
(cosine_decay, layers.cosine_decay, {
"learning_rate": 0.1,
"step_each_epoch": 100,
"epochs": 120
}),
] ]
for py_decay_fn, fluid_decay_fn, kwargs in decay_fns: for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册