Unverified · Commit 4a3a2d6b authored by guguguzi, committed by GitHub

Add api MultiplicativeDecay (#38250)

* delete the modification of dygraph

* CI

* check CI

* modify the return value of get_lr
Parent: c8fbd3cd
...@@ -205,6 +205,13 @@ def lambda_lr(epoch_num, learning_rate, lr_lambda, verbose=False):
    return learning_rate * lr_lambda(epoch_num)
def multiplicative_lr(epoch_num, learning_rate, lr_lambda, verbose=False):
latest_lr = learning_rate
for i in range(epoch_num):
latest_lr = latest_lr * lr_lambda(i + 1)
return latest_lr
def piecewise_lr(epoch_num, boundaries, values, verbose=False):
    assert len(boundaries) + 1 == len(values)
    for i in range(len(boundaries)):
...@@ -519,6 +526,10 @@ class TestLRScheduler(unittest.TestCase):
            "learning_rate": 0.5,
            "lr_lambda": lambda x: 0.95**x,
            "verbose": True
}), (multiplicative_lr, paddle.optimizer.lr.MultiplicativeDecay, {
"learning_rate": 0.5,
"lr_lambda": lambda x: 0.95,
"verbose": True
        }), (cosine_annealing_lr, paddle.optimizer.lr.CosineAnnealingDecay, {
            "learning_rate": 0.5,
            "T_max": 10,
...
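For context, the reference helper multiplicative_lr added above accumulates the lambda factor epoch by epoch, and the new test tuple pairs it with paddle.optimizer.lr.MultiplicativeDecay using a constant factor of 0.95. A minimal sketch of the values the helper produces (illustrative only, outside the test harness):

    # Expected reference values for the constant factor used in the new test case.
    for epoch in (0, 1, 2):
        print(multiplicative_lr(epoch, 0.5, lambda x: 0.95))
    # 0.5, 0.475, 0.45125 -- each epoch multiplies the previous lr by 0.95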
...@@ -17,7 +17,7 @@ import numpy
import warnings
from paddle import Tensor
__all__ = [ # noqa
    'LRScheduler',
    'NoamDecay',
    'PiecewiseDecay',
...@@ -30,7 +30,8 @@ __all__ = [ #noqa
    'StepDecay',
    'LambdaDecay',
    'ReduceOnPlateau',
    'CosineAnnealingDecay',
'MultiplicativeDecay'
]
...@@ -1355,7 +1356,8 @@ class ReduceOnPlateau(LRScheduler):
        if isinstance(metrics, (Tensor, numpy.ndarray)):
            assert len(metrics.shape) == 1 and metrics.shape[0] == 1, "the metrics.shape " \
                "should be (1L,), but the current metrics.shape is {}. Maybe that " \
                "you should call paddle.mean to process it first.".format(
                    metrics.shape)
        elif not isinstance(metrics,
                            (int, float, numpy.float32, numpy.float64)):
            raise TypeError(
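The change in this hunk only re-wraps the assertion message; the behavior is unchanged: ReduceOnPlateau.step expects a Python number or a one-element Tensor/ndarray, and larger tensors should be reduced first. A minimal sketch, assuming a working Paddle install (the loss tensor here is illustrative):

    import paddle

    scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
    loss = paddle.mean(paddle.uniform([10, 10]))  # reduce to a single-element tensor
    scheduler.step(loss)                          # passes the shape assertion above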
...@@ -1513,3 +1515,68 @@ class CosineAnnealingDecay(LRScheduler):
    def _get_closed_form_lr(self):
        return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos(
            math.pi * self.last_epoch / self.T_max)) / 2
class MultiplicativeDecay(LRScheduler):
"""
Multiply the learning rate of ``optimizer`` by the factor given in function ``lr_lambda`` .
The algorithm can be described as the code below.
.. code-block:: text
learning_rate = 0.5 # init learning_rate
lr_lambda = lambda epoch: 0.95
learning_rate = 0.5 # epoch 0,
learning_rate = 0.475 # epoch 1, 0.5*0.95
learning_rate = 0.45125 # epoch 2, 0.475*0.95
Args:
learning_rate (float): The initial learning rate. It is a python float number.
lr_lambda (function): A function which computes a factor by ``epoch``, and then multiplies the last learning rate by this factor.
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .
Returns:
``MultiplicativeDecay`` instance to schedule learning rate.
Examples:
.. code-block:: python
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
"""
def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
if not callable(lr_lambda):
raise TypeError(
"The type of 'lr_lambda' in 'MultiplicativeDecay' must be 'function', but received %s."
% type(lr_lambda))
self.lr_lambda = lr_lambda
super(MultiplicativeDecay, self).__init__(learning_rate, last_epoch,
verbose)
def get_lr(self):
if self.last_epoch > 0:
return self.last_lr * self.lr_lambda(self.last_epoch)
else:
return self.base_lr
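As a quick check on the recurrence in get_lr (the previous last_lr multiplied by lr_lambda(last_epoch)), the following sketch reproduces the sequence quoted in the docstring; it assumes a Paddle build that already includes this new class:

    import paddle

    # Illustrative check: prints 0.5, 0.475, 0.45125 with a constant factor of 0.95.
    scheduler = paddle.optimizer.lr.MultiplicativeDecay(
        learning_rate=0.5, lr_lambda=lambda epoch: 0.95)
    for epoch in range(3):
        print(scheduler.last_lr)
        scheduler.step()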