Unverified commit 63f242b6, authored by LoneRanger, committed by GitHub

replace PiecewiseDecay, StepDecay, MultiStepDecay, LambdaDecay with 2.0 version (#53992)

* replace PiecewiseDecay(LearningRateDecay) with PiecewiseDecay(LRScheduler)

* fix bug

* fix bug

* replace the StepDecay,MultiStepDecay,LambdaDecay with 2.0 version
Parent commit: 54b86fd4
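Migration note: this commit replaces the legacy `fluid.dygraph` learning-rate schedulers with their `paddle.optimizer.lr` counterparts. A minimal sketch of the new-style usage, assuming paddle >= 2.0 (the model and data below are illustrative, not taken from this patch):

```python
import paddle

boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]

# The 2.0 scheduler has no `begin` argument; stepping is done explicitly.
scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries, values=values)
linear = paddle.nn.Linear(10, 10)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())

out = linear(paddle.rand([4, 10]))
loss = paddle.mean(out)
loss.backward()
sgd.step()         # update parameters
sgd.clear_grad()
scheduler.step()   # advance the learning-rate schedule
```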
@@ -23,13 +23,9 @@ from ..data_feeder import check_type
__all__ = [
'NoamDecay',
'PiecewiseDecay',
'PolynomialDecay',
'LinearLrWarmup',
'ReduceLROnPlateau',
'StepDecay',
'MultiStepDecay',
'LambdaDecay',
]
@@ -131,68 +127,6 @@ class LearningRateDecay:
raise NotImplementedError()
class PiecewiseDecay(LearningRateDecay):
"""
:api_attr: imperative
Piecewise decay scheduler.
The algorithm can be described as the code below.
.. code-block:: text
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
if global_step < 10000:
learning_rate = 1.0
elif 10000 <= global_step < 20000:
learning_rate = 0.5
else:
learning_rate = 0.1
Parameters:
boundaries(list): A list of steps numbers. The type of element in the list is python int.
values(list): A list of learning rate values that will be picked during
different step boundaries. The type of element in the list is python float.
begin(int): The begin step to initialize the global_step in the description above.
step(int, optional): The step size used to calculate the new global_step in the description above.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
Returns:
None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
with fluid.dygraph.guard():
emb = paddle.nn.Embedding(10, 10)
optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
parameter_list = emb.parameters() )
"""
def __init__(self, boundaries, values, begin, step=1, dtype='float32'):
super().__init__(begin, step, dtype)
self.boundaries = boundaries
self.values = values
self.vars = []
for value in values:
self.vars.append(value)
def step(self):
for i in range(len(self.boundaries)):
if self.step_num < self.boundaries[i]:
return self.vars[i]
return self.create_lr_var(self.vars[len(self.values) - 1])
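The `PiecewiseDecay(LearningRateDecay)` class removed above is superseded by `paddle.optimizer.lr.PiecewiseDecay`. A sketch of the 2.0 equivalent of the deleted docstring example, assuming paddle >= 2.0 (note the new API takes no `begin` parameter):

```python
import paddle

boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]

emb = paddle.nn.Embedding(10, 10)
optimizer = paddle.optimizer.SGD(
    learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
    parameters=emb.parameters(),
)
```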
class PolynomialDecay(LearningRateDecay):
r"""
:api_attr: imperative
@@ -742,241 +676,3 @@ class _LearningRateEpochDecay(LearningRateDecay):
def get_lr(self):
raise NotImplementedError
class StepDecay(_LearningRateEpochDecay):
"""
:api_attr: imperative
Decays the learning rate of ``optimizer`` by ``decay_rate`` every ``step_size`` number of epoch.
The algorithm can be described as the code below.
.. code-block:: text
learning_rate = 0.5
step_size = 30
decay_rate = 0.1
learning_rate = 0.5 if epoch < 30
learning_rate = 0.05 if 30 <= epoch < 60
learning_rate = 0.005 if 60 <= epoch < 90
...
Parameters:
learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
step_size (int): Period of learning rate decay.
decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
It should be less than 1.0. Default: 0.1.
Returns:
None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
import paddle
with fluid.dygraph.guard():
x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
input = fluid.dygraph.to_variable(x)
scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
for epoch in range(9):
for batch_id in range(5):
out = linear(input)
loss = paddle.mean(out)
adam.minimize(loss)
scheduler.epoch()
print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
# epoch:0, current lr is 0.5
# epoch:1, current lr is 0.5
# epoch:2, current lr is 0.5
# epoch:3, current lr is 0.05
# epoch:4, current lr is 0.05
# epoch:5, current lr is 0.05
# epoch:6, current lr is 0.005
# epoch:7, current lr is 0.005
# epoch:8, current lr is 0.005
"""
def __init__(self, learning_rate, step_size, decay_rate=0.1):
if not isinstance(step_size, int):
raise TypeError(
"The type of 'step_size' must be 'int', but received %s."
% type(step_size)
)
if decay_rate >= 1.0:
raise ValueError('decay_rate should be < 1.0.')
self.step_size = step_size
self.decay_rate = decay_rate
super().__init__(learning_rate)
def get_lr(self):
decay_rate = self.create_lr_var(self.decay_rate)
i = self.epoch_num // self.step_size
return self.base_lr * (decay_rate**i)
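The deleted `StepDecay` maps to `paddle.optimizer.lr.StepDecay`, whose decay factor is named `gamma` rather than `decay_rate`. A hedged rewrite of the removed docstring example, assuming paddle >= 2.0 (random inputs stand in for real data):

```python
import paddle

linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=3, gamma=0.1)
adam = paddle.optimizer.Adam(learning_rate=scheduler, parameters=linear.parameters())

for epoch in range(9):
    for batch_id in range(5):
        out = linear(paddle.rand([10, 10]))
        loss = paddle.mean(out)
        loss.backward()
        adam.step()
        adam.clear_grad()
    # lr in effect this epoch: 0.5 for epochs 0-2, 0.05 for 3-5, 0.005 for 6-8
    print(f"epoch:{epoch}, current lr is {adam.get_lr()}")
    scheduler.step()  # replaces the old scheduler.epoch()
```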
class MultiStepDecay(_LearningRateEpochDecay):
"""
:api_attr: imperative
Decays the learning rate of ``optimizer`` by ``decay_rate`` once ``epoch`` reaches one of the milestones.
The algorithm can be described as the code below.
.. code-block:: text
learning_rate = 0.5
milestones = [30, 50]
decay_rate = 0.1
if epoch < 30:
learning_rate = 0.5
elif epoch < 50:
learning_rate = 0.05
else:
learning_rate = 0.005
Parameters:
learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
milestones (tuple|list): List or tuple of each boundaries. Must be increasing.
decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` .
It should be less than 1.0. Default: 0.1.
Returns:
None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
import paddle
with fluid.dygraph.guard():
x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
input = fluid.dygraph.to_variable(x)
scheduler = fluid.dygraph.MultiStepDecay(0.5, milestones=[3, 5])
adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
for epoch in range(6):
for batch_id in range(5):
out = linear(input)
loss = paddle.mean(out)
adam.minimize(loss)
scheduler.epoch()
print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
# epoch:0, current lr is 0.5
# epoch:1, current lr is 0.5
# epoch:2, current lr is 0.5
# epoch:3, current lr is 0.05
# epoch:4, current lr is 0.05
# epoch:5, current lr is 0.005
"""
def __init__(self, learning_rate, milestones, decay_rate=0.1):
if not isinstance(milestones, (tuple, list)):
raise TypeError(
"The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s."
% type(milestones)
)
if not all(
[
milestones[i] < milestones[i + 1]
for i in range(len(milestones) - 1)
]
):
raise ValueError('The elements of milestones must be incremented')
if decay_rate >= 1.0:
raise ValueError('decay_rate should be < 1.0.')
self.milestones = milestones
self.decay_rate = decay_rate
super().__init__(learning_rate)
def get_lr(self):
decay_rate = self.create_lr_var(self.decay_rate)
for i in range(len(self.milestones)):
if self.epoch_num < self.milestones[i]:
return self.base_lr * (decay_rate**i)
return self.base_lr * (decay_rate ** len(self.milestones))
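`MultiStepDecay` likewise moves to `paddle.optimizer.lr.MultiStepDecay` (decay factor `gamma`). A minimal sketch matching the removed docstring's milestones, assuming paddle >= 2.0; the training step is elided:

```python
import paddle

scheduler = paddle.optimizer.lr.MultiStepDecay(
    learning_rate=0.5, milestones=[3, 5], gamma=0.1
)
for epoch in range(6):
    # ... run one epoch with an optimizer constructed with `scheduler` ...
    # lr in effect this epoch: 0.5 before epoch 3, 0.05 before epoch 5, then 0.005
    print(f"epoch:{epoch}, current lr is {scheduler.last_lr}")
    scheduler.step()  # replaces the old scheduler.epoch()
```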
class LambdaDecay(_LearningRateEpochDecay):
"""
:api_attr: imperative
Sets the learning rate of ``optimizer`` to the initial lr times a multiplicative factor, and this multiplicative
factor is computed by function ``lr_lambda`` . ``lr_lambda`` is function which receives ``epoch`` .
The algorithm can be described as the code below.
.. code-block:: text
learning_rate = 0.5 # init learning_rate
lr_lambda = lambda epoch: 0.95 ** epoch
learning_rate = 0.5 # epoch 0
learning_rate = 0.475 # epoch 1
learning_rate = 0.45125 # epoch 2
Parameters:
learning_rate (float|int): The initial learning rate. It can be set to python float or int number.
lr_lambda (function): A function which computes a multiplicative factor given an integer parameter ``epoch`` , and
then multiply the initial learning rate by this multiplicative factor.
Returns:
None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
import paddle
with fluid.dygraph.guard():
x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
input = fluid.dygraph.to_variable(x)
scheduler = fluid.dygraph.LambdaDecay(0.5, lr_lambda=lambda x: 0.95**x)
adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
for epoch in range(6):
for batch_id in range(5):
out = linear(input)
loss = paddle.mean(out)
adam.minimize(loss)
scheduler.epoch()
print("epoch:%d, current lr is %f" .format(epoch, adam.current_step_lr()))
# epoch:0, current lr is 0.5
# epoch:1, current lr is 0.475
# epoch:2, current lr is 0.45125
"""
def __init__(self, learning_rate, lr_lambda):
if not callable(lr_lambda):
raise TypeError(
"The type of 'lr_lambda' in 'LambdaDecay' must be 'function', but received %s."
% type(lr_lambda)
)
self.lr_lambda = lr_lambda
super().__init__(learning_rate)
def get_lr(self):
base_lr = self.create_lr_var(self.base_lr)
return self.base_lr * self.lr_lambda(self.epoch_num)
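`LambdaDecay` becomes `paddle.optimizer.lr.LambdaDecay` with the same `lr_lambda` semantics. A short sketch reproducing the deleted docstring's numbers, assuming paddle >= 2.0:

```python
import paddle

scheduler = paddle.optimizer.lr.LambdaDecay(
    learning_rate=0.5, lr_lambda=lambda epoch: 0.95 ** epoch
)
linear = paddle.nn.Linear(10, 10)
adam = paddle.optimizer.Adam(learning_rate=scheduler, parameters=linear.parameters())

for epoch in range(3):
    # ... run one training epoch ...
    print(f"epoch:{epoch}, current lr is {scheduler.last_lr}")  # 0.5, 0.475, 0.45125
    scheduler.step()
```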
@@ -410,10 +410,10 @@ def piecewise_decay(boundaries, values):
paddle.enable_static()
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
optimizer = fluid.optimizer.Momentum(
optimizer = paddle.optimizer.Momentum(
momentum=0.9,
learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values),
regularization=paddle.regularizer.L2Decay(1e-4))
learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values),
weight_decay=paddle.regularizer.L2Decay(1e-4))
"""
@@ -422,7 +422,7 @@ def piecewise_decay(boundaries, values):
raise ValueError("len(values) - len(boundaries) should be 1")
if in_dygraph_mode():
decay = imperate_lr.PiecewiseDecay(boundaries, values, 0)
decay = paddle.optimizer.lr.PiecewiseDecay(boundaries, values)
return decay
else:
global_step = _decay_step_counter()
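Per the hunk above, the dygraph branch of `fluid.layers.piecewise_decay` now hands back a `paddle.optimizer.lr.PiecewiseDecay` (an `LRScheduler`) instead of the old imperative `LearningRateDecay`. Calling the scheduler returns the current rate as a Python float; a minimal sketch, assuming paddle >= 2.0:

```python
import paddle

paddle.disable_static()  # dygraph mode
decay = paddle.optimizer.lr.PiecewiseDecay([10000, 20000], [1.0, 0.5, 0.1])
print(decay())  # 1.0 -- the rate for the initial step
decay.step()    # advance the schedule by one step
```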
@@ -98,6 +98,8 @@ class LRScheduler:
type(learning_rate)
)
)
if learning_rate < 0:
raise ValueError(f"Invalid learning rate: {learning_rate}")
self.base_lr = float(learning_rate)
self.last_lr = float(learning_rate)
self.last_epoch = last_epoch
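The `LRScheduler` hunk above adds a guard rejecting negative learning rates. A quick illustration of the expected behavior with this patch applied, assuming paddle >= 2.0:

```python
import paddle

try:
    paddle.optimizer.lr.StepDecay(learning_rate=-0.5, step_size=3)
except ValueError as err:
    print(err)  # Invalid learning rate: -0.5
```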
@@ -376,9 +376,9 @@ def dyfunc_NoamDecay():
def dyfunc_PiecewiseDecay():
boundaries = [10000, 20000]
values = [1.0, 0.5, 0.1]
pd = fluid.dygraph.PiecewiseDecay(boundaries, values, begin=0)
pd = paddle.optimizer.lr.PiecewiseDecay(boundaries, values)
lr = pd()
return lr
return paddle.to_tensor(lr)
def dyfunc_PolynomialDecay():
@@ -94,11 +94,11 @@ def train(to_static):
learning_rate = cfg.learning_rate
values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
lr = fluid.dygraph.PiecewiseDecay(
boundaries=boundaries, values=values, begin=0
lr = paddle.optimizer.lr.PiecewiseDecay(
boundaries=boundaries, values=values
)
lr = fluid.layers.linear_lr_warmup(
lr = paddle.optimizer.lr.LinearWarmup(
learning_rate=lr,
warmup_steps=cfg.warm_up_iter,
start_lr=0.0,
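The training-script hunk above composes the two 2.0 schedulers: a `PiecewiseDecay` wrapped in `LinearWarmup`. A hedged sketch of the pattern, assuming paddle >= 2.0 (the boundary, value, and warmup numbers are illustrative, not taken from `cfg`):

```python
import paddle

base = paddle.optimizer.lr.PiecewiseDecay(
    boundaries=[400, 800], values=[0.1, 0.01, 0.001]
)
lr = paddle.optimizer.lr.LinearWarmup(
    learning_rate=base,  # an LRScheduler (or a float) to warm up into
    warmup_steps=100,
    start_lr=0.0,
    end_lr=0.1,
)
linear = paddle.nn.Linear(10, 10)
optimizer = paddle.optimizer.Momentum(
    learning_rate=lr, momentum=0.9, parameters=linear.parameters()
)
```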
@@ -262,7 +262,7 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
bd = [3, 6, 9]
optimizer = SGDOptimizer(
learning_rate=fluid.layers.piecewise_decay(
learning_rate=paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd,
values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
)
@@ -470,20 +470,20 @@ class TestOptimizerLearningRate(unittest.TestCase):
bd = [2, 4, 6, 8]
value = [0.2, 0.4, 0.6, 0.8, 1.0]
adam = fluid.optimizer.Adam(
fluid.dygraph.PiecewiseDecay(bd, value, 0),
parameter_list=linear.parameters(),
scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
adam = paddle.optimizer.Adam(
scheduler,
parameters=linear.parameters(),
)
np.testing.assert_allclose(
adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0
)
np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)
ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
for i in range(12):
adam.minimize(loss)
lr = adam.current_step_lr()
lr = adam.get_lr()
adam.step()
scheduler.step()
np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
def test_lr_decay_natural_exp(self):
@@ -127,7 +127,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
learning_rate=0.1,
gamma=0.5,
)
Step_scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
Step_scheduler = paddle.optimizer.lr.StepDecay(0.5, step_size=3)
Reducelr_scheduler = fluid.dygraph.ReduceLROnPlateau(
learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3
)
@@ -154,7 +154,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
adam3.minimize(loss)
linear.clear_gradients()
Step_scheduler.epoch()
Step_scheduler.get_lr()
Reducelr_scheduler.step(loss)
paddle.save(linear.state_dict(), "save_path.pdparams")
@@ -163,7 +163,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
learning_rate=0.1,
gamma=0.5,
)
Step_scheduler_test = fluid.dygraph.StepDecay(0.5, step_size=3)
Step_scheduler_test = paddle.optimizer.lr.StepDecay(
0.5, step_size=3
)
Reducelr_scheduler_test = fluid.dygraph.ReduceLROnPlateau(
learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3
)
@@ -189,8 +191,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
)
adam_test.set_dict(opt_state)
self.assertEqual(
adam_test._learning_rate.epoch_num,
adam2._learning_rate.epoch_num,
adam_test._learning_rate.last_epoch,
adam2._learning_rate.last_epoch,
"epoch_num is different before and after set_dict",
)
self.assertEqual(
@@ -288,19 +290,20 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
decay_rate = 0.2
linear = paddle.nn.Linear(10, 10)
scheduler = fluid.dygraph.MultiStepDecay(
scheduler = paddle.optimizer.lr.MultiStepDecay(
learning_rate, milestones, decay_rate
)
adam = fluid.optimizer.AdamOptimizer(
learning_rate=scheduler, parameter_list=linear.parameters()
adam = paddle.optimizer.Adam(
learning_rate=scheduler, parameters=linear.parameters()
)
for epoch in range(10):
right_result = multi_step_decay(
epoch, learning_rate, milestones, decay_rate
)
fluid_result = adam.current_step_lr()
scheduler.epoch()
fluid_result = adam.get_lr()
adam.step()
scheduler.step()
self.assertAlmostEqual(
right_result,
fluid_result,
@@ -310,35 +313,36 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
)
with self.assertRaises(ValueError):
lr = fluid.dygraph.MultiStepDecay(
lr = paddle.optimizer.lr.MultiStepDecay(
learning_rate, [30, 50, 20], 0.1
)
with self.assertRaises(ValueError):
lr = fluid.dygraph.MultiStepDecay(
lr = paddle.optimizer.lr.MultiStepDecay(
learning_rate, [20, 30, 50], 1
)
with self.assertRaises(TypeError):
lr = fluid.dygraph.MultiStepDecay("test", [20, 30, 50])
lr = paddle.optimizer.lr.MultiStepDecay("test", [20, 30, 50])
with self.assertRaises(ValueError):
lr = fluid.dygraph.MultiStepDecay(-1, [20, 30, 50])
lr = paddle.optimizer.lr.MultiStepDecay(-1, [20, 30, 50])
def test_StepDecay(self):
with fluid.dygraph.guard():
learning_rate = 0.5
step_size = 3
decay_rate = 0.2
scheduler = fluid.dygraph.StepDecay(
scheduler = paddle.optimizer.lr.StepDecay(
learning_rate, step_size, decay_rate
)
for epoch in range(10):
right_result = step_decay(
epoch, learning_rate, step_size, decay_rate
)
fluid_result = scheduler().numpy().item()
scheduler.epoch()
fluid_result = scheduler()
scheduler.get_lr()
scheduler.step()
self.assertAlmostEqual(
right_result,
fluid_result,
@@ -348,16 +352,18 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
)
with self.assertRaises(TypeError):
lr = fluid.dygraph.StepDecay(learning_rate, "test", 0.1)
lr = paddle.optimizer.lr.StepDecay(learning_rate, "test", 0.1)
with self.assertRaises(ValueError):
lr = fluid.dygraph.StepDecay(learning_rate, 20, 2)
lr = paddle.optimizer.lr.StepDecay(learning_rate, 20, 2)
def test_LambdaDecay(self):
with fluid.dygraph.guard():
learning_rate = 0.5
lr_lambda = lambda x: 0.95**x
scheduler = fluid.dygraph.LambdaDecay(learning_rate, lr_lambda)
scheduler = paddle.optimizer.lr.LambdaDecay(
learning_rate, lr_lambda
)
linear = paddle.nn.Linear(10, 10)
adam = fluid.optimizer.Adam(
@@ -366,8 +372,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
for epoch in range(30):
right_result = lambda_decay(epoch, learning_rate, lr_lambda)
fluid_result = scheduler().numpy().item()
scheduler.epoch()
fluid_result = scheduler()
scheduler.get_lr()
scheduler.step()
self.assertAlmostEqual(
right_result,
fluid_result,
@@ -377,7 +384,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
)
with self.assertRaises(TypeError):
lr = fluid.dygraph.LambdaDecay(learning_rate, "test")
lr = paddle.optimizer.lr.LambdaDecay(learning_rate, "test")
class TestLearningRateDecay(unittest.TestCase):