Commit c180ee8a authored by zhongpu, committed by hong

add learning rate api of optimizer (#22080) (#22339)

* add learning rate api; test=develop

* fix unit test coverage; test=develop

* fix travis ci error; test=develop

* fix comment; test=develop

* fix example error; test=develop

* polish the api description, test=develop
Co-authored-by: zhongpu <2013000149@qq.com>
Co-authored-by: hong <43953930+phlrain@users.noreply.github.com>
Parent c14298c5
@@ -285,6 +285,68 @@ class Optimizer(object):
            dtype='float32' if self._dtype is None else self._dtype,
            persistable=True)
    @framework.dygraph_only
    def current_step_lr(self):
        """
        .. note::
            **This API is ONLY available in Dygraph mode.**

        Get the learning rate of the current step. When LearningRateDecay is not used,
        the return value is the same at every step; otherwise the step learning rate
        is returned.

        Returns:
            float: The learning rate of the current step.

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                import numpy as np

                # example1: LearningRateDecay is not used, the return value is the same at every step
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])
                    adam = fluid.optimizer.Adam(0.001, parameter_list=emb.parameters())
                    lr = adam.current_step_lr()
                    print(lr) # 0.001

                # example2: PiecewiseDecay is used, the step learning rate is returned
                with fluid.dygraph.guard():
                    inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
                    linear = fluid.dygraph.nn.Linear(10, 10)
                    inp = fluid.dygraph.to_variable(inp)
                    out = linear(inp)
                    loss = fluid.layers.reduce_mean(out)

                    bd = [2, 4, 6, 8]
                    value = [0.2, 0.4, 0.6, 0.8, 1.0]
                    adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(bd, value, 0),
                                                parameter_list=linear.parameters())

                    # first step: learning rate is 0.2
                    np.allclose(adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0) # True

                    # learning rate for different steps
                    ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
                    for i in range(12):
                        adam.minimize(loss)
                        lr = adam.current_step_lr()
                        np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True

        """
        current_lr = self._global_learning_rate()
        if current_lr:
            return self._global_learning_rate().numpy()[0]

        if isinstance(self._learning_rate, float):
            return self._learning_rate
        else:
            step_lr = self._learning_rate.step()
            if isinstance(step_lr, (float, int)):
                return step_lr
            else:
                return step_lr.numpy()[0]
    def _global_learning_rate(self, program=None):
        """
        get global decayed learning rate
...
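The new `current_step_lr` falls through three cases: the materialized global learning-rate Variable (once one exists), a plain float learning rate, and finally the value returned by the `LearningRateDecay` schedule's `step()`. The snippet below is a minimal standalone sketch of that same fall-through order, written without Paddle so it runs on its own; `GlobalLRVar` and `FakeDecay` are hypothetical stand-ins for illustration, not Paddle classes and not part of this diff.

# Standalone sketch (not part of the diff): the fall-through order used by
# current_step_lr(), re-expressed without Paddle. GlobalLRVar and FakeDecay
# are hypothetical stand-ins for the global learning-rate Variable and a
# LearningRateDecay schedule.

class GlobalLRVar(object):
    """Stand-in for the global learning-rate Variable created by the optimizer."""

    def __init__(self, value):
        self._value = value

    def numpy(self):
        return [self._value]


class FakeDecay(object):
    """Stand-in for a LearningRateDecay whose step() returns the current rate."""

    def __init__(self, rates):
        self._rates = rates
        self._step = 0

    def step(self):
        lr = self._rates[min(self._step, len(self._rates) - 1)]
        self._step += 1
        return lr


def current_step_lr(global_lr_var, learning_rate):
    # 1) Prefer the materialized global learning-rate Variable, if any.
    if global_lr_var:
        return global_lr_var.numpy()[0]
    # 2) A plain float means no schedule: the rate never changes.
    if isinstance(learning_rate, float):
        return learning_rate
    # 3) Otherwise ask the schedule for the current step's rate.
    step_lr = learning_rate.step()
    return step_lr if isinstance(step_lr, (float, int)) else step_lr.numpy()[0]


print(current_step_lr(None, 0.001))                       # 0.001
print(current_step_lr(None, FakeDecay([0.2, 0.4, 0.6])))  # 0.2
print(current_step_lr(GlobalLRVar(0.5), 0.001))           # 0.5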
@@ -340,6 +340,95 @@ class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
        self._check_mlp()
class TestOptimizerLearningRate(unittest.TestCase):
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)
            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0))

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()
                self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0))

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)
            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(
                fluid.dygraph.PiecewiseDecay(bd, value, 0),
                parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0))

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()
                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))

    def test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)
            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(
                fluid.dygraph.NaturalExpDecay(
                    learning_rate=base_lr,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True),
                parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0))

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()
                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
...
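The expected values in test_lr_decay_natural_exp can be reproduced by hand. Assuming staircase NaturalExpDecay computes lr = base_lr * exp(-decay_rate * floor(step / decay_steps)) — a formula inferred from the asserted values, not stated in the diff — the arithmetic works out as in the short check below.

import numpy as np

# Assumed staircase NaturalExpDecay formula (inferred from the test's expected values):
#   lr(step) = base_lr * exp(-decay_rate * (step // decay_steps))
base_lr, decay_steps, decay_rate = 1.0, 3, 0.5

expected = [base_lr * np.exp(-decay_rate * (step // decay_steps)) for step in range(5)]
print(expected)  # [1.0, 1.0, 1.0, 0.6065..., 0.6065...] i.e. [1.0, 1.0, 1.0, exp(-0.5), exp(-0.5)]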