From c180ee8ac65bbca94f75bb401ba917df3944f829 Mon Sep 17 00:00:00 2001
From: zhongpu <2013000149@qq.com>
Date: Fri, 17 Jan 2020 14:21:56 +0800
Subject: [PATCH] add learning rate api of optimizer (#22080) (#22339)

* add learning rate api; test=develop
* fix unit test coverage; test=develop
* fix travis ci error; test=develop
* fix comment; test=develop
* fix example error; test=develop
* polish the api description, test=develop

Co-authored-by: zhongpu <2013000149@qq.com>
Co-authored-by: hong <43953930+phlrain@users.noreply.github.com>
---
 python/paddle/fluid/optimizer.py              | 62 +++++++++++++
 .../unittests/test_imperative_optimizer.py    | 89 +++++++++++++++++++
 2 files changed, 151 insertions(+)

diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index f3af2c74192..0fbf31b8ab1 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -285,6 +285,68 @@ class Optimizer(object):
             dtype='float32' if self._dtype is None else self._dtype,
             persistable=True)
 
+    @framework.dygraph_only
+    def current_step_lr(self):
+        """
+        .. note::
+          **This API is ONLY available in Dygraph mode**
+
+        Get the learning rate of the current step. When LearningRateDecay is not used,
+        the return value is the same at every step; otherwise, the scheduled learning rate of the current step is returned.
+
+        Returns:
+            float: The learning rate of the current step.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                import numpy as np
+
+                # example1: LearningRateDecay is not used, the return value is always the same
+                with fluid.dygraph.guard():
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    adam = fluid.optimizer.Adam(0.001, parameter_list=emb.parameters())
+                    lr = adam.current_step_lr()
+                    print(lr) # 0.001
+
+                # example2: PiecewiseDecay is used, the scheduled learning rate of the current step is returned
+                with fluid.dygraph.guard():
+                    inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+                    linear = fluid.dygraph.nn.Linear(10, 10)
+                    inp = fluid.dygraph.to_variable(inp)
+                    out = linear(inp)
+                    loss = fluid.layers.reduce_mean(out)
+
+                    bd = [2, 4, 6, 8]
+                    value = [0.2, 0.4, 0.6, 0.8, 1.0]
+                    adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(bd, value, 0),
+                                                parameter_list=linear.parameters())
+
+                    # first step: learning rate is 0.2
+                    np.allclose(adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0) # True
+
+                    # learning rate for different steps
+                    ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
+                    for i in range(12):
+                        adam.minimize(loss)
+                        lr = adam.current_step_lr()
+                        np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True
+
+        """
+        current_lr = self._global_learning_rate()
+        if current_lr:
+            return self._global_learning_rate().numpy()[0]
+
+        if isinstance(self._learning_rate, float):
+            return self._learning_rate
+        else:
+            step_lr = self._learning_rate.step()
+            if isinstance(step_lr, (float, int)):
+                return step_lr
+            else:
+                return step_lr.numpy()[0]
+
     def _global_learning_rate(self, program=None):
         """
         get global decayed learning rate
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index ac12e79156d..398b31f006b 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -340,6 +340,95 @@ class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
         self._check_mlp()
 
 
+class TestOptimizerLearningRate(unittest.TestCase):
+    def test_constant_lr(self):
+        with fluid.dygraph.guard():
+            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+
+            linear = fluid.dygraph.nn.Linear(10, 10)
+
+            a = fluid.dygraph.to_variable(a)
+
+            b = linear(a)
+
+            loss = fluid.layers.reduce_mean(b)
+
+            adam = fluid.optimizer.Adam(
+                0.001, parameter_list=linear.parameters())
+
+            self.assertTrue(
+                np.allclose(
+                    adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0))
+
+            for i in range(10):
+                adam.minimize(loss)
+                lr = adam.current_step_lr()
+
+                self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0))
+
+    def test_lr_decay(self):
+        with fluid.dygraph.guard():
+            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+
+            linear = fluid.dygraph.nn.Linear(10, 10)
+
+            a = fluid.dygraph.to_variable(a)
+
+            b = linear(a)
+
+            loss = fluid.layers.reduce_mean(b)
+
+            bd = [2, 4, 6, 8]
+            value = [0.2, 0.4, 0.6, 0.8, 1.0]
+
+            adam = fluid.optimizer.Adam(
+                fluid.dygraph.PiecewiseDecay(bd, value, 0),
+                parameter_list=linear.parameters())
+
+            self.assertTrue(
+                np.allclose(
+                    adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0))
+
+            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
+            for i in range(12):
+                adam.minimize(loss)
+                lr = adam.current_step_lr()
+
+                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
+
+    def test_lr_decay_natural_exp(self):
+        with fluid.dygraph.guard():
+            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+
+            linear = fluid.dygraph.nn.Linear(10, 10)
+
+            a = fluid.dygraph.to_variable(a)
+
+            b = linear(a)
+
+            loss = fluid.layers.reduce_mean(b)
+            base_lr = 1.0
+
+            adam = fluid.optimizer.Adam(
+                fluid.dygraph.NaturalExpDecay(
+                    learning_rate=base_lr,
+                    decay_steps=3,
+                    decay_rate=0.5,
+                    staircase=True),
+                parameter_list=linear.parameters())
+
+            self.assertTrue(
+                np.allclose(
+                    adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0))
+
+            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
+            for i in range(5):
+                adam.minimize(loss)
+                lr = adam.current_step_lr()
+
+                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
+
+
 class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
-- 
GitLab
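
Below is a minimal usage sketch of the new current_step_lr() API. It is not part of the patch; it is assembled only from calls that already appear in this diff (fluid.dygraph.guard, fluid.dygraph.nn.Linear, fluid.dygraph.PiecewiseDecay, fluid.optimizer.Adam, Optimizer.minimize). Variable names such as inp, adam_const and adam_decay are illustrative, and the expected values noted in the comments are the ones the patch's own docstring and tests assert.

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        inp = fluid.dygraph.to_variable(
            np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32"))
        linear = fluid.dygraph.nn.Linear(10, 10)
        loss = fluid.layers.reduce_mean(linear(inp))

        # Constant float learning rate: current_step_lr() returns the same
        # value at every step (0.001 here), as in example1 of the docstring.
        adam_const = fluid.optimizer.Adam(
            0.001, parameter_list=linear.parameters())
        print(adam_const.current_step_lr())  # 0.001

        # PiecewiseDecay schedule: current_step_lr() follows the schedule as
        # minimize() advances the step counter, as in example2 and the tests.
        adam_decay = fluid.optimizer.Adam(
            fluid.dygraph.PiecewiseDecay([2, 4, 6, 8],
                                         [0.2, 0.4, 0.6, 0.8, 1.0], 0),
            parameter_list=linear.parameters())
        for step in range(6):
            adam_decay.minimize(loss)
            # learning rates: 0.2, 0.2, 0.4, 0.4, 0.6, 0.6
            # (the first entries of the ret list used in the tests above)
            print(step, adam_decay.current_step_lr())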