From 00c0139e6e816c7c8ba327a1d0a0d8d3420ecab8 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Thu, 16 Jan 2020 19:43:36 +0800
Subject: [PATCH] add learning rate api of optimizer (#22080)

* add learning rate api; test=develop

* fix unit test coverage; test=develop

* fix travis ci error; test=develop

* fix comment; test=develop

* fix example error; test=develop

* polish the api description, test=develop

Co-authored-by: zhongpu <2013000149@qq.com>
---
 python/paddle/fluid/optimizer.py              | 62 +++++++++++++
 .../unittests/test_imperative_optimizer.py    | 89 +++++++++++++++++++
 2 files changed, 151 insertions(+)

diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 52574575c34..99eb435ff08 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -285,6 +285,68 @@ class Optimizer(object):
                 dtype='float32' if self._dtype is None else self._dtype,
                 persistable=True)
 
+    @framework.dygraph_only
+    def current_step_lr(self):
+        """
+        .. note::
+          **This API is ONLY available in Dygraph mode**
+
+        Get the learning rate of the current step. When LearningRateDecay is not used,
+        the return value is the same at every step; otherwise it is the rate of that step.
+
+        Returns:
+            float: The learning rate of the current step.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                import numpy as np
+
+                # example1: LearningRateDecay is not used, return value is all the same
+                with fluid.dygraph.guard():
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    adam = fluid.optimizer.Adam(0.001, parameter_list = emb.parameters())
+                    lr = adam.current_step_lr()
+                    print(lr) # 0.001
+
+                # example2: PiecewiseDecay is used, return the step learning rate
+                with fluid.dygraph.guard():
+                    inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+                    linear = fluid.dygraph.nn.Linear(10, 10)
+                    inp = fluid.dygraph.to_variable(inp)
+                    out = linear(inp)
+                    loss = fluid.layers.reduce_mean(out)
+
+                    bd = [2, 4, 6, 8]
+                    value = [0.2, 0.4, 0.6, 0.8, 1.0]
+                    adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(bd, value, 0),
+                                           parameter_list=linear.parameters())
+
+                    # first step: learning rate is 0.2
+                    np.allclose(adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0) # True
+
+                    # learning rate for different steps
+                    ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
+                    for i in range(12):
+                        adam.minimize(loss)
+                        lr = adam.current_step_lr()
+                        np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True
+        """
+        # Explicit None check (not truth-testing the returned variable), and
+        # the looked-up value is reused instead of being fetched a second time.
+        current_lr = self._global_learning_rate()
+        if current_lr is not None:
+            return current_lr.numpy()[0]
+
+        if isinstance(self._learning_rate, float):
+            return self._learning_rate
+
+        step_lr = self._learning_rate.step()
+        if isinstance(step_lr, (float, int)):
+            return step_lr
+        return step_lr.numpy()[0]
+
     def _global_learning_rate(self, program=None):
         """
         get global decayed learning rate
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index ac12e79156d..398b31f006b 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -340,6 +340,95 @@ class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
         self._check_mlp()
 
 
+class TestOptimizerLearningRate(unittest.TestCase):
+    """Exercise ``current_step_lr`` of an Adam optimizer in dygraph mode.
+
+    Each case reads the rate once before training and once after every
+    ``minimize`` call, comparing it with the expected value.
+    """
+
+    def _make_linear_and_loss(self):
+        """Build a Linear(10, 10) layer and a mean loss over random input.
+
+        Called from inside the ``fluid.dygraph.guard()`` block of each test.
+
+        Returns:
+            tuple: ``(linear, loss)``.
+        """
+        data = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+        linear = fluid.dygraph.nn.Linear(10, 10)
+        out = linear(fluid.dygraph.to_variable(data))
+        loss = fluid.layers.reduce_mean(out)
+        return linear, loss
+
+    def _assert_lr(self, actual, expected):
+        """Assert ``actual`` is close to ``expected`` (rtol=1e-06, atol=0.0)."""
+        self.assertTrue(
+            np.allclose(
+                actual, expected, rtol=1e-06, atol=0.0))
+
+    def _check_schedule(self, optimizer, loss, initial_lr, expected_lrs):
+        """Check the reported rate before training and after each step.
+
+        Args:
+            optimizer: optimizer whose ``current_step_lr`` is checked.
+            loss: loss variable handed to ``optimizer.minimize``.
+            initial_lr: rate expected before the first ``minimize`` call.
+            expected_lrs: rates expected after each successive ``minimize``.
+        """
+        self._assert_lr(optimizer.current_step_lr(), initial_lr)
+
+        for expected in expected_lrs:
+            optimizer.minimize(loss)
+            self._assert_lr(optimizer.current_step_lr(), expected)
+
+    def test_constant_lr(self):
+        """A float learning rate of 0.001 is expected at every step."""
+        with fluid.dygraph.guard():
+            linear, loss = self._make_linear_and_loss()
+
+            constant_lr = 0.001
+            adam = fluid.optimizer.Adam(
+                constant_lr, parameter_list=linear.parameters())
+
+            self._check_schedule(adam, loss, constant_lr, [constant_lr] * 10)
+
+    def test_lr_decay(self):
+        """PiecewiseDecay: the expected rate steps up through ``values``."""
+        with fluid.dygraph.guard():
+            linear, loss = self._make_linear_and_loss()
+
+            boundaries = [2, 4, 6, 8]
+            values = [0.2, 0.4, 0.6, 0.8, 1.0]
+            adam = fluid.optimizer.Adam(
+                fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
+                parameter_list=linear.parameters())
+
+            # Rate expected after each of the 12 minimize() calls.
+            per_step = [
+                0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0
+            ]
+            self._check_schedule(adam, loss, 0.2, per_step)
+
+    def test_lr_decay_natural_exp(self):
+        """Staircase NaturalExpDecay: 1.0 for three steps, then exp(-0.5)."""
+        with fluid.dygraph.guard():
+            linear, loss = self._make_linear_and_loss()
+
+            decay = fluid.dygraph.NaturalExpDecay(
+                learning_rate=1.0,
+                decay_steps=3,
+                decay_rate=0.5,
+                staircase=True)
+            adam = fluid.optimizer.Adam(
+                decay, parameter_list=linear.parameters())
+
+            # Rate expected after each of the 5 minimize() calls.
+            decayed = np.exp(-0.5)
+            per_step = [1.0, 1.0, 1.0, decayed, decayed]
+            self._check_schedule(adam, loss, 1.0, per_step)
+
+
 class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
         optimizer = MomentumOptimizer(
-- 
GitLab