From 63f242b6fb5abb00ea4951705b400567a08f18c8 Mon Sep 17 00:00:00 2001 From: LoneRanger <836253168@qq.com> Date: Wed, 28 Jun 2023 11:49:42 +0800 Subject: [PATCH] replace PiecewiseDecay, StepDecay, MultiStepDecay, LambdaDecay with 2.0 version (#53992) * replace PiecewiseDecay(LearningRateDecay) with PiecewiseDecay(LRScheduler) * fix bug * fix bug * replace the StepDecay,MultiStepDecay,LambdaDecay with 2.0 version --- .../fluid/dygraph/learning_rate_scheduler.py | 304 ------------------ .../fluid/layers/learning_rate_scheduler.py | 8 +- python/paddle/optimizer/lr.py | 2 + .../test_basic_api_transformation.py | 4 +- test/dygraph_to_static/test_yolov3.py | 6 +- test/legacy_test/test_imperative_optimizer.py | 18 +- .../test_learning_rate_scheduler.py | 53 +-- 7 files changed, 50 insertions(+), 345 deletions(-) diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index dd17dbe5272..c0ecda7059d 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -23,13 +23,9 @@ from ..data_feeder import check_type __all__ = [ 'NoamDecay', - 'PiecewiseDecay', 'PolynomialDecay', 'LinearLrWarmup', 'ReduceLROnPlateau', - 'StepDecay', - 'MultiStepDecay', - 'LambdaDecay', ] @@ -131,68 +127,6 @@ class LearningRateDecay: raise NotImplementedError() -class PiecewiseDecay(LearningRateDecay): - """ - :api_attr: imperative - - Piecewise decay scheduler. - - The algorithm can be described as the code below. - - .. code-block:: text - - boundaries = [10000, 20000] - values = [1.0, 0.5, 0.1] - if global_step < 10000: - learning_rate = 1.0 - elif 10000 <= global_step < 20000: - learning_rate = 0.5 - else: - learning_rate = 0.1 - - Parameters: - boundaries(list): A list of steps numbers. The type of element in the list is python int. - values(list): A list of learning rate values that will be picked during - different step boundaries. The type of element in the list is python float. - begin(int): The begin step to initialize the global_step in the description above. - step(int, optional): The step size used to calculate the new global_step in the description above. - The default value is 1. - dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as - 'float32', 'float64'. The default value is 'float32'. - - Returns: - None. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import paddle - boundaries = [10000, 20000] - values = [1.0, 0.5, 0.1] - with fluid.dygraph.guard(): - emb = paddle.nn.Embedding(10, 10) - optimizer = fluid.optimizer.SGD( - learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0), - parameter_list = emb.parameters() ) - """ - - def __init__(self, boundaries, values, begin, step=1, dtype='float32'): - super().__init__(begin, step, dtype) - self.boundaries = boundaries - self.values = values - - self.vars = [] - for value in values: - self.vars.append(value) - - def step(self): - for i in range(len(self.boundaries)): - if self.step_num < self.boundaries[i]: - return self.vars[i] - return self.create_lr_var(self.vars[len(self.values) - 1]) - - class PolynomialDecay(LearningRateDecay): r""" :api_attr: imperative @@ -742,241 +676,3 @@ class _LearningRateEpochDecay(LearningRateDecay): def get_lr(self): raise NotImplementedError - - -class StepDecay(_LearningRateEpochDecay): - """ - :api_attr: imperative - - Decays the learning rate of ``optimizer`` by ``decay_rate`` every ``step_size`` number of epoch. - - The algorithm can be described as the code below. - - .. code-block:: text - - learning_rate = 0.5 - step_size = 30 - decay_rate = 0.1 - - learning_rate = 0.5 if epoch < 30 - learning_rate = 0.05 if 30 <= epoch < 60 - learning_rate = 0.005 if 60 <= epoch < 90 - ... - - Parameters: - learning_rate (float|int): The initial learning rate. It can be set to python float or int number. - step_size (int): Period of learning rate decay. - decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` . - It should be less than 1.0. Default: 0.1. - - Returns: - None. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - with fluid.dygraph.guard(): - x = np.random.uniform(-1, 1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - input = fluid.dygraph.to_variable(x) - scheduler = fluid.dygraph.StepDecay(0.5, step_size=3) - adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters()) - - for epoch in range(9): - for batch_id in range(5): - out = linear(input) - loss = paddle.mean(out) - adam.minimize(loss) - scheduler.epoch() - - print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr())) - # epoch:0, current lr is 0.5 - # epoch:1, current lr is 0.5 - # epoch:2, current lr is 0.5 - # epoch:3, current lr is 0.05 - # epoch:4, current lr is 0.05 - # epoch:5, current lr is 0.05 - # epoch:6, current lr is 0.005 - # epoch:7, current lr is 0.005 - # epoch:8, current lr is 0.005 - - """ - - def __init__(self, learning_rate, step_size, decay_rate=0.1): - if not isinstance(step_size, int): - raise TypeError( - "The type of 'step_size' must be 'int', but received %s." - % type(step_size) - ) - if decay_rate >= 1.0: - raise ValueError('decay_rate should be < 1.0.') - - self.step_size = step_size - self.decay_rate = decay_rate - super().__init__(learning_rate) - - def get_lr(self): - decay_rate = self.create_lr_var(self.decay_rate) - i = self.epoch_num // self.step_size - return self.base_lr * (decay_rate**i) - - -class MultiStepDecay(_LearningRateEpochDecay): - """ - :api_attr: imperative - - Decays the learning rate of ``optimizer`` by ``decay_rate`` once ``epoch`` reaches one of the milestones. - - The algorithm can be described as the code below. - - .. 
code-block:: text - - learning_rate = 0.5 - milestones = [30, 50] - decay_rate = 0.1 - if epoch < 30: - learning_rate = 0.5 - elif epoch < 50: - learning_rate = 0.05 - else: - learning_rate = 0.005 - - Parameters: - learning_rate (float|int): The initial learning rate. It can be set to python float or int number. - milestones (tuple|list): List or tuple of each boundaries. Must be increasing. - decay_rate (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * decay_rate`` . - It should be less than 1.0. Default: 0.1. - - Returns: - None. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - with fluid.dygraph.guard(): - x = np.random.uniform(-1, 1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - input = fluid.dygraph.to_variable(x) - scheduler = fluid.dygraph.MultiStepDecay(0.5, milestones=[3, 5]) - adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters()) - - for epoch in range(6): - for batch_id in range(5): - out = linear(input) - loss = paddle.mean(out) - adam.minimize(loss) - scheduler.epoch() - - print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr())) - # epoch:0, current lr is 0.5 - # epoch:1, current lr is 0.5 - # epoch:2, current lr is 0.5 - # epoch:3, current lr is 0.05 - # epoch:4, current lr is 0.05 - # epoch:5, current lr is 0.005 - - """ - - def __init__(self, learning_rate, milestones, decay_rate=0.1): - if not isinstance(milestones, (tuple, list)): - raise TypeError( - "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s." - % type(milestones) - ) - - if not all( - [ - milestones[i] < milestones[i + 1] - for i in range(len(milestones) - 1) - ] - ): - raise ValueError('The elements of milestones must be incremented') - if decay_rate >= 1.0: - raise ValueError('decay_rate should be < 1.0.') - - self.milestones = milestones - self.decay_rate = decay_rate - super().__init__(learning_rate) - - def get_lr(self): - decay_rate = self.create_lr_var(self.decay_rate) - for i in range(len(self.milestones)): - if self.epoch_num < self.milestones[i]: - return self.base_lr * (decay_rate**i) - - return self.base_lr * (decay_rate ** len(self.milestones)) - - -class LambdaDecay(_LearningRateEpochDecay): - """ - :api_attr: imperative - - Sets the learning rate of ``optimizer`` to the initial lr times a multiplicative factor, and this multiplicative - factor is computed by function ``lr_lambda`` . ``lr_lambda`` is function which receives ``epoch`` . - - The algorithm can be described as the code below. - - .. code-block:: text - - learning_rate = 0.5 # init learning_rate - lr_lambda = lambda epoch: 0.95 ** epoch - - learning_rate = 0.5 # epoch 0 - learning_rate = 0.475 # epoch 1 - learning_rate = 0.45125 # epoch 2 - - Parameters: - learning_rate (float|int): The initial learning rate. It can be set to python float or int number. - lr_lambda (function): A function which computes a multiplicative factor given an integer parameter ``epoch`` , and - then multiply the initial learning rate by this multiplicative factor. - - Returns: - None. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - with fluid.dygraph.guard(): - x = np.random.uniform(-1, 1, [10, 10]).astype("float32") - linear = paddle.nn.Linear(10, 10) - input = fluid.dygraph.to_variable(x) - scheduler = fluid.dygraph.LambdaDecay(0.5, lr_lambda=lambda x: 0.95**x) - adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters()) - - for epoch in range(6): - for batch_id in range(5): - out = linear(input) - loss = paddle.mean(out) - adam.minimize(loss) - scheduler.epoch() - - print("epoch:%d, current lr is %f" .format(epoch, adam.current_step_lr())) - # epoch:0, current lr is 0.5 - # epoch:1, current lr is 0.475 - # epoch:2, current lr is 0.45125 - - """ - - def __init__(self, learning_rate, lr_lambda): - if not callable(lr_lambda): - raise TypeError( - "The type of 'lr_lambda' in 'LambdaDecay' must be 'function', but received %s." - % type(lr_lambda) - ) - - self.lr_lambda = lr_lambda - super().__init__(learning_rate) - - def get_lr(self): - base_lr = self.create_lr_var(self.base_lr) - - return self.base_lr * self.lr_lambda(self.epoch_num) diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index bc1c8e78038..6605ef7a7b7 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -410,10 +410,10 @@ def piecewise_decay(boundaries, values): paddle.enable_static() boundaries = [10000, 20000] values = [1.0, 0.5, 0.1] - optimizer = fluid.optimizer.Momentum( + optimizer = paddle.optimizer.Momentum( momentum=0.9, - learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values), - regularization=paddle.regularizer.L2Decay(1e-4)) + learning_rate=paddle.optimizer.lr.PiecewiseDecay(boundaries, values), + weight_decay=paddle.regularizer.L2Decay(1e-4)) """ @@ -422,7 +422,7 @@ def piecewise_decay(boundaries, values): raise ValueError("len(values) - len(boundaries) should be 1") if in_dygraph_mode(): - decay = imperate_lr.PiecewiseDecay(boundaries, values, 0) + decay = paddle.optimizer.lr.PiecewiseDecay(boundaries, values) return decay else: global_step = _decay_step_counter() diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index aa87a455d56..681ff33ca67 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -98,6 +98,8 @@ class LRScheduler: type(learning_rate) ) ) + if learning_rate < 0: + raise ValueError(f"Invalid learning rate: {learning_rate}") self.base_lr = float(learning_rate) self.last_lr = float(learning_rate) self.last_epoch = last_epoch diff --git a/test/dygraph_to_static/test_basic_api_transformation.py b/test/dygraph_to_static/test_basic_api_transformation.py index 62499a9aada..e0a0d9f5923 100644 --- a/test/dygraph_to_static/test_basic_api_transformation.py +++ b/test/dygraph_to_static/test_basic_api_transformation.py @@ -376,9 +376,9 @@ def dyfunc_NoamDecay(): def dyfunc_PiecewiseDecay(): boundaries = [10000, 20000] values = [1.0, 0.5, 0.1] - pd = fluid.dygraph.PiecewiseDecay(boundaries, values, begin=0) + pd = paddle.optimizer.lr.PiecewiseDecay(boundaries, values) lr = pd() - return lr + return paddle.to_tensor(lr) def dyfunc_PolynomialDecay(): diff --git a/test/dygraph_to_static/test_yolov3.py b/test/dygraph_to_static/test_yolov3.py index 640fe7c3fda..eb51fcc20e9 100644 --- a/test/dygraph_to_static/test_yolov3.py +++ b/test/dygraph_to_static/test_yolov3.py @@ -94,11 +94,11 @@ def 
train(to_static): learning_rate = cfg.learning_rate values = [learning_rate * (gamma**i) for i in range(step_num + 1)] - lr = fluid.dygraph.PiecewiseDecay( - boundaries=boundaries, values=values, begin=0 + lr = paddle.optimizer.lr.PiecewiseDecay( + boundaries=boundaries, values=values ) - lr = fluid.layers.linear_lr_warmup( + lr = paddle.optimizer.lr.LinearWarmup( learning_rate=lr, warmup_steps=cfg.warm_up_iter, start_lr=0.0, diff --git a/test/legacy_test/test_imperative_optimizer.py b/test/legacy_test/test_imperative_optimizer.py index 514dd318ed2..36a60dfcf95 100644 --- a/test/legacy_test/test_imperative_optimizer.py +++ b/test/legacy_test/test_imperative_optimizer.py @@ -262,7 +262,7 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): def get_optimizer(self): bd = [3, 6, 9] optimizer = SGDOptimizer( - learning_rate=fluid.layers.piecewise_decay( + learning_rate=paddle.optimizer.lr.PiecewiseDecay( boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)], ) @@ -470,20 +470,20 @@ class TestOptimizerLearningRate(unittest.TestCase): bd = [2, 4, 6, 8] value = [0.2, 0.4, 0.6, 0.8, 1.0] - adam = fluid.optimizer.Adam( - fluid.dygraph.PiecewiseDecay(bd, value, 0), - parameter_list=linear.parameters(), + scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value) + adam = paddle.optimizer.Adam( + scheduler, + parameters=linear.parameters(), ) - np.testing.assert_allclose( - adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0 - ) + np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0) ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] for i in range(12): adam.minimize(loss) - lr = adam.current_step_lr() - + lr = adam.get_lr() + adam.step() + scheduler.step() np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0) def test_lr_decay_natural_exp(self): diff --git a/test/legacy_test/test_learning_rate_scheduler.py b/test/legacy_test/test_learning_rate_scheduler.py index f1cc1fe81b7..b38e29b7df0 100644 --- a/test/legacy_test/test_learning_rate_scheduler.py +++ b/test/legacy_test/test_learning_rate_scheduler.py @@ -127,7 +127,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): learning_rate=0.1, gamma=0.5, ) - Step_scheduler = fluid.dygraph.StepDecay(0.5, step_size=3) + Step_scheduler = paddle.optimizer.lr.StepDecay(0.5, step_size=3) Reducelr_scheduler = fluid.dygraph.ReduceLROnPlateau( learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3 ) @@ -154,7 +154,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): adam3.minimize(loss) linear.clear_gradients() - Step_scheduler.epoch() + Step_scheduler.get_lr() Reducelr_scheduler.step(loss) paddle.save(linear.state_dict(), "save_path.pdparams") @@ -163,7 +163,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase): learning_rate=0.1, gamma=0.5, ) - Step_scheduler_test = fluid.dygraph.StepDecay(0.5, step_size=3) + Step_scheduler_test = paddle.optimizer.lr.StepDecay( + 0.5, step_size=3 + ) Reducelr_scheduler_test = fluid.dygraph.ReduceLROnPlateau( learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3 ) @@ -189,8 +191,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ) adam_test.set_dict(opt_state) self.assertEqual( - adam_test._learning_rate.epoch_num, - adam2._learning_rate.epoch_num, + adam_test._learning_rate.last_epoch, + adam2._learning_rate.last_epoch, "epoch_num is different before and after set_dict", ) self.assertEqual( @@ -288,19 +290,20 @@ class TestLearningRateDecayDygraph(unittest.TestCase): decay_rate = 0.2 linear = paddle.nn.Linear(10, 10) - 
scheduler = fluid.dygraph.MultiStepDecay( + scheduler = paddle.optimizer.lr.MultiStepDecay( learning_rate, milestones, decay_rate ) - adam = fluid.optimizer.AdamOptimizer( - learning_rate=scheduler, parameter_list=linear.parameters() + adam = paddle.optimizer.Adam( + learning_rate=scheduler, parameters=linear.parameters() ) for epoch in range(10): right_result = multi_step_decay( epoch, learning_rate, milestones, decay_rate ) - fluid_result = adam.current_step_lr() - scheduler.epoch() + fluid_result = adam.get_lr() + adam.step() + scheduler.step() self.assertAlmostEqual( right_result, fluid_result, @@ -310,35 +313,36 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ) with self.assertRaises(ValueError): - lr = fluid.dygraph.MultiStepDecay( + lr = paddle.optimizer.lr.MultiStepDecay( learning_rate, [30, 50, 20], 0.1 ) with self.assertRaises(ValueError): - lr = fluid.dygraph.MultiStepDecay( + lr = paddle.optimizer.lr.MultiStepDecay( learning_rate, [20, 30, 50], 1 ) with self.assertRaises(TypeError): - lr = fluid.dygraph.MultiStepDecay("test", [20, 30, 50]) + lr = paddle.optimizer.lr.MultiStepDecay("test", [20, 30, 50]) with self.assertRaises(ValueError): - lr = fluid.dygraph.MultiStepDecay(-1, [20, 30, 50]) + lr = paddle.optimizer.lr.MultiStepDecay(-1, [20, 30, 50]) def test_StepDecay(self): with fluid.dygraph.guard(): learning_rate = 0.5 step_size = 3 decay_rate = 0.2 - scheduler = fluid.dygraph.StepDecay( + scheduler = paddle.optimizer.lr.StepDecay( learning_rate, step_size, decay_rate ) for epoch in range(10): right_result = step_decay( epoch, learning_rate, step_size, decay_rate ) - fluid_result = scheduler().numpy().item() - scheduler.epoch() + fluid_result = scheduler() + scheduler.get_lr() + scheduler.step() self.assertAlmostEqual( right_result, fluid_result, @@ -348,16 +352,18 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ) with self.assertRaises(TypeError): - lr = fluid.dygraph.StepDecay(learning_rate, "test", 0.1) + lr = paddle.optimizer.lr.StepDecay(learning_rate, "test", 0.1) with self.assertRaises(ValueError): - lr = fluid.dygraph.StepDecay(learning_rate, 20, 2) + lr = paddle.optimizer.lr.StepDecay(learning_rate, 20, 2) def test_LambdaDecay(self): with fluid.dygraph.guard(): learning_rate = 0.5 lr_lambda = lambda x: 0.95**x - scheduler = fluid.dygraph.LambdaDecay(learning_rate, lr_lambda) + scheduler = paddle.optimizer.lr.LambdaDecay( + learning_rate, lr_lambda + ) linear = paddle.nn.Linear(10, 10) adam = fluid.optimizer.Adam( @@ -366,8 +372,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase): for epoch in range(30): right_result = lambda_decay(epoch, learning_rate, lr_lambda) - fluid_result = scheduler().numpy().item() - scheduler.epoch() + fluid_result = scheduler() + scheduler.get_lr() + scheduler.step() self.assertAlmostEqual( right_result, fluid_result, @@ -377,7 +384,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ) with self.assertRaises(TypeError): - lr = fluid.dygraph.LambdaDecay(learning_rate, "test") + lr = paddle.optimizer.lr.LambdaDecay(learning_rate, "test") class TestLearningRateDecay(unittest.TestCase): -- GitLab
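As a quick reference for the migration this patch performs, the sketch below shows how the removed fluid.dygraph schedulers (PiecewiseDecay, StepDecay, MultiStepDecay, LambdaDecay) map onto their paddle.optimizer.lr 2.0 replacements, following the usage exercised in the updated tests. It is a minimal sketch, not part of the patch itself; the layer size, input shape and loop count are illustrative assumptions rather than values taken from the diff.

.. code-block:: python

    import paddle

    # 2.0-style replacement for fluid.dygraph.PiecewiseDecay(boundaries, values, 0)
    scheduler = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=[10000, 20000], values=[1.0, 0.5, 0.1]
    )
    # The other schedulers touched by this patch follow the same pattern:
    #   paddle.optimizer.lr.StepDecay(0.5, step_size=3, gamma=0.1)
    #   paddle.optimizer.lr.MultiStepDecay(0.5, milestones=[3, 5], gamma=0.1)
    #   paddle.optimizer.lr.LambdaDecay(0.5, lr_lambda=lambda epoch: 0.95 ** epoch)

    linear = paddle.nn.Linear(10, 10)      # illustrative model
    adam = paddle.optimizer.Adam(
        learning_rate=scheduler, parameters=linear.parameters()
    )

    x = paddle.uniform([4, 10])            # illustrative input
    for epoch in range(3):
        loss = paddle.mean(linear(x))
        loss.backward()
        adam.step()                        # replaces optimizer.minimize(loss)
        adam.clear_grad()
        scheduler.step()                   # replaces scheduler.epoch()
        print(f"epoch {epoch}, lr = {adam.get_lr()}")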