Unverified · Commit 4a3a2d6b authored by guguguzi, committed by GitHub

Add api MultiplicativeDecay (#38250)

* delete the modification of dygraph

* CI

* check CI

* modify the return value of get_lr
Parent: c8fbd3cd
...@@ -205,6 +205,13 @@ def lambda_lr(epoch_num, learning_rate, lr_lambda, verbose=False):
    return learning_rate * lr_lambda(epoch_num)
def multiplicative_lr(epoch_num, learning_rate, lr_lambda, verbose=False):
latest_lr = learning_rate
for i in range(epoch_num):
latest_lr = latest_lr * lr_lambda(i + 1)
return latest_lr
def piecewise_lr(epoch_num, boundaries, values, verbose=False):
    assert len(boundaries) + 1 == len(values)
    for i in range(len(boundaries)):
...@@ -519,6 +526,10 @@ class TestLRScheduler(unittest.TestCase):
            "learning_rate": 0.5,
            "lr_lambda": lambda x: 0.95**x,
            "verbose": True
}), (multiplicative_lr, paddle.optimizer.lr.MultiplicativeDecay, {
"learning_rate": 0.5,
"lr_lambda": lambda x: 0.95,
"verbose": True
        }), (cosine_annealing_lr, paddle.optimizer.lr.CosineAnnealingDecay, {
            "learning_rate": 0.5,
            "T_max": 10,
...
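For context, the reference helper multiplicative_lr added above accumulates the lambda factor epoch by epoch, and the new test tuple pairs it with paddle.optimizer.lr.MultiplicativeDecay using a constant factor of 0.95. A minimal sketch of the values the helper produces (illustrative only, outside the test harness):

    # Expected reference values for the constant factor used in the new test case.
    for epoch in (0, 1, 2):
        print(multiplicative_lr(epoch, 0.5, lambda x: 0.95))
    # 0.5, 0.475, 0.45125 -- each epoch multiplies the previous lr by 0.95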
...@@ -17,7 +17,7 @@ import numpy
import warnings
from paddle import Tensor
__all__ = [ # noqa
    'LRScheduler',
    'NoamDecay',
    'PiecewiseDecay',
...@@ -30,7 +30,8 @@ __all__ = [ #noqa
    'StepDecay',
    'LambdaDecay',
    'ReduceOnPlateau',
    'CosineAnnealingDecay',
'MultiplicativeDecay'
]
...@@ -1355,7 +1356,8 @@ class ReduceOnPlateau(LRScheduler):
        if isinstance(metrics, (Tensor, numpy.ndarray)):
            assert len(metrics.shape) == 1 and metrics.shape[0] == 1, "the metrics.shape " \
                "should be (1L,), but the current metrics.shape is {}. Maybe that " \
                "you should call paddle.mean to process it first.".format(
                    metrics.shape)
        elif not isinstance(metrics,
                            (int, float, numpy.float32, numpy.float64)):
            raise TypeError(
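The change in this hunk only re-wraps the assertion message; the behavior is unchanged: ReduceOnPlateau.step expects a Python number or a one-element Tensor/ndarray, and larger tensors should be reduced first. A minimal sketch, assuming a working Paddle install (the loss tensor here is illustrative):

    import paddle

    scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
    loss = paddle.mean(paddle.uniform([10, 10]))  # reduce to a single-element tensor
    scheduler.step(loss)                          # passes the shape assertion above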
...@@ -1513,3 +1515,68 @@ class CosineAnnealingDecay(LRScheduler):
    def _get_closed_form_lr(self):
        return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos(
            math.pi * self.last_epoch / self.T_max)) / 2
class MultiplicativeDecay(LRScheduler):
"""
Multiply the learning rate of ``optimizer`` by the factor given in function ``lr_lambda`` .
The algorithm can be described as the code below.
.. code-block:: text
learning_rate = 0.5 # init learning_rate
lr_lambda = lambda epoch: 0.95
learning_rate = 0.5 # epoch 0,
learning_rate = 0.475 # epoch 1, 0.5*0.95
learning_rate = 0.45125 # epoch 2, 0.475*0.95
Args:
learning_rate (float): The initial learning rate. It is a python float number.
lr_lambda (function): A function which computes a factor by ``epoch``, and then multiplies the last learning rate by this factor.
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .
Returns:
``MultiplicativeDecay`` instance to schedule learning rate.
Examples:
.. code-block:: python
import paddle
import numpy as np
# train on default dynamic graph mode
linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
for epoch in range(20):
for batch_id in range(5):
x = paddle.uniform([10, 10])
out = linear(x)
loss = paddle.mean(out)
loss.backward()
sgd.step()
sgd.clear_gradients()
scheduler.step() # If you update learning rate each step
# scheduler.step() # If you update learning rate each epoch
"""
def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
if not callable(lr_lambda):
raise TypeError(
"The type of 'lr_lambda' in 'MultiplicativeDecay' must be 'function', but received %s."
% type(lr_lambda))
self.lr_lambda = lr_lambda
super(MultiplicativeDecay, self).__init__(learning_rate, last_epoch,
verbose)
def get_lr(self):
if self.last_epoch > 0:
return self.last_lr * self.lr_lambda(self.last_epoch)
else:
return self.base_lr
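As a quick check on the recurrence in get_lr (the previous last_lr multiplied by lr_lambda(last_epoch)), the following sketch reproduces the sequence quoted in the docstring; it assumes a Paddle build that already includes this new class:

    import paddle

    # Illustrative check: prints 0.5, 0.475, 0.45125 with a constant factor of 0.95.
    scheduler = paddle.optimizer.lr.MultiplicativeDecay(
        learning_rate=0.5, lr_lambda=lambda epoch: 0.95)
    for epoch in range(3):
        print(scheduler.last_lr)
        scheduler.step()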