# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from paddle.optimizer import lr
from paddle.optimizer.lr import LRScheduler


class Linear(object):
    """
    Linear learning rate decay
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        epochs (int): Total training epochs; the decay runs for ``(epochs - warmup_epoch) * step_each_epoch`` steps.
        step_each_epoch (int): The number of steps in each epoch.
        end_lr (float, optional): The minimum final learning rate. Default: 0.0.
        power (float, optional): Power of polynomial. Default: 1.0.
        warmup_epoch (int): The number of warmup epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): The initial learning rate of the warmup. Default: 0.0.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
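
    Examples:
        A minimal usage sketch; the hyper-parameter values and the SGD
        optimizer below are illustrative assumptions, not defaults of this
        class (``Linear`` here is this wrapper, not ``paddle.nn.Linear``):

        .. code-block:: python

            import paddle

            # polynomial decay over 10 epochs of 100 steps, with 1 warmup epoch
            scheduler = Linear(
                learning_rate=0.1,
                epochs=10,
                step_each_epoch=100,
                end_lr=0.0,
                warmup_epoch=1)()
            model = paddle.nn.Linear(10, 10)
            sgd = paddle.optimizer.SGD(
                learning_rate=scheduler, parameters=model.parameters())
            # call scheduler.step() once per iteration so that warmup and
            # decay advance in step units, matching step_each_epoch above
            scheduler.step()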
    """

    def __init__(self,
                 learning_rate,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super(Linear, self).__init__()
        self.learning_rate = learning_rate
        self.steps = (epochs - warmup_epoch) * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.steps,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Cosine(object):
    """
    Cosine learning rate decay
    lr = 0.5 * learning_rate * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        learning_rate (float): The initial learning rate.
        step_each_epoch (int): The number of steps in each epoch.
        epochs (int): Total training epochs.
        eta_min (float): Minimum learning rate. Default: 0.0.
        warmup_epoch (int): The number of warmup epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): The initial learning rate of the warmup. Default: 0.0.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
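
    Examples:
        A minimal usage sketch; the hyper-parameter values and the SGD
        optimizer below are illustrative assumptions, not defaults of this
        class:

        .. code-block:: python

            import paddle

            # cosine decay over 90 epochs of 500 steps, with 5 warmup epochs
            scheduler = Cosine(
                learning_rate=0.1,
                step_each_epoch=500,
                epochs=90,
                warmup_epoch=5)()
            model = paddle.nn.Linear(10, 10)
            sgd = paddle.optimizer.SGD(
                learning_rate=scheduler, parameters=model.parameters())
            # call scheduler.step() once per iteration; T_max is measured
            # in steps, not epochs
            scheduler.step()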
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 eta_min=0.0,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super(Cosine, self).__init__()
        self.learning_rate = learning_rate
        self.T_max = (epochs - warmup_epoch) * step_each_epoch
        self.eta_min = eta_min
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.CosineAnnealingDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            eta_min=self.eta_min,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Step(object):
    """
    Step learning rate decay
    Args:
        step_each_epoch (int): The number of steps in each epoch.
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): The interval, in epochs, at which the learning rate is decayed.
        gamma (float): The ratio by which the learning rate is reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0.
        warmup_epoch (int): The number of warmup epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): The initial learning rate of the warmup. Default: 0.0.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
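
    Examples:
        A minimal usage sketch; the hyper-parameter values and the SGD
        optimizer below are illustrative assumptions, not defaults of this
        class:

        .. code-block:: python

            import paddle

            # reduce the learning rate by 10x every 30 epochs
            scheduler = Step(
                learning_rate=0.1,
                step_size=30,
                step_each_epoch=100,
                gamma=0.1)()
            model = paddle.nn.Linear(10, 10)
            sgd = paddle.optimizer.SGD(
                learning_rate=scheduler, parameters=model.parameters())
            # call scheduler.step() once per iteration; step_size is
            # converted to step units internally
            scheduler.step()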
    """

    def __init__(self,
                 learning_rate,
                 step_size,
                 step_each_epoch,
                 gamma,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super(Step, self).__init__()
        self.step_size = step_each_epoch * step_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.StepDecay(
            learning_rate=self.learning_rate,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Piecewise(object):
    """
    Piecewise learning rate decay
    Args:
        step_each_epoch (int): The number of steps in each epoch.
        decay_epochs (list): A list of epoch indices at which the learning rate decays. The type of element in the list is python int.
        values (list): A list of learning rate values that will be picked during different epoch boundaries.
            The type of element in the list is python float.
        warmup_epoch (int): The number of warmup epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): The initial learning rate of the warmup. Default: 0.0.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
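
    Examples:
        A minimal usage sketch; the values below are illustrative assumptions,
        not defaults of this class:

        .. code-block:: python

            import paddle

            # lr is 0.1 for epochs [0, 30), 0.01 for [30, 60), 0.001 afterwards
            scheduler = Piecewise(
                step_each_epoch=100,
                decay_epochs=[30, 60],
                values=[0.1, 0.01, 0.001])()
            model = paddle.nn.Linear(10, 10)
            sgd = paddle.optimizer.SGD(
                learning_rate=scheduler, parameters=model.parameters())
            # call scheduler.step() once per iteration; decay_epochs are
            # converted to step units internally
            scheduler.step()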
    """

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super(Piecewise, self).__init__()
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.PiecewiseDecay(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate


class MultiStepDecay(LRScheduler):
    """
    Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones.
    The algorithm can be described as the code below.
    .. code-block:: text
        learning_rate = 0.5
        milestones = [30, 50]
        gamma = 0.1
        if epoch < 30:
            learning_rate = 0.5
        elif epoch < 50:
            learning_rate = 0.05
        else:
            learning_rate = 0.005
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        milestones (tuple|list): List or tuple of milestone epochs. Must be increasing.
        epochs (int): Total training epochs.
        step_each_epoch (int): The number of steps in each epoch; milestone epochs are converted to steps internally.
        gamma (float, optional): The ratio by which the learning rate is reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0. Default: 0.1.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

    Returns:
        ``MultiStepDecay`` instance to schedule learning rate.
    Examples:

        .. code-block:: python
            import paddle
            import numpy as np
            # train on default dynamic graph mode
            linear = paddle.nn.Linear(10, 10)
            scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            for epoch in range(20):
                for batch_id in range(5):
                    x = paddle.uniform([10, 10])
                    out = linear(x)
                    loss = paddle.mean(out)
                    loss.backward()
                    sgd.step()
                    sgd.clear_gradients()
                    scheduler.step()    # If you update learning rate each step
              # scheduler.step()        # If you update learning rate each epoch
            # train on static graph mode
            paddle.enable_static()
            main_prog = paddle.static.Program()
            start_prog = paddle.static.Program()
            with paddle.static.program_guard(main_prog, start_prog):
                x = paddle.static.data(name='x', shape=[None, 4, 5])
                y = paddle.static.data(name='y', shape=[None, 4, 5])
                z = paddle.static.nn.fc(x, 100)
                loss = paddle.mean(z)
                scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
                sgd.minimize(loss)
            exe = paddle.static.Executor()
            exe.run(start_prog)
            for epoch in range(20):
                for batch_id in range(5):
                    out = exe.run(
                        main_prog,
                        feed={
                            'x': np.random.randn(3, 4, 5).astype('float32'),
                            'y': np.random.randn(3, 4, 5).astype('float32')
                        },
                        fetch_list=loss.name)
                    scheduler.step()    # If you update learning rate each step
              # scheduler.step()        # If you update learning rate each epoch
    """

    def __init__(self,
                 learning_rate,
                 milestones,
                 epochs,
                 step_each_epoch,
                 gamma=0.1,
                 last_epoch=-1,
                 verbose=False):
        if not isinstance(milestones, (tuple, list)):
            raise TypeError(
                "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s."
                % type(milestones))
        if not all([
                milestones[i] < milestones[i + 1]
                for i in range(len(milestones) - 1)
        ]):
            raise ValueError('The elements of milestones must be increasing')
        if gamma >= 1.0:
            raise ValueError('gamma should be < 1.0.')
        # convert milestone epochs into global steps
        self.milestones = [x * step_each_epoch for x in milestones]
        self.gamma = gamma
        super(MultiStepDecay, self).__init__(learning_rate, last_epoch,
                                             verbose)

    def get_lr(self):
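        # self.last_epoch is advanced by every scheduler.step() call, so when
        # step() is called once per iteration it counts global steps and is
        # compared against the step-unit milestones computed in __init__.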
        for i in range(len(self.milestones)):
            if self.last_epoch < self.milestones[i]:
                return self.base_lr * (self.gamma**i)
        return self.base_lr * (self.gamma**len(self.milestones))