# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import optimizer as optim


class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        weight_decay (float|WeightDecayRegularizer, optional) - The strategy of regularization.
        grad_clip (GradientClipBase, optional) - The strategy of gradient clipping.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 weight_decay=None,
                 grad_clip=None):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, parameters):
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=parameters)
        return opt
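
# A minimal usage sketch (an assumption, not part of the original module): each
# wrapper here is constructed from config values and then called with a model's
# parameter list to build the actual paddle optimizer, e.g.
#
#     import paddle
#     model = paddle.nn.Linear(10, 2)
#     build_opt = Momentum(learning_rate=0.1, momentum=0.9)
#     optimizer = build_opt(model.parameters())  # paddle.optimizer.Momentum
#     # optimizer.step() / optimizer.clear_grad() can then be used in training.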


class Adam(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 lazy_mode=False):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode

    def __call__(self, parameters):
        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=parameters)
        return opt
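
# Hedged sketch: `weight_decay` and `grad_clip` are forwarded unchanged to
# paddle.optimizer.Adam, so a caller may pass, for example, a float L2
# coefficient and a paddle.nn.ClipGradByGlobalNorm instance (illustrative
# choices, not required by this module):
#
#     import paddle
#     build_opt = Adam(learning_rate=1e-3,
#                      weight_decay=1e-4,
#                      grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0))
#     optimizer = build_opt(paddle.nn.Linear(10, 2).parameters())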


class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        rho (float) - Decay rate for the moving average of squared gradients, default is 0.95.
        epsilon (float) - Small constant added to the denominator to avoid division by zero, default is 1e-6.
        weight_decay (float|WeightDecayRegularizer, optional) - The strategy of regularization.
        grad_clip (GradientClipBase, optional) - The strategy of gradient clipping.
    """

    def __init__(self,
                 learning_rate,
                 momentum=0.0,
                 rho=0.95,
                 epsilon=1e-6,
                 weight_decay=None,
                 grad_clip=None):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, parameters):
        opt = optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=parameters)
        return opt
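

# A self-contained smoke-test sketch (assumes paddle >= 2.0 is installed); it
# builds each wrapper against a toy model and runs a single update step.
if __name__ == "__main__":
    import paddle

    model = paddle.nn.Linear(4, 2)
    for build_opt in (Momentum(learning_rate=0.1, momentum=0.9),
                      Adam(learning_rate=1e-3),
                      RMSProp(learning_rate=1e-3, momentum=0.9)):
        optimizer = build_opt(model.parameters())
        loss = model(paddle.randn([8, 4])).mean()
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()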