#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import six

from .initializer import Initializer, Xavier, Constant
from .regularizer import WeightDecayRegularizer

__all__ = [
    'ParamAttr',
    'WeightNormParamAttr',
]


class ParamAttr(object):
    """
    Parameter attributes object. To fine-tuning network training process, user
    can set parameter's attributes to control training details. Such as learning rate,
    regularization, trainable, do_model_average and the method to initialize param.


    Args:
        name(str): The parameter's name. Default None.
        initializer(Initializer): The method used to initialize this parameter.
            Default None.
        learning_rate(float): The parameter's learning rate. The effective
            learning rate during optimization is
            :math:`global\_lr * parameter\_lr * scheduler\_factor`.
            Default 1.0.
        regularizer(WeightDecayRegularizer): Regularization factor. Default None.
        trainable(bool): Whether this parameter is trainable. Default True.
        gradient_clip(BaseGradientClipAttr): The method used to clip this
            parameter's gradient. Default None.
        do_model_average(bool): Whether this parameter should participate in
            model averaging. Default False.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                            learning_rate=0.5,
                                            regularizer=fluid.regularizer.L2Decay(1.0),
                                            trainable=True)
            x = fluid.layers.data(name='X', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
    """

    def __init__(self,
                 name=None,
                 initializer=None,
                 learning_rate=1.0,
                 regularizer=None,
                 trainable=True,
                 gradient_clip=None,
                 do_model_average=False):
        self.name = name
        self.initializer = initializer
        self.learning_rate = learning_rate
        self.regularizer = regularizer
        self.trainable = trainable
        self.gradient_clip = gradient_clip
        self.model_average = do_model_average

    def _set_default_initializer(self, initializer):
        """
        Set the default initializer, the initializer should be Constant,
        Uniform, Normal, Xavier, MSRA.

        Args:
            initializer(Initializer): the initializer to set.

        Returns:
            None
        """
        if initializer is None:
            if self.initializer is None:
                raise ValueError("ParamAttr.initializer is not set")
            return

        if self.initializer is not None:
            return

        self.initializer = initializer

    def _set_default_param_initializer(self):
        """
        Set the default initializer for the parameter with Xavier.

        Args:
            None.

        Returns:
            None.
        """
        self._set_default_initializer(Xavier())

    def _set_default_bias_initializer(self):
        """
        Set the default initializer for the bias with Constant(0.0).

        Args:
            None.

        Returns:
            None.
        """
        self._set_default_initializer(Constant(0.0))
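    # Minimal sketch of how the default-initializer helpers above behave
    # (they are normally called by the layer helpers, not by user code):
    # an explicitly set initializer is kept, otherwise the default is filled in.
    #
    #   attr = ParamAttr(name="fc_w")
    #   attr._set_default_param_initializer()  # attr.initializer becomes Xavier()
    #   attr._set_default_bias_initializer()   # no-op: initializer already set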

    @staticmethod
    def _to_attr(arg):
        """
        Create ParamAttr[s].

        Args:
            arg: Arguments to initialize ParamAttr[s]. arg's type can be
                str, Initializer, float, WeightDecayRegularizer, BaseGradientClipAttr,
                bool, ParamAttr, or a list of above type.

        Returns:
            ParamAttr[s]: ParamAttr[s] initialized with arg.

        Raises:
            arg can not initialize a ParamAttr.
        """
        if arg is None:
            return ParamAttr()
        elif isinstance(arg, list) or isinstance(arg, tuple):
            return [ParamAttr._to_attr(a) for a in arg]
        elif isinstance(arg, ParamAttr):
            return arg
        elif isinstance(arg, six.string_types):
            return ParamAttr(name=arg)
        elif isinstance(arg, Initializer):
            return ParamAttr(initializer=arg)
        elif isinstance(arg, WeightDecayRegularizer):
            return ParamAttr(regularizer=arg)
        elif isinstance(arg, bool):
            return ParamAttr._to_attr(None) if arg else False
        else:
            raise TypeError("{0} can not be cast to ParamAttr".format(type(arg)))

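    # Usage sketch for ``_to_attr`` (illustrative; the layer helpers call it on
    # the ``param_attr``/``bias_attr`` arguments they receive):
    #
    #   ParamAttr._to_attr(None)        -> ParamAttr()               (all defaults)
    #   ParamAttr._to_attr("fc_w")      -> ParamAttr(name="fc_w")
    #   ParamAttr._to_attr(Xavier())    -> ParamAttr(initializer=Xavier())
    #   ParamAttr._to_attr(["w", "b"])  -> [ParamAttr(name="w"), ParamAttr(name="b")]
    #   ParamAttr._to_attr(False)       -> False   (e.g. a bias_attr of False disables the bias)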
    def _to_kwargs(self, with_initializer=False):
        """
        Returns the attributes of this parameter.

        Args:
            with_initializer(bool): Whether to add initializer attr.

        Returns:
            Parameter attributes(map): The attributes of this parameter.
        """
        kwargs = {
            'name': self.name,
            'optimize_attr': {
                'learning_rate': self.learning_rate
            },
            'regularizer': self.regularizer,
            'trainable': self.trainable,
            'gradient_clip_attr': self.gradient_clip,
            'model_average': self.model_average
        }
        if with_initializer:
            kwargs['initializer'] = self.initializer
        return kwargs
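    # Sketch of the dict produced above; the values shown are the ParamAttr
    # defaults except for the name:
    #
    #   ParamAttr(name="fc_w")._to_kwargs()
    #   => {'name': 'fc_w',
    #       'optimize_attr': {'learning_rate': 1.0},
    #       'regularizer': None,
    #       'trainable': True,
    #       'gradient_clip_attr': None,
    #       'model_average': False}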


class WeightNormParamAttr(ParamAttr):
    """
    Used for weight normalization. Weight normalization is a reparameterization
    of the weight vectors in a neural network that decouples the magnitude of
    those weight vectors from their direction. Weight normalization is
    implemented as described in the paper `Weight Normalization: A Simple
    Reparameterization to Accelerate Training of Deep Neural Networks
    <https://arxiv.org/pdf/1602.07868.pdf>`_.

    Args:
        dim(int): Dimension over which to compute the norm. Default None.
        name(str): The parameter's name. Default None.
        initializer(Initializer): The method used to initialize this parameter.
            Default None.
        learning_rate(float): The parameter's learning rate. The effective
            learning rate during optimization is
            :math:`global\_lr * parameter\_lr * scheduler\_factor`.
            Default 1.0.
        regularizer(WeightDecayRegularizer): Regularization factor. Default None.
        trainable(bool): Whether this parameter is trainable. Default True.
        gradient_clip(BaseGradientClipAttr): The method used to clip this
            parameter's gradient. Default None.
        do_model_average(bool): Whether this parameter should participate in
            model averaging. Default False.

    Examples:
        .. code-block:: python
            
            import paddle.fluid as fluid
            data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32")
            fc = fluid.layers.fc(input=data,
                                 size=1000,
                                 param_attr=fluid.WeightNormParamAttr(
                                      dim=None,
                                      name='weight_norm_param'))

    """
    # List to record the parameters reparameterized by weight normalization.
    # If these parameters are treated as Variable rather than Parameter,
    # it can be used to discriminate these parameters and help to serialize
    # these parameters for inference.
    params_with_weight_norm = []

    def __init__(self,
                 dim=None,
                 name=None,
                 initializer=None,
                 learning_rate=1.0,
                 regularizer=None,
                 trainable=True,
                 gradient_clip=None,
                 do_model_average=False):
        super(WeightNormParamAttr, self).__init__(
            name=name,
            initializer=initializer,
            learning_rate=learning_rate,
            regularizer=regularizer,
            trainable=trainable,
            gradient_clip=gradient_clip,
            do_model_average=do_model_average)
        self.dim = dim