# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import contextlib
import sys
import numpy as np
import six
from . import parallel_helper
from .. import unique_name
from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
import copy
import warnings

__all__ = ['Layer']


class Layer(core.Layer):
    """Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.

    Parameters:
        name_scope (str, optional): prefix name used by the layer to name parameters.
            If prefix is "my_layer", parameter name in MyLayer
            can be "mylayer_0.w_n", where w is the parameter
            base name and n is a unique suffix auto-generated.
            If None, the prefix name will be the lowercased class name. Default: None.
        dtype(str or core.VarDesc.VarType, optional): data type of this layer's parameters.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                Default: ``core.VarDesc.VarType.FP32``
    
    Returns:
        None
    """

    def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
        if name_scope is None:
            name_scope = self.__class__.__name__.lower()
            self._full_name = unique_name.generate(name_scope)
        else:
            # TODO: remove name_scope parameter and all hard-coded usages
            self._full_name = unique_name.generate(name_scope + "/" +
                                                   self.__class__.__name__)
        self._helper = LayerObjectHelper(self._full_name)
        self._built = False
        self._dtype = dtype

        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()
        self._loaddict_holder = collections.OrderedDict()

    def train(self):
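        """Switch the global dygraph tracer to train mode."""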
        framework._dygraph_tracer().train_mode()

    def eval(self):
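        """Switch the global dygraph tracer to eval mode."""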
        framework._dygraph_tracer().eval_mode()

    def full_name(self):
        """Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__

        Returns:
            str: full name of this layer.
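
        Examples:
            A minimal sketch; ``MyLayer`` is illustrative only, and the numeric
            suffix is auto-generated and may differ:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__("my_layer")

                with fluid.dygraph.guard():
                    print(MyLayer().full_name())  # e.g. my_layer/MyLayer_0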
        """
        return self._full_name

    def create_parameter(self,
                         shape,
                         attr=None,
                         dtype='float32',
                         is_bias=False,
                         default_initializer=None):
        """Create parameters for this layer.
        
        Parameters:
            shape(list): Shape of the parameter.
            attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`. Default: None.
            dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
            is_bias(bool, optional): whether this is a bias parameter. Default: False.
            default_initializer(Initializer, optional): the default initializer for this parameter.
                If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
                for non-bias and bias parameter, respectively. Default: None.

        Returns:
            :ref:`api_guide_Variable_en` : created parameter.
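
        Examples:
            A minimal sketch, assuming dygraph mode; ``MyLayer`` and the shape
            below are illustrative only:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        # A 13x5 float32 weight owned by this layer.
                        self.weight = self.create_parameter(
                            shape=[13, 5], dtype="float32", is_bias=False)

                with fluid.dygraph.guard():
                    layer = MyLayer()
                    print(layer.weight.shape)  # [13, 5]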
        """
        temp_attr = copy.deepcopy(attr)
        if isinstance(temp_attr, six.string_types) and temp_attr == "":
            temp_attr = None
        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
                                             default_initializer)

    # TODO: Add more parameter list when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create Variable for this layer.

        Parameters:
            name(str, optional): name of the variable. Please refer to :ref:`api_guide_Name` . Default: None
            persistable(bool, optional): whether to make this variable persistable. Default: False
            dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                If set None, it will be ``core.VarDesc.VarType.FP32``. Default: None
            type(core.VarDesc.VarType, optional): type of the variable. Usually there is no need to set this parameter. Default: ``core.VarDesc.VarType.LOD_TENSOR``

        Returns:
            :ref:`api_guide_Variable_en` : created Variable.
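
        Examples:
            A minimal sketch, assuming dygraph mode; the layer and variable name
            below are illustrative only:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        # An auxiliary (non-parameter) variable owned by this layer.
                        self._mean_out = self.create_variable(
                            name="mean_out", dtype="float32")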
        """
        if name is not None:
            var_name = ".".join([self._full_name, name])
        else:
            var_name = unique_name.generate(".".join(
                [self._full_name, "_generated_var"]))

        return self._helper.main_program.current_block().create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)

    def parameters(self, include_sublayers=True):
        """Returns a list of all Parameters from current layer and its sub-layers.

        Parameters:
            include_sublayers(bool, optional): Whether include the parameters of sublayers. If True, also include the parameters from sublayers. Default: True

        Returns:
            list of :ref:`api_guide_Variable_en` : a list of Parameters.
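
        Examples:
            A minimal sketch, assuming dygraph mode and the built-in
            ``fluid.dygraph.Linear`` layer:

            .. code-block:: python

                import paddle.fluid as fluid

                with fluid.dygraph.guard():
                    linear = fluid.dygraph.Linear(13, 5, dtype="float32")
                    # A Linear layer holds two parameters: a weight and a bias.
                    print(len(linear.parameters()))  # 2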
        """
        ret = [p for p in self._parameters.values()]
        parameters_set = set(ret)
        if include_sublayers:
            for l in self._sub_layers.values():
                for p in l.parameters(include_sublayers):
                    if p in parameters_set:
                        continue
                    parameters_set.add(p)
                    ret.append(p)
        return ret

    def sublayers(self, include_sublayers=True):
        """Returns a list of sub layers.

        Parameters:
            include_sublayers(bool, optional): Whether return the sublayers of sublayers. If True, also include the sublayers of sublayers. Default: True

        Returns:
            list of Layer : a list of sublayers.
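
        Examples:
            A minimal sketch, assuming dygraph mode; ``MyModel`` is illustrative only:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyModel(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyModel, self).__init__()
                        self.linear = fluid.dygraph.Linear(13, 5, dtype="float32")

                with fluid.dygraph.guard():
                    model = MyModel()
                    # The Linear instance is the only direct sublayer.
                    print(len(model.sublayers()))  # 1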
        """
        ret = [l for l in self._sub_layers.values()]
        if include_sublayers:
            for l in self._sub_layers.values():
                for sub_l in l.sublayers(include_sublayers):
                    ret.append(sub_l)
        return ret

    def clear_gradients(self):
        """
        Clear the gradients of all parameters for this layer.
        
        Returns:
            None
        
        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                import numpy as np

                with fluid.dygraph.guard():
                    value = np.arange(26).reshape(2, 13).astype("float32")
                    a = fluid.dygraph.to_variable(value)
                    linear = fluid.dygraph.Linear(13, 5, dtype="float32")
                    adam = fluid.optimizer.Adam(learning_rate=0.01, 
                                                parameter_list=linear.parameters())
                    out = linear(a)
                    out.backward()
                    adam.minimize(out)
                    linear.clear_gradients()

        """
        for p in self.parameters():
            if p.trainable:
                p.clear_gradient()

    def _build_once(self, *args, **kwargs):
        pass

    def __call__(self, *inputs, **kwargs):
        if not self._built:
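            # Lazily run _build_once on the first call, with program-desc
            # tracing disabled; in data-parallel mode, broadcast the newly
            # created parameters to the other trainers.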
            with program_desc_tracing_guard(False):
                self._build_once(*inputs, **kwargs)
                if parallel_helper._is_data_parallel_mode():
                    parallel_helper._broadcast_parameters(
                        self._parameters.values())
            self._built = True

        outputs = self.forward(*inputs, **kwargs)
        return outputs

    def forward(self, *inputs, **kwargs):
        """
        Defines the computation performed at every call.
        Should be overridden by all subclasses.

        Parameters:
            *inputs(tuple): unpacked tuple arguments
            **kwargs(dict): unpacked dict arguments
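
        Examples:
            A minimal sketch of a subclass overriding ``forward``; ``MyLayer``
            is illustrative only:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.linear = fluid.dygraph.Linear(13, 5, dtype="float32")

                    def forward(self, x):
                        # Invoked through MyLayer.__call__, e.g. my_layer(x).
                        return self.linear(x)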
        """
        raise NotImplementedError

    def backward(self, *inputs):
        raise ValueError("Layer shouldn't implement backward")

    def add_sublayer(self, name, sublayer):
        """Adds a sub Layer instance.

        Added sublayer can be accessed by self.name

        Parameters:
            name(str): name of this sublayer.
            sublayer(Layer): an instance of Layer.
        Returns:
            Layer: the sublayer passed in.
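
        Examples:
            A minimal sketch, assuming dygraph mode; ``MyContainer`` is
            illustrative only:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyContainer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyContainer, self).__init__()
                        # The registered sublayer is then accessible as self.fc.
                        self.add_sublayer(
                            "fc", fluid.dygraph.Linear(13, 5, dtype="float32"))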
        """
        assert isinstance(sublayer, core.Layer)

        self._sub_layers[name] = sublayer
        return sublayer

    def add_parameter(self, name, parameter):
        """Adds a Parameter instance.

        Added parameter can be accessed by self.name

        Parameters:
            name(str): name of this parameter.
            parameter(Parameter): an instance of Parameter.
        Returns:
            Parameter: the parameter passed in.
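
        Examples:
            A minimal sketch, assuming dygraph mode; ``MyLayer`` is illustrative only:

            .. code-block:: python

                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        w = self.create_parameter(shape=[13, 5], dtype="float32")
                        # The registered parameter is then accessible as self.w.
                        self.add_parameter("w", w)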
        """
        assert isinstance(parameter, framework.Parameter)

        if len(self._loaddict_holder) > 0:
            assert parameter.name in self._loaddict_holder, "Parameter not found: cannot find [ {} ] in stat_dict".format(
                parameter.name)

            parameter.set_value(self._loaddict_holder[parameter.name])

        self._parameters[name] = parameter
        return parameter

    def __getattr__(self, name):
        if name in self._parameters:
            return self._parameters[name]
        elif name in self._sub_layers:
            return self._sub_layers[name]
        else:
            return object.__getattribute__(self, name)

    def __setattr__(self, name, value):
        if isinstance(getattr(type(self), name, None), property):
            object.__setattr__(self, name, value)
        if isinstance(value, framework.Parameter):
            params = self.__dict__.get('_parameters', None)
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            if len(self._loaddict_holder) > 0:
                assert value.name in self._loaddict_holder, "Parameter not found: cannot find [ {} ] in stat_dict".format(
                    value.name)

                value.set_value(self._loaddict_holder[value.name])

            params[name] = value
        elif isinstance(value, core.Layer):
            layers = self.__dict__.get('_sub_layers', None)
            if layers is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            layers[name] = value
        else:
            object.__setattr__(self, name, value)

    def __delattr__(self, name):
        if name in self._parameters:
            del self._parameters[name]
        elif name in self._sub_layers:
            del self._sub_layers[name]
        else:
            object.__delattr__(self, name)

    def state_dict(self,
                   destination=None,
                   include_sublayers=True,
                   structured_name_prefix=""):
        '''
        Get all parameters of the current layer and its sub-layers, and set them into a dict

        Parameters:
            destination(dict, optional) : If provided, all the parameters will be set to this dict. Default: None
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True

        Returns:
            dict: a dict containing all the parameters

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")

        '''

        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():
            if data is not None:
                destination[structured_name_prefix + name] = data

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is not None:
                    destination_temp = destination.copy()
                    destination_temp.update(
                        layer_item.state_dict(
                            destination_temp, include_sublayers,
                            structured_name_prefix + layer_name + "."))
                    destination = destination_temp
        return destination

    def set_dict(self,
                 stat_dict,
                 include_sublayers=True,
                 use_structured_name=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict

        Parameters:
            stat_dict(dict) : Dict containing all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. 
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")

                    emb.set_dict(para_state_dict)

        '''
        self.load_dict(
            stat_dict,
            include_sublayers=include_sublayers,
            use_structured_name=use_structured_name)

    def load_dict(self,
                  stat_dict,
                  include_sublayers=True,
                  use_structured_name=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict

        This API will be deprecated. Please use set_dict instead.

        Parameters:
            stat_dict(dict) : Dict containing all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")

                    emb.load_dict(para_state_dict)

        '''

        inner_state_dict = self.state_dict()

        for name, para in inner_state_dict.items():
            key_name = name if use_structured_name else para.name
            if key_name in stat_dict:
                para.set_value(stat_dict[key_name])
            else:
                raise RuntimeError(
                    "Parameter not found, Can't not find [ {} ] in stat_dict"
                    "use_structured_name is set to [{}]".format(
                        key_name, use_structured_name))
        unused_para_list = []
        for k, v in stat_dict.items():
            if k not in inner_state_dict:
                unused_para_list.append(k)
        if len(unused_para_list) > 0:
            warnings.warn(
                "Varibale [ {} ] are not used, because not included in layers state_dict".
                format(" ".join(unused_para_list)))