# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import contextlib
import sys
import numpy as np
import six
from . import parallel_helper
from .. import unique_name
from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
import copy
import warnings

__all__ = ['Layer']


class Layer(core.Layer):
    """Dynamic graph Layer based on object-oriented design (OOD). It holds the
    parameters of the layer, the structure of the forward graph, and so on.

    Parameters:
        name_scope (str, optional): prefix name used by the layer to name parameters.
            If prefix is "my_layer", parameter name in MyLayer
            can be "mylayer_0.w_n", where "w" is the parameter
            base name and "n" is a unique suffix auto-generated.
            If None, prefix name will be the lower-cased class name. Default: None.
        dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
                If set to str, it can be "bool", "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                Default: ``core.VarDesc.VarType.FP32``
    
    Returns:
        None
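
    Examples:
        .. code-block:: python

            # An illustrative sketch of subclassing Layer; the MyLinear layer
            # below is hypothetical and not part of this module.
            import numpy as np
            import paddle.fluid as fluid

            class MyLinear(fluid.dygraph.Layer):
                def __init__(self):
                    super(MyLinear, self).__init__()
                    self.w = self.create_parameter(shape=[3, 4], dtype='float32')

                def forward(self, x):
                    return fluid.layers.matmul(x, self.w)

            with fluid.dygraph.guard():
                x = fluid.dygraph.to_variable(np.ones([2, 3], dtype='float32'))
                out = MyLinear()(x)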
    """

    def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
        if name_scope is None:
            name_scope = self.__class__.__name__.lower()
            self._full_name = unique_name.generate(name_scope)
        else:
            # TODO: remove name_scope parameter and all hard-coded usages
            self._full_name = unique_name.generate(name_scope + "/" +
                                                   self.__class__.__name__)
        self._helper = LayerObjectHelper(self._full_name)
        self._built = False
        self._dtype = dtype

        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()
        self._loaddict_holder = collections.OrderedDict()

    def train(self):
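        """Switch the dygraph tracer to train mode."""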
        framework._dygraph_tracer().train_mode()

    def eval(self):
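        """Switch the dygraph tracer to eval mode."""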
        framework._dygraph_tracer().eval_mode()

    def full_name(self):
        """Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__

        Returns:
            str: full name of this layer.
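
        Examples:
            .. code-block:: python

                # A minimal sketch, reusing the Embedding example used elsewhere
                # in this file; the exact generated suffix may differ.
                import paddle.fluid as fluid

                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])
                    print(emb.full_name())  # e.g. "embedding_0"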
        """
        return self._full_name

    def create_parameter(self,
                         shape,
                         attr=None,
                         dtype='float32',
                         is_bias=False,
                         default_initializer=None):
        """Create parameters for this layer.
        
        Parameters:
            shape(list): Shape of the parameter.
            attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`. Default: None.
            dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
                If set to str, it can be "bool", "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
            is_bias(bool, optional): if this is a bias parameter. Default: False.
            default_initializer(Initializer, optional): the default initializer for this parameter.
                If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
                for non-bias and bias parameter, respectively. Default: None.

        Returns:
            :ref:`api_guide_Variable_en` : created parameter.
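
        Examples:
            .. code-block:: python

                # An illustrative sketch; the MyLayer class below is hypothetical
                # and not part of this module. Parameters created here are
                # registered on the layer by attribute assignment.
                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.w = self.create_parameter(shape=[2, 2], dtype='float32')
                        self.b = self.create_parameter(
                            shape=[2], dtype='float32', is_bias=True)

                    def forward(self, x):
                        return fluid.layers.matmul(x, self.w) + self.b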
        """
        temp_attr = copy.deepcopy(attr)
        if isinstance(temp_attr, six.string_types) and temp_attr == "":
            temp_attr = None
        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
                                             default_initializer)

    # TODO: Add more parameter list when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create Variable for this layer.

        Parameters:
            name(str, optional): name of the variable. Please refer to :ref:`api_guide_Name`. Default: None
            persistable(bool, optional): whether to set this variable as persistable. Default: None
            dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
                If set to str, it can be "bool", "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                If set None, it will be ``core.VarDesc.VarType.FP32``. Default: None
            type(core.VarDesc.VarType, optional): type of the variable. No need to set this parameter. Default: ``core.VarDesc.VarType.LOD_TENSOR``

        Returns:
            :ref:`api_guide_Variable_en` : created Variable.
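
        Examples:
            .. code-block:: python

                # An illustrative sketch; the MyLayer class below is hypothetical
                # and not part of this module.
                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        # the variable is named "<full_name>.mean" internally
                        self._mean = self.create_variable(
                            name="mean", dtype='float32', persistable=True)

                with fluid.dygraph.guard():
                    layer = MyLayer()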
        """
        if name is not None:
            var_name = ".".join([self._full_name, name])
        else:
            var_name = unique_name.generate(".".join(
                [self._full_name, "_generated_var"]))

        return self._helper.main_program.current_block().create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)

    def parameters(self, include_sublayers=True):
        """Returns a list of all Parameters from current layer and its sub-layers.

        Parameters:
            include_sublayers(bool, optional): Whether to include the parameters of sublayers. If True, also include the parameters from sublayers. Default: True

        Returns:
            list of :ref:`api_guide_Variable_en` : a list of Parameters.
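
        Examples:
            .. code-block:: python

                # A minimal sketch, reusing the Embedding example used elsewhere
                # in this file.
                import paddle.fluid as fluid

                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])
                    # Embedding holds a single weight parameter
                    print(len(emb.parameters()))  # 1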
        """
        ret = [p for p in self._parameters.values()]
        parameters_set = set(ret)
        if include_sublayers:
            for l in self._sub_layers.values():
                for p in l.parameters(include_sublayers):
                    if p in parameters_set:
                        continue
                    parameters_set.add(p)
                    ret.append(p)
        return ret

    def sublayers(self, include_sublayers=True):
        """Returns a list of sub layers.

        Parameters:
            include_sublayers(bool, optional): Whether to return the sublayers of sublayers. If True, also include the sublayers of sublayers. Default: True

        Returns:
            list of Layer : a list of sub layers.
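
        Examples:
            .. code-block:: python

                # An illustrative sketch; the MyModel class below is hypothetical
                # and not part of this module.
                import paddle.fluid as fluid

                class MyModel(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyModel, self).__init__()
                        self.emb = fluid.dygraph.Embedding([10, 10])

                with fluid.dygraph.guard():
                    model = MyModel()
                    print(len(model.sublayers()))  # 1, the Embedding sublayer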
        """
        ret = [l for l in self._sub_layers.values()]
        if include_sublayers:
            for l in self._sub_layers.values():
                for sub_l in l.sublayers(include_sublayers):
                    ret.append(sub_l)
        return ret

    def clear_gradients(self):
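        """Clear the gradients of all trainable parameters of this layer and its sub-layers."""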
        for p in self.parameters():
            if p.trainable:
                p.clear_gradient()

    def _build_once(self, *args, **kwargs):
        pass

    def __call__(self, *inputs, **kwargs):
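        # Build the layer lazily on the first call: _build_once runs with
        # program-desc tracing disabled and, in data-parallel mode, the newly
        # created parameters are broadcast; every call then dispatches to forward().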
        if not self._built:
            with program_desc_tracing_guard(False):
                self._build_once(*inputs, **kwargs)
                if parallel_helper._is_data_parallel_mode():
                    parallel_helper._broadcast_parameters(
                        self._parameters.values())
            self._built = True

        outputs = self.forward(*inputs, **kwargs)
        return outputs

    def forward(self, *inputs, **kwargs):
        """
        Defines the computation performed at every call.
        Should be overridden by all subclasses.

        Parameters:
            *inputs(tuple): unpacked tuple arguments
            **kwargs(dict): unpacked dict arguments
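
        Examples:
            .. code-block:: python

                # An illustrative override; the MyReLU layer below is hypothetical
                # and not part of this module. The layer is invoked by calling the
                # instance, which dispatches to forward().
                import numpy as np
                import paddle.fluid as fluid

                class MyReLU(fluid.dygraph.Layer):
                    def forward(self, x):
                        return fluid.layers.relu(x)

                with fluid.dygraph.guard():
                    x = fluid.dygraph.to_variable(
                        np.array([[-1., 2.]], dtype='float32'))
                    y = MyReLU()(x)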
        """
        raise NotImplementedError

    def backward(self, *inputs):
        raise ValueError("Layer shouldn't implement backward")

    def add_sublayer(self, name, sublayer):
        """Adds a sub Layer instance.

        The added sublayer can be accessed by self.name.

        Parameters:
            name(str): name of this sublayer.
            sublayer(Layer): an instance of Layer.
        Returns:
            Layer: the sublayer passed in.
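
        Examples:
            .. code-block:: python

                # An illustrative sketch; the MyModel class below is hypothetical
                # and not part of this module.
                import numpy as np
                import paddle.fluid as fluid

                class MyModel(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyModel, self).__init__()
                        self.add_sublayer("emb", fluid.dygraph.Embedding([10, 10]))

                    def forward(self, ids):
                        # the registered sublayer is accessible as self.emb
                        return self.emb(ids)

                with fluid.dygraph.guard():
                    ids = fluid.dygraph.to_variable(
                        np.array([[1], [2], [3]], dtype='int64'))
                    out = MyModel()(ids)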
        """
        assert isinstance(sublayer, core.Layer)

        self._sub_layers[name] = sublayer
        return sublayer

    def add_parameter(self, name, parameter):
        """Adds a Parameter instance.

        The added parameter can be accessed by self.name.

        Parameters:
            name(str): name of this parameter.
            parameter(Parameter): an instance of Parameter.
        Returns:
            Parameter: the parameter passed in.
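
        Examples:
            .. code-block:: python

                # An illustrative sketch; the MyLayer class below is hypothetical
                # and not part of this module.
                import paddle.fluid as fluid

                class MyLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        w = self.create_parameter(shape=[2, 2], dtype='float32')
                        self.add_parameter("w", w)  # now accessible as self.w

                with fluid.dygraph.guard():
                    layer = MyLayer()
                    print(len(layer.parameters()))  # 1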
        """
        assert isinstance(parameter, framework.Parameter)

        if len(self._loaddict_holder) > 0:
            assert parameter.name in self._loaddict_holder, "Parameter not found: cannot find [ {} ] in stat_dict".format(
                parameter.name)

            parameter.set_value(self._loaddict_holder[parameter.name])

        self._parameters[name] = parameter
        return parameter

    def __getattr__(self, name):
        if name in self._parameters:
            return self._parameters[name]
        elif name in self._sub_layers:
            return self._sub_layers[name]
        else:
            return object.__getattribute__(self, name)

    def __setattr__(self, name, value):
        if isinstance(getattr(type(self), name, None), property):
            object.__setattr__(self, name, value)
        if isinstance(value, framework.Parameter):
            params = self.__dict__.get('_parameters', None)
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            if len(self._loaddict_holder) > 0:
                assert value.name in self._loaddict_holder, "Parameter not found: cannot find [ {} ] in stat_dict".format(
                    value.name)

                value.set_value(self._loaddict_holder[value.name])

            params[name] = value
        elif isinstance(value, core.Layer):
            layers = self.__dict__.get('_sub_layers', None)
            if layers is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            layers[name] = value
        else:
            object.__setattr__(self, name, value)

    def __delattr__(self, name):
        if name in self._parameters:
            del self._parameters[name]
        elif name in self._sub_layers:
            del self._sub_layers[name]
        else:
            object.__delattr__(self, name)

    def state_dict(self,
                   destination=None,
                   include_sublayers=True,
                   structured_name_prefix=""):
        '''
        Get all parameters of the current layer and its sub-layers, and set them into a dict

        Parameters:
            destination(dict, optional) : If provided, all the parameters will be set into this dict. Default: None
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True

        Returns:
            dict: a dict containing all the parameters

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

        '''

        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():
            if data is not None:
                destination[structured_name_prefix + name] = data

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is not None:
                    destination_temp = destination.copy()
                    destination_temp.update(
                        layer_item.state_dict(
                            destination_temp, include_sublayers,
                            structured_name_prefix + layer_name + "."))
                    destination = destination_temp
        return destination

    def set_dict(self,
                 stat_dict,
                 include_sublayers=True,
                 use_structured_name=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensors in stat_dict

        Parameters:
            stat_dict(dict) : a dict containing all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. 
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")
                    
                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.set_dict( para_state_dict )

        '''
        self.load_dict(
            stat_dict,
            include_sublayers=include_sublayers,
            use_structured_name=use_structured_name)

    def load_dict(self,
                  stat_dict,
                  include_sublayers=True,
                  use_structured_name=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensors in stat_dict

        This API will be deprecated. Please use set_dict instead.

        Parameters:
            stat_dict(dict) : a dict containing all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")
                    
                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.load_dict( para_state_dict )

        '''

        inner_state_dict = self.state_dict()

        for name, para in inner_state_dict.items():
            key_name = name if use_structured_name else para.name
            if key_name in stat_dict:
                para.set_value(stat_dict[key_name])
            else:
                raise RuntimeError(
                    "Parameter not found, Can't not find [ {} ] in stat_dict"
                    "use_structured_name is set to [{}]".format(
                        key_name, use_structured_name))
        unused_para_list = []
        for k, v in stat_dict.items():
            if k not in inner_state_dict:
                unused_para_list.append(k)
        if len(unused_para_list) > 0:
            warnings.warn(
                "Varibale [ {} ] are not used, because not included in layers state_dict".
                format(" ".join(unused_para_list)))