layers.py 21.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

X
Xin Pan 已提交
15
import collections
16 17 18
import contextlib
import sys
import numpy as np
M
minqiyang 已提交
19
import collections
20
import six
21
import re
C
chengduo 已提交
22
from . import parallel_helper
X
Xin Pan 已提交
23
from .. import unique_name
24
from paddle.fluid import core
25
from .layer_object_helper import LayerObjectHelper
26
from .base import program_desc_tracing_guard
27
from paddle.fluid import framework
28
from ..param_attr import ParamAttr
H
hong 已提交
29 30
import copy
import warnings
31

32
__all__ = ['Layer']
33

34 35 36 37 38 39 40 41
_first_cap_re = re.compile('(.)([A-Z][a-z]+)')
_all_cap_re = re.compile('([a-z])([A-Z])')


def _convert_camel_to_snake(name):
    s1 = _first_cap_re.sub(r'\1_\2', name)
    return _all_cap_re.sub(r'\1_\2', s1).lower()

42

X
Xin Pan 已提交
43
class Layer(core.Layer):
44
    """Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.
X
Xin Pan 已提交
45

46
    Parameters:
47 48
        name_scope (str, optional): prefix name used by the layer to name parameters.
            If prefix is "my_layer", parameter name in MyLayer
49 50 51
            can be "my_layer_0.w_n", where "w" is the parameter
            base name and "n" is a unique, auto-generated suffix.
            If None, the prefix is the snake-cased class name. Default: None.
52 53 54 55 56 57 58
        dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                Default: ``core.VarDesc.VarType.FP32``
    
    Returns:
        None
X
Xin Pan 已提交
59
    """
X
Xin Pan 已提交
60

61 62
    def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
        # Without an explicit scope, derive one from the concrete class name
        # (e.g. "MyLayer" -> "my_layer").
        scope = (_convert_camel_to_snake(self.__class__.__name__)
                 if name_scope is None else name_scope)

        # unique_name.generate produces the layer's full name from the scope;
        # the helper is constructed with that same name.
        self._full_name = unique_name.generate(scope)
        self._helper = LayerObjectHelper(self._full_name)

        # Flipped to True by __call__ after _build_once has run.
        self._built = False
        self._dtype = dtype

        # Registries consulted by __getattr__/__setattr__/__delattr__.
        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()
        # When non-empty, values from this dict are copied into parameters
        # as they are registered (see add_parameter / __setattr__).
        self._loaddict_holder = collections.OrderedDict()
72

M
minqiyang 已提交
73
    def train(self):
        """Switch the current dygraph tracer to train mode.

        The mode is set on the tracer returned by
        ``framework._dygraph_tracer()``; nothing is stored on this layer.

        Returns:
            None
        """
        tracer = framework._dygraph_tracer()
        tracer.train_mode()
M
minqiyang 已提交
75 76

    def eval(self):
        """Switch the current dygraph tracer to eval mode.

        The mode is set on the tracer returned by
        ``framework._dygraph_tracer()``; nothing is stored on this layer.

        Returns:
            None
        """
        tracer = framework._dygraph_tracer()
        tracer.eval_mode()
M
minqiyang 已提交
78

X
Xin Pan 已提交
79
    def full_name(self):
        """Return the full name of this layer.

        The value is the one produced in ``__init__`` by
        ``unique_name.generate(name_scope)``, where ``name_scope`` defaults to
        the snake-cased class name.

        Returns:
            str: full name of this layer.
        """
        return self._full_name

87 88
    def create_parameter(self,
                         shape,
                         attr=None,
                         dtype='float32',
                         is_bias=False,
                         default_initializer=None):
        """Create a parameter for this layer through its helper.

        Parameters:
            shape(list): Shape of the parameter.
            attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`.
                An empty string is treated like None. Default: None.
            dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
            is_bias(bool, optional): if this is a bias parameter. Default: False.
            default_initializer(Initializer, optional): the default initializer for this parameter.
                If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
                for non-bias and bias parameter, respectively. Default: None.

        Returns:
            :ref:`api_guide_Variable_en` : created parameter.
        """
        # The helper is handed a deep copy of attr, never the caller's object.
        attr_copy = copy.deepcopy(attr)
        is_empty_string = (isinstance(attr_copy, six.string_types) and
                           attr_copy == "")
        if is_empty_string:
            attr_copy = None
        return self._helper.create_parameter(attr_copy, shape, dtype, is_bias,
                                             default_initializer)

    # TODO: Add more parameter list when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create a Variable in the current block of the helper's main program.

        The variable name is always prefixed with this layer's full name.

        Parameters:
            name(str, optional): name of the variable, appended to the layer's full name with a ".".
                If None, a unique name based on "<full_name>._generated_var" is generated.
                Please refer to :ref:`api_guide_Name` . Default: None
            persistable(bool, optional): if set this variable persistable.
                Passed through unchanged to ``create_var``. Default: None
            dtype(str or core.VarDesc.VarType, optional): data type of this variable.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                Passed through unchanged to ``create_var``. Default: None
            type(core.VarDesc.VarType, optional): type of the variable. No need to set this parameter. Default: ``core.VarDesc.VarType.LOD_TENSOR``

        Returns:
            :ref:`api_guide_Variable_en` : created Variable.
        """
        if name is None:
            generated_base = ".".join([self._full_name, "_generated_var"])
            var_name = unique_name.generate(generated_base)
        else:
            var_name = ".".join([self._full_name, name])

        block = self._helper.main_program.current_block()
        return block.create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)

X
polish  
Xin Pan 已提交
144
    def parameters(self, include_sublayers=True):
        """Return the Parameters of this layer as a list.

        This is the list form of ``named_parameters`` with the names dropped.

        Parameters:
            include_sublayers(bool, optional): Whether include the parameters of sublayers. If True, also include the parameters from sublayers. Default: True

        Returns:
            list of :ref:`api_guide_Variable_en` : a list of Parameters.
        """
        named = self.named_parameters(include_sublayers=include_sublayers)
        return [parameter for _name, parameter in named]
X
Xin Pan 已提交
159

X
Xin Pan 已提交
160 161 162
    def sublayers(self, include_sublayers=True):
        """Returns a list of sub layers.

        Parameters:
            include_sublayers(bool, optional): Whether return the sublayers of sublayers.
                If True, also include the sublayers of sublayers. If False, only the
                layers registered directly on this layer are returned. Default: True

        Returns:
            list of Layer : a list of sub layers. Each layer appears at most once and
                entries registered as None are skipped.
        """
        if include_sublayers:
            return [
                layer
                for _, layer in self.named_sublayers(
                    include_sublayers=include_sublayers)
            ]

        # named_sublayers(include_sublayers=False) never looks at
        # self._sub_layers, so delegating to it here would always produce an
        # empty list. Collect the direct children explicitly instead, with the
        # same None-skipping and de-duplication rules.
        seen = set()
        direct_children = []
        for layer in self._sub_layers.values():
            if layer is None or layer in seen:
                continue
            seen.add(layer)
            direct_children.append(layer)
        return direct_children

176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
    def named_parameters(self, prefix='', include_sublayers=True):
        """
        Iterate over the parameters of this Layer as (name, parameter) pairs.

        Each parameter object is yielded at most once, and entries registered
        as None are skipped. Names are built as "<layer prefix>.<key>".

        Parameters:
            prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
            include_sublayers(bool, optional): Whether include the parameters of sublayers.
                If True, also include the named parameters from sublayers. Default: True.

        Yields:
            (string, Parameter): Tuple of name and Parameter

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid

                with fluid.dygraph.guard():
                    fc1 = fluid.Linear(10, 3)
                    fc2 = fluid.Linear(3, 10, bias_attr=False)
                    model = fluid.dygraph.Sequential(fc1, fc2)
                    for name, param in model.named_parameters():
                        print(name, param)

        """
        seen_params = set()
        # include_self=True so that this layer's own parameters come first.
        layers = self.named_sublayers(
            prefix=prefix, include_sublayers=include_sublayers,
            include_self=True)
        for layer_prefix, layer in layers:
            for key, param in layer._parameters.items():
                if param is not None and param not in seen_params:
                    seen_params.add(param)
                    separator = '.' if layer_prefix else ''
                    yield layer_prefix + separator + key, param

    def named_sublayers(self,
                        prefix='',
                        include_sublayers=True,
                        include_self=False,
                        layers_set=None):
        """
        Iterate over the sublayers of this Layer as (name, sublayer) pairs.
        A layer that is reachable more than once is only yielded the first time.

        Parameters:
            prefix(str, optional): Prefix to prepend to all layer names. Default: ''.
            include_sublayers(bool, optional): Whether include the sublayers. If False,
                registered sublayers are not visited at all. Default: True.
            include_self(bool, optional): Whether include the Layer itself. Default: False.
            layers_set(set, optional): The set to record duplicate sublayers. Default: None.

        Yields:
            (string, Layer): Tuple of name and Layer

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid

                with fluid.dygraph.guard():
                    fc1 = fluid.Linear(10, 3)
                    fc2 = fluid.Linear(3, 10, bias_attr=False)
                    model = fluid.dygraph.Sequential(fc1, fc2)
                    for prefix, layer in model.named_sublayers():
                        print(prefix, layer)

        """
        # The same set is threaded through the recursion so that duplicates
        # are detected across the whole tree.
        visited = set() if layers_set is None else layers_set

        if include_self and self not in visited:
            visited.add(self)
            yield prefix, self

        if not include_sublayers:
            return

        for key, child in self._sub_layers.items():
            if child is None:
                continue
            child_prefix = prefix + ('.' if prefix else '') + key
            descendants = child.named_sublayers(
                prefix=child_prefix,
                include_sublayers=include_sublayers,
                include_self=True,
                layers_set=visited)
            for descendant_name, descendant in descendants:
                yield descendant_name, descendant

X
Xin Pan 已提交
263
    def clear_gradients(self):
        """
        Clear the gradients of the trainable parameters of this layer.

        Every parameter returned by ``self.parameters()`` whose ``trainable``
        attribute is true has ``clear_gradient()`` called on it.

        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                import numpy as np

                with fluid.dygraph.guard():
                    value = np.arange(26).reshape(2, 13).astype("float32")
                    a = fluid.dygraph.to_variable(value)
                    linear = fluid.Linear(13, 5, dtype="float32")
                    adam = fluid.optimizer.Adam(learning_rate=0.01,
                                                parameter_list=linear.parameters())
                    out = linear(a)
                    out.backward()
                    adam.minimize(out)
                    linear.clear_gradients()

        """
        trainable_params = (
            param for param in self.parameters() if param.trainable)
        for param in trainable_params:
            param.clear_gradient()
X
Xin Pan 已提交
291

292
    def _build_once(self, *args, **kwargs):
        """Hook run by ``__call__`` before the first ``forward``; no-op here.

        Parameters:
            *args(tuple): the positional arguments of the first call.
            **kwargs(dict): the keyword arguments of the first call.

        Returns:
            None
        """
        return None

295
    def __call__(self, *inputs, **kwargs):
        """Run the one-time build step if needed, then call ``forward``.

        Parameters:
            *inputs(tuple): positional arguments forwarded to ``forward``.
            **kwargs(dict): keyword arguments forwarded to ``forward``.

        Returns:
            Whatever ``forward`` returns.
        """
        needs_build = not self._built
        if needs_build:
            # The build step runs with program-desc tracing switched off.
            with program_desc_tracing_guard(False):
                self._build_once(*inputs, **kwargs)
                if parallel_helper._is_data_parallel_mode():
                    # Only the parameters registered directly on this layer
                    # are broadcast here.
                    own_parameters = self._parameters.values()
                    parallel_helper._broadcast_parameters(own_parameters)
            self._built = True

        return self.forward(*inputs, **kwargs)
M
minqiyang 已提交
306

307
    def forward(self, *inputs, **kwargs):
        """
        Describe the computation carried out each time the layer is called.
        Every subclass is expected to override this method.

        Parameters:
            *inputs(tuple): unpacked tuple arguments
            **kwargs(dict): unpacked dict arguments

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
X
Xin Pan 已提交
317 318 319 320

    def backward(self, *inputs):
        """Reject any call: a Layer is not meant to define ``backward``.

        Parameters:
            *inputs(tuple): ignored.

        Raises:
            ValueError: always.
        """
        raise ValueError("Layer shouldn't implement backward")

X
Xin Pan 已提交
321 322 323
    def add_sublayer(self, name, sublayer):
        """Register a sub Layer instance under the given name.

        After registration the sublayer can be read back as an attribute of
        this layer, i.e. ``getattr(self, name)``.

        Parameters:
            name(str): name of this sublayer.
            sublayer(Layer): an instance of Layer.
        Returns:
            Layer: the sublayer passed in.
        """
        assert isinstance(sublayer, core.Layer)

        registry = self._sub_layers
        registry[name] = sublayer
        return sublayer

    def add_parameter(self, name, parameter):
        """Adds a Parameter instance.

        Added parameter can be accessed by self.name

        Parameters:
            name(str): name of this parameter.
            parameter(Parameter): an instance of Parameter, or None to register
                an empty slot under ``name``.
        Returns:
            Parameter: the parameter passed in (None if None was passed).

        Raises:
            TypeError: if ``parameter`` is neither a Parameter nor None.
        """
        if parameter is None:
            # Nothing to initialise for an empty slot. Return right away so
            # the _loaddict_holder lookup below never dereferences None.
            self._parameters[name] = None
            return None

        if not isinstance(parameter, framework.Parameter):
            raise TypeError(
                "parameter assignment requires Parameter or None, but got '{}'"
                .format(type(parameter).__name__))

        # When values are waiting in _loaddict_holder, the new parameter is
        # initialised from the entry stored under its own name.
        if len(self._loaddict_holder) > 0:
            assert parameter.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
                parameter.name)

            parameter.set_value(self._loaddict_holder[parameter.name])

        self._parameters[name] = parameter
        return parameter

X
Xin Pan 已提交
364 365 366 367 368
    def __getattr__(self, name):
        """Resolve attributes that normal lookup did not find.

        Registered parameters are consulted first, then registered sublayers.

        Parameters:
            name(str): the attribute being looked up.

        Returns:
            The parameter or sublayer registered under ``name``.

        Raises:
            AttributeError: if ``name`` is not registered in either registry.
        """
        # Read the registries from the instance __dict__ instead of via
        # self._parameters / self._sub_layers: if they have not been created
        # yet, attribute access would call __getattr__ again and recurse
        # until RecursionError instead of raising AttributeError.
        instance_dict = self.__dict__
        params = instance_dict.get('_parameters')
        if params is not None and name in params:
            return params[name]
        layers = instance_dict.get('_sub_layers')
        if layers is not None and name in layers:
            return layers[name]
        # Produces the standard AttributeError for an unknown name.
        return object.__getattribute__(self, name)
X
Xin Pan 已提交
371 372

    def __setattr__(self, name, value):
        """Route an attribute assignment to the right storage.

        Parameter values go into ``_parameters``, Layer values go into
        ``_sub_layers``, and everything else is stored as a plain attribute.
        Assigning None to a name that is already registered keeps the
        registration and stores None in it.

        Parameters:
            name(str): the attribute name.
            value: the value being assigned.

        Raises:
            ValueError: if a Parameter or Layer is assigned before the
                registries exist (``__init__`` has not run).
            TypeError: if a registered parameter/sublayer name is assigned a
                value that is neither of the matching type nor None.
        """

        def _remove_if_exist(*dicts):
            # Drop `name` from every given dict so that it ends up stored in
            # exactly one place.
            for d in dicts:
                if name in d:
                    del d[name]

        # Names backed by a class-level property are assigned through
        # object.__setattr__.
        # NOTE(review): there is no return here, so the dispatch below still
        # runs and can assign the same name a second time -- confirm intended.
        if isinstance(getattr(type(self), name, None), property):
            object.__setattr__(self, name, value)
        # Read via __dict__ so a missing registry gives None instead of
        # triggering __getattr__.
        params = self.__dict__.get('_parameters', None)
        if isinstance(value, framework.Parameter):
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            # When values are waiting in _loaddict_holder, initialise the new
            # parameter from the entry stored under its own name.
            if len(self._loaddict_holder) > 0:
                assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
                    value.name)

                value.set_value(self._loaddict_holder[value.name])

            # The name may previously have been a plain attribute or sublayer.
            _remove_if_exist(self.__dict__, self._sub_layers)
            params[name] = value
        elif params is not None and name in params:
            # An existing parameter slot may only be reset to None.
            if value is not None:
                raise TypeError(
                    "assignment to parameter '{}' should be of type Parameter or None, but got '{}'"
                    .format(name, type(value).__name__))
            params[name] = None
        else:
            layers = self.__dict__.get('_sub_layers', None)
            if isinstance(value, core.Layer):
                if layers is None:
                    raise ValueError(
                        "super(YourLayer, self).__init__() should be called first"
                    )

                # The name may previously have been a plain attribute or
                # parameter.
                _remove_if_exist(self.__dict__, self._parameters)
                layers[name] = value
            elif layers is not None and name in layers:
                # An existing sublayer slot may only be reset to None.
                if value is not None:
                    raise TypeError(
                        "assignment to sublayer '{}' should be of type Layer or None, but got '{}'"
                        .format(name, type(value).__name__))
                layers[name] = None
            else:
                # Anything else is an ordinary attribute.
                object.__setattr__(self, name, value)
X
Xin Pan 已提交
417 418 419 420 421 422 423 424 425

    def __delattr__(self, name):
        """Delete a registered parameter, a registered sublayer, or a plain attribute.

        The parameter registry is checked first, then the sublayer registry;
        any other name is handled by ``object.__delattr__``.

        Parameters:
            name(str): the attribute to delete.
        """
        # The registries are fetched lazily, one at a time, in lookup order.
        for registry_name in ('_parameters', '_sub_layers'):
            registry = getattr(self, registry_name)
            if name in registry:
                del registry[name]
                return
        object.__delattr__(self, name)

H
hong 已提交
426 427 428 429
    def state_dict(self,
                   destination=None,
                   include_sublayers=True,
                   structured_name_prefix=""):
        '''
        Get all parameters of current layer and its sub-layers. And set all the parameters into a dict

        Parameters:
            destination(dict, optional) : If provided, all the parameters are written into this dict,
                which is also the dict that is returned. Default: None
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            structured_name_prefix(str, optional) : Prefix prepended to every key. Sublayers are visited with
                the prefix extended by "<sublayer name>.". Default: ""

        Returns:
            dict: a dict contains all the parameters

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

        '''

        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():
            # Slots registered as None hold no parameter and are skipped.
            if data is not None:
                destination[structured_name_prefix + name] = data

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is None:
                    continue
                # Let the sublayer fill the same dict in place. Copying the
                # dict per sublayer would leave a caller-supplied destination
                # without the sublayer entries and costs quadratic time.
                sub_state = layer_item.state_dict(
                    destination, include_sublayers,
                    structured_name_prefix + layer_name + ".")
                # An overriding state_dict may return a fresh dict instead of
                # filling ours; merge it so no entry is lost.
                if sub_state is not destination:
                    destination.update(sub_state)
        return destination

H
hong 已提交
469 470 471 472
    def set_dict(self,
                 stat_dict,
                 include_sublayers=True,
                 use_structured_name=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensor in the stat_dict

        This method forwards all of its arguments to ``load_dict``.

        Parameters:
            stat_dict(dict) : Dict contains all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.set_dict( para_state_dict )

        '''
        forwarded_options = dict(
            include_sublayers=include_sublayers,
            use_structured_name=use_structured_name)
        self.load_dict(stat_dict, **forwarded_options)

    def load_dict(self,
                  stat_dict,
                  include_sublayers=True,
                  use_structured_name=True):
        '''
        Set parameters from stat_dict. All the parameters will be reset by the tensor in the stat_dict

        This api will be Deprecated. Please use set_dict

        Parameters:
            stat_dict(dict) : Dict contains all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
                NOTE(review): this argument is currently not consulted; ``self.state_dict()`` is called
                with its defaults, so sublayer parameters are always loaded.
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                                  Default: True
        Returns:
            None

        Raises:
            RuntimeError: if a parameter of this layer has no entry in ``stat_dict``.

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.load_dict( para_state_dict )

        '''

        inner_state_dict = self.state_dict()

        # Keys that were actually looked up in stat_dict. These are structured
        # names or parameter names depending on use_structured_name, so the
        # unused-entry check below has to compare against this set and not
        # against the (always structured) keys of inner_state_dict.
        expected_keys = set()
        for name, para in inner_state_dict.items():
            key_name = name if use_structured_name else para.name
            expected_keys.add(key_name)
            if key_name not in stat_dict:
                raise RuntimeError(
                    "Parameter not found, Can't not find [ {} ] in stat_dict, "
                    "use_structured_name is set to [{}]".format(
                        key_name, use_structured_name))
            para.set_value(stat_dict[key_name])

        # Entries in stat_dict that no parameter asked for only cause a warning.
        unused_para_list = [k for k in stat_dict if k not in expected_keys]
        if unused_para_list:
            warnings.warn(
                "Varibale [ {} ] are not used, because not included in layers state_dict".
                format(" ".join(unused_para_list)))