layers.py 12.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

X
Xin Pan 已提交
15
import collections
16 17 18
import contextlib
import sys
import numpy as np
M
minqiyang 已提交
19
import collections
20
import six
C
chengduo 已提交
21
from . import parallel_helper
X
Xin Pan 已提交
22
from .. import unique_name
23
from paddle.fluid import core
24
from .layer_object_helper import LayerObjectHelper
25
from paddle.fluid import framework
26
from ..param_attr import ParamAttr
H
hong 已提交
27
from paddle.fluid.framework import Variable
28

29
__all__ = ['Layer']
30 31


X
Xin Pan 已提交
32
class Layer(core.Layer):
X
Xin Pan 已提交
33 34 35 36 37 38 39 40 41
    """Layers composed of operators.

    Args:
        name_scope: prefix name used by the layer to name parameters.
            If prefix is "my_model/layer_1", parameter name in MyLayer
            can be "my_model/layer_1/MyLayer/w_n", where w is the parameter
            base name and n is an unique suffix auto-generated.
        dtype: data type for the variables in the layer.
    """
X
Xin Pan 已提交
42

X
Xin Pan 已提交
43 44 45
    def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32):
        """Initialise naming, dtype and the empty registries of the layer.

        Args:
            name_scope: prefix joined with the class name (using "/") to
                build the unique full name of this layer.
            dtype: data type stored on the layer; defaults to
                ``core.VarDesc.VarType.FP32``.
        """
        scoped_name = "/".join([name_scope, self.__class__.__name__])
        self._full_name = unique_name.generate(scoped_name)

        self._dtype = dtype
        # Flipped to True by __call__ after the first successful forward.
        self._built = False

        # Insertion-ordered registries: own parameters, direct sub-layers
        # and the dict most recently handed to load_dict.
        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()
        self._loaddict_holder = collections.OrderedDict()

        self._helper = LayerObjectHelper(self._full_name)

M
minqiyang 已提交
54
    def train(self):
        """Put the tracer returned by the framework into train mode."""
        tracer = framework._dygraph_tracer()
        tracer.train_mode()
M
minqiyang 已提交
56 57

    def eval(self):
        """Put the tracer returned by the framework into eval mode."""
        tracer = framework._dygraph_tracer()
        tracer.eval_mode()
M
minqiyang 已提交
59

X
Xin Pan 已提交
60 61 62 63 64 65 66 67 68
    def full_name(self):
        """Return the full name of this layer.

          The full name is built from name_scope + "/" + the class name of
          the layer and is stored when the layer is constructed.

        Returns:
            the full name of this layer.
        """
        layer_name = self._full_name
        return layer_name

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
        """Create a parameter for this layer.

           A named attribute (either a ParamAttr whose name is set, or a
           plain string) is prefixed with the full name of this layer,
           joined by ".", before it is handed to the layer helper.

           Args:
               attr: [ParamAttr] the parameter attribute for this parameter
               shape: shape of the parameter
               dtype: data type of this parameter
               is_bias: if this is a bias parameter
               default_initializer: set the default initializer for this parameter

        Returns created parameter Variable.
        """
        import copy

        if isinstance(attr, ParamAttr):
            if attr.name is not None:
                # Rename a shallow copy instead of the caller's object, so a
                # ParamAttr reused for several calls is not prefixed again
                # and again ("b.a.w", "c.b.a.w", ...).
                attr = copy.copy(attr)
                attr.name = ".".join([self._full_name, attr.name])
        elif isinstance(attr, six.string_types):
            attr = ".".join([self._full_name, attr])

        return self._helper.create_parameter(attr, shape, dtype, is_bias,
                                             default_initializer)

    # TODO: Add more parameter list when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create a Variable in the current block of the main program.

           The variable name is always prefixed with the full name of this
           layer. When no name is given, a unique one is generated from the
           "_generated_var" base name.

           Args:
               name: name of the variable
               persistable: if set this variable persistable
               dtype: data type of data in the variable
               type: type of the variable

        Returns created Variable.
        """
        if name is None:
            generated_base = ".".join([self._full_name, "_generated_var"])
            var_name = unique_name.generate(generated_base)
        else:
            var_name = ".".join([self._full_name, name])

        block = self._helper.main_program.current_block()
        return block.create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)

X
polish  
Xin Pan 已提交
118 119
    def parameters(self, include_sublayers=True):
        """Collect the parameters of this layer and, optionally, sub-layers.

        Own parameters come first, followed by the parameters of each
        sub-layer in registration order.

        Args:
            include_sublayers: If true, also include the parameters from
            sublayers (recursively).

        Returns a list of Parameters.
        """
        collected = list(self._parameters.values())
        if not include_sublayers:
            return collected

        for child in self._sub_layers.values():
            collected.extend(child.parameters(include_sublayers))
        return collected
X
Xin Pan 已提交
133

X
Xin Pan 已提交
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
    def sublayers(self, include_sublayers=True):
        """Collect the sub-layers registered on this layer.

        Direct sub-layers are listed first; when requested, the sub-layers
        of each direct sub-layer follow, in registration order.

        Args:
            include_sublayers: If true, also include the layers from sublayers.

        Returns a list of sub layers.
        """
        children = list(self._sub_layers.values())
        if not include_sublayers:
            return children

        # Iterate over the registry itself, not over the growing result.
        for child in self._sub_layers.values():
            children.extend(child.sublayers(include_sublayers))
        return children

X
Xin Pan 已提交
149 150
    def clear_gradients(self):
        """Clear the gradient of every trainable parameter.

        Covers the parameters returned by ``self.parameters()``; parameters
        whose ``trainable`` flag is false are left untouched.
        """
        trainable_params = (p for p in self.parameters() if p.trainable)
        for param in trainable_params:
            param.clear_gradient()
X
Xin Pan 已提交
153

154
    def _build_once(self, *args, **kwargs):
        """Hook run by ``__call__`` before the first forward pass.

        It receives the same arguments as that first call. The base
        implementation does nothing.
        """

157
    def __call__(self, *inputs, **kwargs):
        """Run the layer: build once if needed, then call ``forward``.

        On the first call ``_build_once`` receives the same arguments, and
        in data parallel mode the layer's own parameters are broadcast.

        Returns:
            whatever ``forward`` returns for the given arguments.
        """
        needs_build = not self._built
        if needs_build:
            self._build_once(*inputs, **kwargs)
            if parallel_helper._is_data_parallel_mode():
                own_params = self._parameters.values()
                parallel_helper._broadcast_parameters(own_params)

        result = self.forward(*inputs, **kwargs)
        # Marked as built only after forward returned without raising.
        self._built = True
        return result
M
minqiyang 已提交
166

167
    def forward(self, *inputs, **kwargs):
        """Compute the outputs of the layer; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
X
Xin Pan 已提交
169 170 171 172

    def backward(self, *inputs):
        """Reject any attempt to call backward on a Layer.

        Raises:
            ValueError: always.
        """
        message = "Layer shouldn't implement backward"
        raise ValueError(message)

X
Xin Pan 已提交
173 174 175 176 177 178 179 180 181 182 183 184
    def add_sublayer(self, name, sublayer):
        """Register a sub Layer instance under the given name.

          The added sublayer can be accessed like self.name.

        Args:
            name: name of this sublayer.
            sublayer: an instance of Layer.
        Returns:
            the sublayer passed in.
        """
        assert isinstance(sublayer, core.Layer)

        registry = self._sub_layers
        registry[name] = sublayer
        return sublayer

    def add_parameter(self, name, parameter):
        """Register a Parameter instance under the given name.

          The added parameter can be accessed like self.name. If a dict
          was loaded earlier through load_dict, the parameter must be
          present in it and its value is restored from there.

        Args:
            name: name of this parameter.
            parameter: an instance of Parameter.
        Returns:
            the parameter passed in.
        """
        assert isinstance(parameter, framework.Parameter)

        loaded = self._loaddict_holder
        if len(loaded) > 0:
            param_name = parameter.name
            assert param_name in loaded, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
                param_name)
            parameter.set_value(loaded[param_name])

        self._parameters[name] = parameter
        return parameter

X
Xin Pan 已提交
211 212 213 214 215
    def __getattr__(self, name):
        """Resolve attributes that normal lookup did not find.

        Registered parameters are searched first, then registered
        sub-layers; anything else falls back to the default lookup.

        Raises:
            AttributeError: if the name is neither a registered parameter
                nor a registered sub-layer nor a regular attribute.
        """
        # Read the registries straight from the instance dict. Going through
        # ``self._parameters`` would re-enter __getattr__ whenever the
        # registries do not exist yet (e.g. before __init__ ran) and
        # recurse until the interpreter gives up.
        state = self.__dict__
        params = state.get('_parameters')
        if params is not None and name in params:
            return params[name]
        layers = state.get('_sub_layers')
        if layers is not None and name in layers:
            return layers[name]
        return object.__getattribute__(self, name)
X
Xin Pan 已提交
218 219

    def __setattr__(self, name, value):
        """Route attribute assignment to the matching registry.

        * Names that are properties on the class go to the property setter.
        * Parameter values are stored in the parameter registry (after being
          restored from a previously loaded dict, if there is one).
        * Layer values are stored in the sub-layer registry.
        * Everything else is stored as a regular attribute.

        Raises:
            ValueError: if a Parameter or Layer is assigned before the base
                class constructor created the registries.
        """
        if isinstance(getattr(type(self), name, None), property):
            object.__setattr__(self, name, value)
            # Stop here: falling through would run the property setter a
            # second time and also register a Parameter under the property
            # name.
            return

        if isinstance(value, framework.Parameter):
            params = self.__dict__.get('_parameters', None)
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            if len(self._loaddict_holder) > 0:
                assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
                    value.name)

                value.set_value(self._loaddict_holder[value.name])

            if name in params:
                # remove unused param in tracer
                if framework._dygraph_tracer_ is not None:
                    framework._dygraph_tracer_._vars.pop(params[name].name,
                                                         None)
            params[name] = value
        elif isinstance(value, core.Layer):
            layers = self.__dict__.get('_sub_layers', None)
            if layers is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            layers[name] = value
        else:
            object.__setattr__(self, name, value)

    def __delattr__(self, name):
        """Delete a registered parameter, sub-layer or regular attribute.

        The parameter registry is checked first, then the sub-layer
        registry; any other name is deleted as a regular attribute.
        """
        own_params = self._parameters
        if name in own_params:
            del own_params[name]
            return

        children = self._sub_layers
        if name in children:
            del children[name]
            return

        object.__delattr__(self, name)

256
    def state_dict(self, destination=None, include_sublayers=True):
        '''
        Get all parameters of the current layer and its sub-layers, and put them into a dict keyed by parameter name.

        Args:
            destination(dict|optional) : If provided, all the parameters are written into this dict and it is returned. Default is None, in which case a new ordered dict is created.
            include_sublayers(bool) : If true, also include the parameters from sublayers.

        Returns:
            state_dict(dict) : dict containing all the parameters

        Examples:
            .. code-block:: python
                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding( "emb", [10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

        '''
        if destination is None:
            destination = collections.OrderedDict()

        for param in self._parameters.values():
            if param is not None:
                destination[param.name] = param

        if include_sublayers:
            for sub_layer in self._sub_layers.values():
                if sub_layer is None:
                    continue
                # Let the sub-layer fill the same dict in place, so a dict
                # supplied by the caller really receives every parameter
                # and no per-layer copies are made.
                sub_state = sub_layer.state_dict(destination,
                                                 include_sublayers)
                # A sub-layer is free to return a different dict; merge it.
                if sub_state is not destination:
                    destination.update(sub_state)
        return destination

H
hong 已提交
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
    def set_dict(self, stat_dict, include_sublayers=True):
        '''
        Set parameters from stat_dict. Every parameter is reset with the tensor stored under its name in stat_dict.

        This simply forwards to load_dict.

        Args:
            stat_dict(dict) : Dict containing all the parameters
            include_sublayers(bool) : If true, also set the parameters of sublayers.
        Returns:
            None

        Examples:
            .. code-block:: python
                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding( "emb", [10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.set_dict( para_state_dict )

        '''
        loader = self.load_dict
        loader(stat_dict, include_sublayers=include_sublayers)

320
    def load_dict(self, stat_dict, include_sublayers=True):
        '''
        Set parameters from stat_dict. Every parameter is reset with the tensor stored under its name in stat_dict.

        The dict is also remembered on the layer, so that parameters added later can be restored from it.

        This api will be Deprecated. Please use set_dict

        Args:
            stat_dict(dict) : Dict containing all the parameters
            include_sublayers(bool) : If true, also set the parameters of sublayers.
        Returns:
            None
        Raises:
            RuntimeError: if a parameter of this layer is missing from stat_dict.

        Examples:
            .. code-block:: python
                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding( "emb", [10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")

                    emb.load_dict( para_state_dict )

        '''
        self._loaddict_holder = stat_dict

        own_params = self.__dict__.get('_parameters', None)
        for _, param in own_params.items():
            if param.name not in stat_dict:
                raise RuntimeError(
                    "Parameter not found, Can't not find [ {} ] in stat_dict".
                    format(param.name))
            param.set_value(stat_dict[param.name])

        if not include_sublayers:
            return

        for child in self._sub_layers.values():
            if child is not None:
                child.load_dict(stat_dict)