# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import contextlib
import sys
import numpy as np
import six
from . import parallel_helper
from .. import unique_name
from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper
from paddle.fluid import framework
from ..param_attr import ParamAttr

__all__ = ['Layer', 'PyLayer']


class Layer(core.Layer):
    """Layers composed of operators.

    Args:
        name_scope: prefix name used by the layer to name parameters.
            If the prefix is "my_model/layer_1", a parameter name in MyLayer
            can be "my_model/layer_1/MyLayer/w_n", where w is the parameter
            base name and n is a unique auto-generated suffix.
        dtype: data type for the variables in the layer.
    """

    def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32):
        self._full_name = unique_name.generate(name_scope + "/" +
                                               self.__class__.__name__)
        self._built = False
        self._dtype = dtype
        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()
        self._loaddict_holder = collections.OrderedDict()

        self._helper = LayerObjectHelper(self._full_name)

    def train(self):
        framework._dygraph_tracer().train_mode()

    def eval(self):
        framework._dygraph_tracer().eval_mode()

    def full_name(self):
        """Full name for this layers.

          Full name is composed by name_scope + "/" + MyLayer.__class__.__name__

        Returns full name of this name.
        """
        return self._full_name

    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
        """Create parameters for this layers.

           Args:
               attr: [ParamAttr] should be the parameter attribute for this parameter
               shape: shape of the paramter
               dtype: data type of this parameter
               is_bias: if this is a bias parameter
               default_initializer: set the default initializer for this parameter

        Returns created parameter Variable.
        """
        if isinstance(attr, ParamAttr) and (attr.name is not None):
            attr.name = ".".join([self._full_name, attr.name])
        elif isinstance(attr, six.string_types):
            attr = ".".join([self._full_name, attr])
        return self._helper.create_parameter(attr, shape, dtype, is_bias,
                                             default_initializer)

    # TODO: Add more parameters to the list when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create Variable for this layers.

           Args:
               name: name of the variable
               persistable: if set this variable persistable
               dtype: data type of data in the variable
               type: type of the variable

        Returns created Variable.
        """
        if name is not None:
            var_name = ".".join([self._full_name, name])
        else:
            var_name = unique_name.generate(".".join(
                [self._full_name, "_generated_var"]))

        return self._helper.main_program.current_block().create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)

    def parameters(self, include_sublayers=True):
        """Returns a list of Parameters from current and sub-layers.

        Args:
            include_sublayers: If true, also include the parameters from
                sublayers.

        Returns a list of Parameters.
        """
        ret = [p for p in self._parameters.values()]
        if include_sublayers:
            for l in self._sub_layers.values():
                for p in l.parameters(include_sublayers):
                    ret.append(p)
        return ret

    def sublayers(self, include_sublayers=True):
        """Returns a list of sub layers.

        Args:
            include_sublayers: If true, also include the layers from sublayers.

        Returns a list of sub layers.
        """
        ret = [l for l in self._sub_layers.values()]
        if include_sublayers:
            for l in self._sub_layers.values():
                for sub_l in l.sublayers(include_sublayers):
                    ret.append(sub_l)
        return ret

    def clear_gradients(self):
        for p in self.parameters():
            p.clear_gradient()

    def build_once(self, *args):
        pass

    def __call__(self, *inputs):
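        # On the first call, run build_once so parameters that depend on the
        # actual inputs can be created, and broadcast parameters to other
        # devices when running in data-parallel mode.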
        if not self._built:
            self.build_once(*inputs)
            if parallel_helper._is_data_parallel_mode():
                parallel_helper._broadcast_parameters(self._parameters.values())

        outputs = self.forward(*inputs)
        self._built = True
        return outputs

    def forward(self, *inputs):
        raise NotImplementedError

    def backward(self, *inputs):
        raise ValueError("Layer shouldn't implement backward")

    def add_sublayer(self, name, sublayer):
        """Adds a sub Layer instance.

          The added sublayer can be accessed as self.name.

        Args:
            name: name of this sublayer.
            sublayer: an instance of Layer.
        Returns:
            the sublayer passed in.
        """
        assert isinstance(sublayer, core.Layer)

        self._sub_layers[name] = sublayer
        return sublayer

    def add_parameter(self, name, parameter):
        """Adds a Parameter instance.

          The added parameter can be accessed as self.name.

        Args:
            name: name of this parameter.
            parameter: an instance of Parameter.
        Returns:
            the parameter passed in.
        """
        assert isinstance(parameter, framework.Parameter)
        self._parameters[name] = parameter
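        # If a previously loaded state dict holds a value for this parameter,
        # keep the loaded value instead of the newly added one.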
        if parameter.name in self._loaddict_holder:
            self._parameters[name] = self._loaddict_holder[parameter.name]
            parameter = self._loaddict_holder[parameter.name]
        return parameter

    def __getattr__(self, name):
        if name in self._parameters:
            return self._parameters[name]
        elif name in self._sub_layers:
            return self._sub_layers[name]

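    # Attribute assignment is intercepted so that Parameter and Layer values
    # are registered in _parameters / _sub_layers rather than in __dict__.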
    def __setattr__(self, name, value):
        if isinstance(value, framework.Parameter):
            params = self.__dict__.get('_parameters', None)
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            if value.name in self._loaddict_holder:
                params[name] = self._loaddict_holder[value.name]
            else:
                params[name] = value
        elif isinstance(value, core.Layer):
            layers = self.__dict__.get('_sub_layers', None)
            if layers is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            layers[name] = value
        else:
            object.__setattr__(self, name, value)

    def __delattr__(self, name):
        if name in self._parameters:
            del self._parameters[name]
        elif name in self._sub_layers:
            del self._sub_layers[name]
        else:
            object.__delattr__(self, name)

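    # Collects this layer's parameters (and, optionally, those of its
    # sublayers) into an OrderedDict keyed by prefixed parameter names.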
    def state_dict(self, destination=None, prefix='', include_sublayers=True):
        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():
            if data is not None:
                destination[prefix + name] = data

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is not None:
                    destination_temp = destination.copy()
                    destination_temp.update(
                        layer_item.state_dict(destination_temp, prefix +
                                              layer_name + ".",
                                              include_sublayers))
                    destination = destination_temp
        return destination

    def load_dict(self, stat_dict, include_sublayers=True):
        self._loaddict_holder = stat_dict
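        # Copy matching entries from stat_dict into the underlying tensors of
        # the existing parameters, in place.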
        for name, item in self.__dict__.get('_parameters', None).items():
            if item.name in stat_dict:
                var = item._ivar.value()
                tensor = var.get_tensor()
                tensor.set(stat_dict[item.name].numpy(),
                           framework._current_expected_place())

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is not None:
                    layer_item.load_dict(stat_dict)

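# A minimal usage sketch for Layer (illustrative only; the FC sublayer,
# to_variable and guard helpers are assumed to be available from
# paddle.fluid.dygraph and are not defined in this file):
#
#     import numpy as np
#     import paddle.fluid as fluid
#
#     class MyLayer(fluid.dygraph.Layer):
#         def __init__(self, name_scope):
#             super(MyLayer, self).__init__(name_scope)
#             self._fc = fluid.dygraph.FC(self.full_name(), 10)
#
#         def forward(self, inputs):
#             return self._fc(inputs)
#
#     with fluid.dygraph.guard():
#         layer = MyLayer("my_layer")
#         x = fluid.dygraph.to_variable(np.ones([4, 8], dtype='float32'))
#         out = layer(x)
#         print(layer.full_name(), [p.name for p in layer.parameters()])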

class PyLayer(core.PyLayer):
    """Layers composed of user-defined python codes."""

    def __init__(self):
        super(PyLayer, self).__init__()

    def train(self):
        framework._dygraph_tracer().train_mode()

    def eval(self):
        framework._dygraph_tracer().eval_mode()

    @classmethod
    def _do_forward(cls, inputs):
        return cls._to_tuple(cls.forward(inputs))

    @classmethod
    def _do_backward(cls, inputs):
        return cls._to_tuple(cls.backward(inputs))

    @staticmethod
    def _to_tuple(inputs):
        if not isinstance(inputs, list) and not isinstance(inputs, tuple):
            inputs = [inputs]
        ret = []
        for inp in inputs:
            if isinstance(inp, core.LoDTensor):
                ret.append(inp)
            else:
                tensor = core.LoDTensor()
                tensor.set(inp, core.CPUPlace())
                ret.append(tensor)
        return tuple(ret)

    @staticmethod
    def forward(*inputs):
        raise NotImplementedError

    @staticmethod
    def backward(*douts):
        raise NotImplementedError

    @classmethod
    def __call__(cls, *inputs):
        tracer = framework._dygraph_tracer()
        block = framework.default_main_program().current_block()
        ivar_inputs = [x._ivar for x in inputs]

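        # Register this subclass's forward/backward callables with the C++
        # side once, assigning each a unique function id.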
        if not hasattr(cls, 'forward_id'):
            cls.forward_id = core.PyLayer.num_funcs() + 1
            PyLayer.register_func(cls.forward_id, cls._do_forward)
            cls.backward_id = core.PyLayer.num_funcs() + 1
            PyLayer.register_func(cls.backward_id, cls._do_backward)

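        # Build an op that carries the registered forward/backward ids,
        # append it to the current block, and let the tracer record it so the
        # backward pass can invoke the Python-defined backward.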
        iop = core.OpBase(cls.__class__.__name__ + str(cls.forward_id))
        iop.forward_id = cls.forward_id
        iop.backward_id = cls.backward_id
        block.ops.append(iop)
M
minqiyang 已提交
329
        ivars = tracer.py_trace(iop, ivar_inputs, False)
        ret = []
        for ivar in ivars:
M
minqiyang 已提交
332
            tensor = ivar.value().get_tensor()
            py_var = framework.Variable(
                block,
                type=core.VarDesc.VarType.LOD_TENSOR,
                name=None,
                shape=tensor.shape(),
                dtype=tensor._dtype(),
                ivar=ivar)
            ret.append(py_var)
        return ret
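

# A rough usage sketch for PyLayer (illustrative only; it assumes forward and
# backward receive a list of numpy-convertible tensors, as suggested by
# _do_forward/_to_tuple above, and return numpy results):
#
#     class MyPyLayer(fluid.dygraph.PyLayer):
#         @staticmethod
#         def forward(inputs):
#             return np.tanh(inputs[0])
#
#         @staticmethod
#         def backward(inputs):
#             inp, out, dout = inputs
#             return np.array(dout) * (1 - np.square(np.array(out)))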