layers.py 10.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

X
Xin Pan 已提交
15
import collections
16 17 18
import contextlib
import sys
import numpy as np
M
minqiyang 已提交
19
import collections
20
import six
X
Xin Pan 已提交
21
from .. import unique_name
22
from paddle.fluid import core
23
from .layer_object_helper import LayerObjectHelper
24
from paddle.fluid import framework
25
from ..param_attr import ParamAttr
26

X
Xin Pan 已提交
27
__all__ = ['Layer', 'PyLayer']
28 29


X
Xin Pan 已提交
30
class Layer(core.Layer):
X
Xin Pan 已提交
31 32 33 34 35 36 37 38 39
    """Layers composed of operators.

    Args:
        name_scope: prefix name used by the layer to name parameters.
            If the prefix is "my_model/layer_1", a parameter name in MyLayer
            can be "my_model/layer_1/MyLayer/w_n", where w is the parameter
            base name and n is an auto-generated unique suffix.
        dtype: data type for the variables in the layer.
    """
X
Xin Pan 已提交
40

X
Xin Pan 已提交
41 42 43
    def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32):
        """Set up the layer's name, bookkeeping containers and helper.

        Args:
            name_scope: prefix joined with the class name by "/" to form the
                key passed to ``unique_name.generate``.
            dtype: data type stored on the layer as ``_dtype``.
        """
        scoped_name = "/".join([name_scope, self.__class__.__name__])
        self._full_name = unique_name.generate(scoped_name)
        # Stays False until a __call__ has run forward() to completion.
        self._built = False
        self._dtype = dtype
        # Ordered registries filled by add_parameter/add_sublayer and by
        # attribute assignment.
        self._parameters = collections.OrderedDict()
        self._sub_layers = collections.OrderedDict()

        self._helper = LayerObjectHelper(self._full_name)

M
minqiyang 已提交
51
    def train(self):
        """Switch the current dygraph tracer to train mode."""
        tracer = framework._dygraph_tracer()
        tracer.train_mode()
M
minqiyang 已提交
53 54

    def eval(self):
        """Switch the current dygraph tracer to eval mode."""
        tracer = framework._dygraph_tracer()
        tracer.eval_mode()
M
minqiyang 已提交
56

X
Xin Pan 已提交
57 58 59 60 61 62 63 64 65
    def full_name(self):
        """Return the full name of this layer.

        The full name is generated from name_scope + "/" + the layer's
        class name.

        Returns the full name string stored on this layer.
        """
        layer_name = self._full_name
        return layer_name

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
        """Create a parameter for this layer.

        A named ``attr`` (a ParamAttr with a name, or a plain string) is
        prefixed with this layer's full name, joined by ".".

        Args:
            attr: [ParamAttr] the parameter attribute for this parameter;
                a string is treated as the parameter name.
            shape: shape of the parameter
            dtype: data type of this parameter
            is_bias: if this is a bias parameter
            default_initializer: set the default initializer for this
                parameter

        Returns the created parameter Variable.
        """
        import copy

        if isinstance(attr, ParamAttr) and (attr.name is not None):
            # Rename a shallow copy: mutating the caller's ParamAttr would
            # prefix the name again every time the same attr is reused.
            attr = copy.copy(attr)
            attr.name = ".".join([self._full_name, attr.name])
        elif isinstance(attr, six.string_types):
            attr = ".".join([self._full_name, attr])
        return self._helper.create_parameter(attr, shape, dtype, is_bias,
                                             default_initializer)

    # TODO: Add more parameter list when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create a Variable in the current block of the main program.

        Args:
            name: name of the variable; it is prefixed with this layer's
                full name. When None, a unique name is generated instead.
            persistable: whether to mark the variable persistable
            dtype: data type of the data in the variable
            type: type of the variable

        Returns the created Variable.
        """
        if name is None:
            generated_base = ".".join([self._full_name, "_generated_var"])
            var_name = unique_name.generate(generated_base)
        else:
            var_name = ".".join([self._full_name, name])

        block = self._helper.main_program.current_block()
        return block.create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)

X
polish  
Xin Pan 已提交
115 116
    def parameters(self, include_sublayers=True):
        """Collect the Parameters of this layer and, optionally, sub-layers.

        Args:
            include_sublayers: If true, the parameters of every sub-layer
                are appended (recursively) after this layer's own.

        Returns a list of Parameters.
        """
        collected = list(self._parameters.values())
        if not include_sublayers:
            return collected
        for child in self._sub_layers.values():
            collected.extend(child.parameters(include_sublayers))
        return collected
X
Xin Pan 已提交
130

X
Xin Pan 已提交
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
    def sublayers(self, include_sublayers=True):
        """Collect the sub layers registered on this layer.

        Args:
            include_sublayers: If true, the sub layers of each direct sub
                layer are appended (recursively) after the direct ones.

        Returns a list of sub layers.
        """
        found = list(self._sub_layers.values())
        if include_sublayers:
            for child in self._sub_layers.values():
                found.extend(child.sublayers(include_sublayers))
        return found

X
Xin Pan 已提交
146 147
    def clear_gradients(self):
        """Call ``clear_gradient()`` on every parameter from ``parameters()``."""
        for param in self.parameters():
            param.clear_gradient()
X
Xin Pan 已提交
149

L
lujun 已提交
150
    def build_once(self, *args):
        """Hook run by ``__call__`` with its inputs while the layer is unbuilt.

        The base implementation does nothing.
        """
        return None

153
    def __call__(self, *inputs):
X
Xin Pan 已提交
154
        if not self._built:
L
lujun 已提交
155
            self.build_once(*inputs)
156

157
        outputs = self.forward(*inputs)
X
Xin Pan 已提交
158
        self._built = True
M
minqiyang 已提交
159
        return outputs
M
minqiyang 已提交
160

161 162
    def forward(self, *inputs):
        """Compute the layer's outputs; this base version always raises.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError
X
Xin Pan 已提交
163 164 165 166

    def backward(self, *inputs):
        """Reject manual backward passes on a Layer.

        Raises:
            ValueError: always.
        """
        message = "Layer shouldn't implement backward"
        raise ValueError(message)

X
Xin Pan 已提交
167 168 169 170 171 172 173 174 175 176 177 178
    def add_sublayer(self, name, sublayer):
        """Register a sub Layer instance under ``name``.

          The added sublayer can then be accessed like self.name.

        Args:
            name: name of this sublayer.
            sublayer: an instance of Layer.
        Returns:
            the sublayer passed in.
        """
        assert isinstance(sublayer, core.Layer)

        registry = self._sub_layers
        registry[name] = sublayer
        return sublayer

    def add_parameter(self, name, parameter):
        """Register a Parameter instance under ``name``.

          The added parameter can then be accessed like self.name.

        Args:
            name: name of this parameter.
            parameter: an instance of Parameter.
        Returns:
            the parameter passed in.
        """
        assert isinstance(parameter, framework.Parameter)
        registry = self._parameters
        registry[name] = parameter
        return parameter

X
Xin Pan 已提交
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
    def __getattr__(self, name):
        if name in self._parameters:
            return self._parameters[name]
        elif name in self._sub_layers:
            return self._sub_layers[name]

    def __setattr__(self, name, value):
        """Route Parameter and Layer values into their registries.

        A ``framework.Parameter`` is stored in ``_parameters`` and a
        ``core.Layer`` in ``_sub_layers``; any other value is set as a
        regular attribute.

        Raises:
            ValueError: if the target registry does not exist yet.
        """
        if isinstance(value, framework.Parameter):
            registry_key = '_parameters'
        elif isinstance(value, core.Layer):
            registry_key = '_sub_layers'
        else:
            object.__setattr__(self, name, value)
            return

        registry = self.__dict__.get(registry_key, None)
        if registry is None:
            raise ValueError(
                "super(YourLayer, self).__init__() should be called first")
        registry[name] = value

    def __delattr__(self, name):
        if name in self._parameters:
            del self._parameters[name]
        elif name in self._sub_layers:
            del self._sub_layers[name]
        else:
            object.__delattr__(self, name)

228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
    def state_dict(self, destination=None, prefix='', include_sublayers=True):
        """Collect this layer's parameters into an ordered mapping.

        Args:
            destination: mapping to fill in place; a new OrderedDict is
                created when None.
            prefix: string prepended to every key written by this layer.
            include_sublayers: If true, each sub layer adds its entries
                under ``prefix + <sub layer name> + "."``.

        Returns ``destination``, keyed by ``prefix + <parameter name>``.
        Parameters and sub layers that are None are skipped.
        """
        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():
            if data is not None:
                destination[prefix + name] = data

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is None:
                    continue
                # Fill the same mapping instead of copying it per sub layer,
                # so a caller-supplied destination receives every entry.
                collected = layer_item.state_dict(
                    destination, prefix + layer_name + ".", include_sublayers)
                # A sub layer that overrides state_dict may return a new
                # mapping rather than the one it was given.
                if collected is not destination:
                    destination.update(collected)
        return destination

    def load_dict(self, stat_dict, include_sublayers=True):
        """Copy values from ``stat_dict`` into this layer's parameters.

        Each parameter whose ``name`` is a key of ``stat_dict`` has its
        tensor set from ``stat_dict[name].numpy()`` on the current expected
        place.

        NOTE(review): keys are matched on the parameter's own ``name``,
        while ``state_dict`` keys by prefix + attribute name; confirm the
        two are meant to differ.

        Args:
            stat_dict: mapping from parameter name to an object providing
                ``numpy()``.
            include_sublayers: If true, sub layers load from the same
                mapping as well.
        """
        own_params = self.__dict__.get('_parameters', None)
        for item in own_params.values():
            if item.name not in stat_dict:
                continue
            tensor = item._ivar.value().get_tensor()
            tensor.set(stat_dict[item.name].numpy(),
                       framework._current_expected_place())

        if not include_sublayers:
            return
        for layer_item in self._sub_layers.values():
            if layer_item is not None:
                layer_item.load_dict(stat_dict)

X
Xin Pan 已提交
259

X
Xin Pan 已提交
260
class PyLayer(core.PyLayer):
X
Xin Pan 已提交
261 262
    """Layers composed of user-defined Python code."""

X
Xin Pan 已提交
263 264
    def __init__(self):
        """Initialize the ``core.PyLayer`` base; no extra state is added."""
        super(PyLayer, self).__init__()
X
Xin Pan 已提交
265

M
minqiyang 已提交
266
    def train(self):
        """Switch the current dygraph tracer to train mode."""
        tracer = framework._dygraph_tracer()
        tracer.train_mode()
M
minqiyang 已提交
268 269

    def eval(self):
        """Switch the current dygraph tracer to eval mode."""
        tracer = framework._dygraph_tracer()
        tracer.eval_mode()
M
minqiyang 已提交
271

X
Xin Pan 已提交
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
    @classmethod
    def _do_forward(cls, inputs):
        """Run ``cls.forward`` on ``inputs`` and wrap the result as tensors."""
        outputs = cls.forward(inputs)
        return cls._to_tuple(outputs)

    @classmethod
    def _do_backward(cls, inputs):
        """Run ``cls.backward`` on ``inputs`` and wrap the result as tensors."""
        grads = cls.backward(inputs)
        return cls._to_tuple(grads)

    @staticmethod
    def _to_tuple(inputs):
        """Wrap each value in a ``core.LoDTensor`` set on ``core.CPUPlace()``.

        Args:
            inputs: a list or tuple of values, or a single value, which is
                treated as a one-element list.

        Returns a tuple with one LoDTensor per input value.
        """
        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]
        wrapped = []
        for value in inputs:
            holder = core.LoDTensor()
            holder.set(value, core.CPUPlace())
            wrapped.append(holder)
        return tuple(wrapped)

X
Xin Pan 已提交
291
    @staticmethod
M
minqiyang 已提交
292
    def forward(*inputs):
X
Xin Pan 已提交
293 294
        raise NotImplementedError

X
Xin Pan 已提交
295
    @staticmethod
M
minqiyang 已提交
296
    def backward(*douts):
X
Xin Pan 已提交
297
        raise NotImplementedError
X
Xin Pan 已提交
298 299

    @classmethod
    def __call__(cls, *inputs):
        """Trace this PyLayer on ``inputs`` and return the output Variables.

        On the first call for a given class, its ``_do_forward`` and
        ``_do_backward`` callbacks are registered with ``PyLayer`` and the
        resulting ids are stored on the class.

        Args:
            inputs: objects exposing an ``_ivar`` attribute.

        Returns a list of ``framework.Variable``, one per traced output.
        """
        tracer = framework._dygraph_tracer()
        block = framework.default_main_program().current_block()
        ivar_inputs = [x._ivar for x in inputs]

        # Look in the class's own __dict__: hasattr() would also see ids
        # inherited from a parent class, and the subclass would then run
        # the parent's registered callbacks instead of its own.
        if 'forward_id' not in cls.__dict__:
            cls.forward_id = core.PyLayer.num_funcs() + 1
            PyLayer.register_func(cls.forward_id, cls._do_forward)
            cls.backward_id = core.PyLayer.num_funcs() + 1
            PyLayer.register_func(cls.backward_id, cls._do_backward)

        # cls is already the class here, so cls.__class__ would be its
        # metaclass; cls.__name__ is the layer's own class name.
        iop = core.OpBase(cls.__name__ + str(cls.forward_id))
        iop.forward_id = cls.forward_id
        iop.backward_id = cls.backward_id
        block.ops.append(iop)
        ivars = tracer.py_trace(iop, ivar_inputs, False)
        ret = []
        for ivar in ivars:
            tensor = ivar.value().get_tensor()
            py_var = framework.Variable(
                block,
                type=core.VarDesc.VarType.LOD_TENSOR,
                name=None,
                shape=tensor.shape(),
                dtype=tensor._dtype(),
                ivar=ivar)
            ret.append(py_var)
        return ret