#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import copy
import itertools
import six
import sys
import numpy as np

from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating, _in_imperative_mode
from . import unique_name
from paddle.fluid.imperative import base as imperative_base
from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr, WeightNormParamAttr
from . import core
from six.moves import zip


class LayerHelper(object):
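    """Helper used by layer functions to create parameters and variables and
    to append operators to the default main and startup programs.

    The snippet below is only an illustrative sketch of how a typical layer
    function drives this helper; the names used in it (my_fc, input,
    param_shape, the mul op, etc.) are placeholders and not part of this
    class:

        helper = LayerHelper('my_fc', input=input, param_attr=param_attr,
                             bias_attr=bias_attr, act=act)
        dtype = helper.input_dtype()
        w = helper.create_parameter(
            attr=helper.param_attr, shape=param_shape, dtype=dtype)
        out = helper.create_variable_for_type_inference(dtype)
        helper.append_op(
            type='mul', inputs={'X': helper.input(), 'Y': w},
            outputs={'Out': out})
        out = helper.append_bias_op(out)
        return helper.append_activation(out)
    """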
    def __init__(self, layer_type, **kwargs):
        self.kwargs = kwargs
        self.layer_type = layer_type
        name = self.kwargs.get('name', None)
        # TODO(panyx0718, minqiyang): imperative mode
        # can not use both `layer_type` and `name`. Deprecate LayerHelper
        # and write a Helper for imperative mode.
        if name is None:
            self.kwargs['name'] = unique_name.generate(self.layer_type)

    @property
    def name(self):
        return self.kwargs['name']

    @property
    def main_program(self):
        return default_main_program()

    @property
    def startup_program(self):
        return default_startup_program()

    def to_variable(self, x):
        return imperative_base.to_variable(x, self.main_program.current_block())

    def append_op(self, *args, **kwargs):
        return self.main_program.current_block().append_op(*args, **kwargs)

    def multiple_input(self, input_param_name='input'):
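        """Return the inputs stored under `input_param_name` in kwargs as a
        list of Variables (each converted via to_variable); a single input is
        wrapped into a one-element list.
        """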
        inputs = self.kwargs.get(input_param_name, [])
        ret = []
        if isinstance(inputs, (list, tuple)):
            for inp in inputs:
                ret.append(self.to_variable(inp))
        else:
            ret.append(self.to_variable(inputs))
        return ret

    def input(self, input_param_name='input'):
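        """Return the single input Variable named by `input_param_name`;
        raise if there is not exactly one input.
        """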
        inputs = self.multiple_input(input_param_name)
        if len(inputs) != 1:
            raise "{0} layer only takes one input".format(self.layer_type)
        return inputs[0]

    @property
    def param_attr(self):
        return ParamAttr._to_attr(self.kwargs.get('param_attr', None))

    @property
    def bias_attr(self):
        return ParamAttr._to_attr(self.kwargs.get('bias_attr', None))

    def multiple_param_attr(self, length):
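        """Expand self.param_attr into a list of `length` ParamAttr objects,
        deep-copying a single attr when necessary, so that each input gets
        its own parameter attribute.
        """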
        param_attr = self.param_attr
        if isinstance(param_attr, ParamAttr):
            param_attr = [param_attr]

        if len(param_attr) != 1 and len(param_attr) != length:
            raise ValueError("parameter number mismatch")
        elif len(param_attr) == 1 and length != 1:
            tmp = [None] * length
            for i in six.moves.range(length):
                tmp[i] = copy.deepcopy(param_attr[0])
            param_attr = tmp
        return param_attr

    def iter_inputs_and_params(self, input_param_name='input'):
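        """Yield (input_variable, param_attr) pairs, matching each input with
        its corresponding parameter attribute.
        """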
        inputs = self.multiple_input(input_param_name)
        param_attrs = self.multiple_param_attr(len(inputs))
        for ipt, param_attr in zip(inputs, param_attrs):
            yield ipt, param_attr

    def input_dtype(self, input_param_name='input'):
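        """Return the common dtype of all inputs; raise ValueError if the
        inputs do not share the same dtype.
        """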
        inputs = self.multiple_input(input_param_name)
        dtype = None
        for each in inputs:
            if dtype is None:
                dtype = each.dtype
            elif dtype != each.dtype:
                raise ValueError("Data Type mismatch: %d to %d" %
                                 (dtype, each.dtype))
        return dtype

    def _create_weight_normalize(self, attr, shape, dtype):
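        """Create a weight-normalized parameter w from a magnitude parameter
        g and a direction parameter v, roughly w = g * v / ||v|| with the
        norm taken over all dimensions except `attr.dim`
        (see https://arxiv.org/pdf/1602.07868.pdf). The nested norm, reshape
        and transpose helpers below emulate this decomposition with existing
        operators.
        """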
        from .layers import elementwise_mul, elementwise_div, reshape

        # Remove these ops when LayerHelper and layers support indicating
        # program and block.
        def __norm_op(x,
                      out=None,
                      p=2,
                      dim=None,
                      keep_dim=False,
                      block=self.startup_program.global_block()):
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_norm'])),
                    dtype=dtype,
                    persistable=False)
            abs_out = block.create_var(
                name=unique_name.generate(".".join(
                    [self.name, 'weight_norm_abs'])),
                dtype=dtype,
                persistable=False)
            block.append_op(
                type='abs', inputs={'X': x}, outputs={'Out': abs_out})
            pow_out = block.create_var(
                name=unique_name.generate(".".join(
                    [self.name, 'weight_norm_pow'])),
                dtype=dtype,
                persistable=False)
            block.append_op(
                type='pow',
                inputs={'X': abs_out},
                outputs={'Out': pow_out},
                attrs={'factor': float(p)})
            sum_out = block.create_var(
                name=unique_name.generate(".".join(
                    [self.name, 'weight_norm_sum'])),
                dtype=dtype,
                persistable=False)
            block.append_op(
                type='reduce_sum',
                inputs={'X': pow_out},
                outputs={'Out': sum_out},
                attrs={
                    'dim': dim,
                    'keep_dim': keep_dim,
                    'reduce_all': True if dim is None else False
                })
            block.append_op(
                type='pow',
                inputs={'X': sum_out},
                outputs={'Out': out},
                attrs={'factor': 1. / p})
            return out

        def __reshape_op(x,
                         shape,
                         out=None,
                         block=self.startup_program.global_block()):
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_reshape'])),
                    dtype=dtype,
                    persistable=False)
            block.append_op(
                type='reshape',
                inputs={'X': x},
                outputs={'Out': out},
                attrs={'shape': shape})
            return out

        def __transpose_op(x,
                           axis,
                           out=None,
                           block=self.startup_program.global_block()):
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_transpose'])),
                    dtype=dtype,
                    persistable=False)
            block.append_op(
                type='transpose',
                inputs={'X': x},
                outputs={'Out': out},
                attrs={'axis': axis})
            return out

        def __norm_except_dim(x,
                              out=None,
                              dim=None,
                              block=self.startup_program.global_block()):
            """Computes the norm over all dimensions except dim"""
            if out is None:
                out = block.create_var(
                    name=unique_name.generate(".".join(
                        [self.name, 'weight_norm_norm'])),
                    dtype=dtype,
                    persistable=False)
            if dim is None:
                __norm_op(x, out, dim=dim, block=block)
            elif dim == 0:
                out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
                reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
                norm = __norm_op(reshape, dim=1, block=block)
                __reshape_op(norm, out=out, shape=out_shape, block=block)
            elif dim == len(x.shape) - 1:
                out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
                reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
                norm = __norm_op(reshape, dim=0, block=block)
                __reshape_op(norm, out=out, shape=out_shape, block=block)
            else:
                perm = list(range(len(x.shape)))
                perm[0], perm[dim] = dim, 0
                transpose = __transpose_op(x, perm, block=block)
                norm = __norm_op(transpose, dim=0, block=block)
                __transpose_op(norm, perm, out=out, block=block)
            return out

        def __weight_normalize(g, v, dim):
            """Calculations for weight normalization"""
            norm = __norm_except_dim(
                v, dim=dim, block=self.main_program.current_block())
            scale = elementwise_div(
                x=g, y=norm)  # The shapes of g and norm are the same.
            # Currently, elementwise_mul only supports broadcasting when the
            # shape of y is a subset of the shape of x. Thus, we reshape y to
            # squeeze it into that subset.
            w = elementwise_mul(
                x=v,
                y=scale if dim is None else reshape(
                    x=scale, shape=[v.shape[dim]]),
                axis=-1 if dim is None else dim)
            # To serialize the original parameter for inference, maybe a
            # parameter rather than a variable should be returned.
            return w

        g_param_attr = copy.deepcopy(attr)
        g_param_attr.name = attr.name + '_g'
        g_param_shape = [1] * len(shape)
        if attr.dim is not None:
            g_param_shape[attr.dim] = shape[attr.dim]
        v_param_attr = copy.deepcopy(attr)
        v_param_attr.name = attr.name + '_v'
        v_param_shape = shape

        # Add to startup_program to initialize g and v.
        # Try to reconstruct the initializer of w by initializing g and v.
        # Set the initializers of g and v as below, then the distribution
        # of w is the same as initializing w with the given initializer.
        # For Data-Dependent Initialization, please compute the init-values
        # of g and v in external and then feed the values to g and v by
        # executing an extra program.
        g_param = self.startup_program.global_block().create_parameter(
            dtype=dtype,
            shape=g_param_shape,
            **g_param_attr._to_kwargs(with_initializer=False))
        v_param = self.startup_program.global_block().create_parameter(
            dtype=dtype,
            shape=v_param_shape,
            **v_param_attr._to_kwargs(with_initializer=True))
        __norm_except_dim(
            x=v_param,
            out=g_param,
            dim=attr.dim,
            block=self.startup_program.global_block())

        # Add weight normalization to main_program
        g_param = self.main_program.global_block().create_parameter(
            dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs())
        v_param = self.main_program.global_block().create_parameter(
            dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs())
        w_param = __weight_normalize(g_param, v_param, dim=attr.dim)
        return w_param

    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
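        """Create a parameter in both the startup and main programs (or, in
        imperative mode, an initialized parameter in the main program only).

        Args:
            attr(ParamAttr): attributes of the parameter; a
                WeightNormParamAttr triggers the weight normalization path.
            shape(list): shape of the parameter.
            dtype: data type of the parameter; must be a floating point type
                unless default_initializer or attr.initializer is given.
            is_bias(bool): use the default bias initializer instead of the
                default parameter initializer when no initializer is set.
            default_initializer: initializer to use when attr does not carry
                one.

        Returns:
            Parameter: the parameter created in the main program.
        """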
        # Deepcopy the attr so that parameters can be shared in program
        attr = copy.deepcopy(attr)
        assert isinstance(attr, ParamAttr)
        suffix = 'b' if is_bias else 'w'
        if attr.name is None:
            attr.name = unique_name.generate(".".join([self.name, suffix]))

        if default_initializer is None and attr.initializer is None:
            if isinstance(dtype, core.VarDesc.VarType):
                if dtype != core.VarDesc.VarType.FP32 and \
                    dtype != core.VarDesc.VarType.FP64 and \
                    dtype != core.VarDesc.VarType.FP16:
                    raise TypeError(
                        "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
                    )
            else:
                if not (dtype.startswith("float") or dtype == "double"):
                    raise TypeError(
                        "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
                    )
            if is_bias:
                attr._set_default_bias_initializer()
            else:
                attr._set_default_param_initializer()
        else:
            attr._set_default_initializer(default_initializer)

        # If weight normalization is set, insert extra parameters and ops.
        # Refer to https://arxiv.org/pdf/1602.07868.pdf
        if isinstance(attr, WeightNormParamAttr):
            param = self._create_weight_normalize(attr, shape, dtype)
            WeightNormParamAttr.params_with_weight_norm.append(param)
            return param
        if _in_imperative_mode():
            # In imperative mode, we want the returned parameter to be
            # initialized so that it can be used imperatively.
            return self.main_program.global_block().create_parameter(
                dtype=dtype,
                shape=shape,
                **attr._to_kwargs(with_initializer=True))
        else:
            self.startup_program.global_block().create_parameter(
                dtype=dtype,
                shape=shape,
                **attr._to_kwargs(with_initializer=True))
            return self.main_program.global_block().create_parameter(
                dtype=dtype, shape=shape, **attr._to_kwargs())

    def get_parameter(self, name):
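        """Look up an existing Parameter by name in the main program's global
        block; raise ValueError if the name does not refer to a Parameter.
        """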
        param = self.main_program.global_block().var(name)
        if not isinstance(param, Parameter):
            raise ValueError("no Parameter name %s found" % name)
        return param

    def create_variable_for_type_inference(self, dtype, stop_gradient=False):
        """Create a temporary variable that should be type inferred layer.

        Note:
            The default type will be set to LOD_TENSOR. However, when
            the var is used as operator output, its type will be updated
            based on operator's `VarTypeInference` implementation in
            infer_var_type.
        """
        return self.main_program.current_block().create_var(
            name=unique_name.generate(".".join([self.name, 'tmp'])),
            dtype=dtype,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=stop_gradient)

    def create_variable(self, *args, **kwargs):
        return self.main_program.current_block().create_var(*args, **kwargs)

    def create_global_variable(self, persistable=False, *args, **kwargs):
        """
        Create a global variable. Note that this global variable is created
        without an initializer.
        Args:
            persistable(bool): True if it is a checkpoint value.
            *args: See create_var's documentation
            **kwargs: See create_var's documentation

        Returns(Variable): the created variable.
        """
        return self.main_program.global_block().create_var(
            *args, persistable=persistable, **kwargs)

    def create_or_get_global_variable(self, name, *args, **kwargs):
        """
        Creates a global variable if it does not already exist, and returns
        the variable together with a boolean flag that is True when the
        variable is newly created.
        """
        if self.main_program.global_block().has_var(name):
            return self.main_program.global_block().var(name), False
        else:
            return self.create_global_variable(name=name, *args, **kwargs), True

    def set_variable_initializer(self, var, initializer):
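        """Attach `initializer` to `var`: applied eagerly in imperative mode,
        otherwise recorded as a persistable variable in the startup program
        so that it is initialized when the startup program runs.
        """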
        assert isinstance(var, Variable)
        if imperative_base.enabled():
            initializer(var, var.block)
        else:
            self.startup_program.global_block().create_var(
                name=var.name,
                type=var.type,
                dtype=var.dtype,
                shape=var.shape,
                persistable=True,
                initializer=initializer)

    def append_bias_op(self, input_var, dim_start=1, dim_end=None):
        """
X
xuwei06 已提交
409
        Append bias operator and return its output. If the user does not set
410
        bias_attr, append_bias_op will return input_var
X
xuwei06 已提交
411

412 413 414 415
        :param input_var: the input variable. The len(input_var.shape) is
        larger or equal than 2.
        :bias_initializer: an instance of a subclass of Initializer used to
        initialize the bias
X
xuwei06 已提交
416 417
        :param dim_start:
        :param dim_end: the shape of the bias will be
X
xuwei06 已提交
418
        input_var.shape[dim_start:dim_end]. The bias is broadcasted to other
X
xuwei06 已提交
419
        dimensions and added to input_var to get the output
420
        """
        size = list(input_var.shape[dim_start:dim_end])
        bias_attr = self.bias_attr
        if not bias_attr:
            return input_var

        b = self.create_parameter(
            attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
        tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
        self.append_op(
            type='elementwise_add',
            inputs={'X': [input_var],
                    'Y': [b]},
            outputs={'Out': [tmp]},
            attrs={'axis': dim_start})
        return tmp

    def append_activation(self, input_var):
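        """Append the activation op named by kwargs['act'] (a string) to
        input_var and return its output; return input_var unchanged when no
        activation is set. use_cudnn/use_mkldnn kwargs are forwarded as op
        attributes, and the op may reuse input_var in place when supported.
        """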
        act = self.kwargs.get('act', None)
        if act is None:
            return input_var
        if isinstance(act, six.string_types):
            act = {'type': act}
        else:
            raise TypeError(str(act) + " should be unicode or str")

        if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'):
            act['use_cudnn'] = self.kwargs.get('use_cudnn')
        if 'use_mkldnn' in self.kwargs:
            act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
        act_type = act.pop('type')
        tmp = input_var
        # NOTE(dzhwinter): some activations support inplace computation.
        # NOTE(minqiyang): currently, we don't support inplace in imperative mode
        if not imperative_base.enabled() and core.IsInplace(act_type):
            tmp = input_var
        else:
            tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
        self.append_op(
            type=act_type,
            inputs={"X": [input_var]},
            outputs={"Out": [tmp]},
            attrs=act)
        return tmp

    def _get_default_initializer(self, dtype):
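        """Return a default initializer: Xavier when dtype is None or a
        floating point type, otherwise Constant (all zeros).
        """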
        if dtype is None or dtype_is_floating(dtype) is True:
            return Xavier()
        else:
            # For integer and boolean types, initialize with all zeros
            return Constant()

    def is_instance(self, param_name, cls):
        param = self.kwargs.get(param_name, None)
        if not isinstance(param, cls):
            raise TypeError("The input {0} parameter of method {1} must be {2}",
                            param_name, self.layer_type, cls.__name__)