#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import itertools

from framework import Variable, Parameter, default_main_program, default_startup_program, \
    unique_name, dtype_is_floating
from paddle.v2.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr


class LayerHelper(object):
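    """Bookkeeping helper shared by the layer functions in this package.

    It reads the common layer keyword arguments (``name``, ``param_attr``,
    ``bias_attr``, ``act``, ...) and offers shortcuts for creating parameters,
    temporary variables and operators in the default main/startup programs.

    A minimal usage sketch (the ``mul`` op and the concrete shapes and
    arguments below are illustrative only, not part of this class)::

        helper = LayerHelper('fc', **locals())
        w = helper.create_parameter(
            attr=helper.param_attr, shape=[128, 10], dtype=input.dtype)
        out = helper.create_tmp_variable(dtype=input.dtype)
        helper.append_op(
            type='mul', inputs={'X': input, 'Y': w}, outputs={'Out': out})
        out = helper.append_activation(out)
    """
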
    def __init__(self, layer_type, **kwargs):
        self.kwargs = kwargs
        self.layer_type = layer_type
        name = self.kwargs.get('name', None)
        if name is None:
            self.kwargs['name'] = unique_name(self.layer_type)

    @property
    def name(self):
        return self.kwargs['name']

    @property
    def main_program(self):
        return default_main_program()

    @property
    def startup_program(self):
        return default_startup_program()

    def append_op(self, *args, **kwargs):
        return self.main_program.current_block().append_op(*args, **kwargs)

    def multiple_input(self, input_param_name='input'):
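        """Return the `input_param_name` keyword argument as a list of
        Variables; a single Variable is wrapped in a list, and anything that
        is not a Variable or a list/tuple of Variables raises TypeError."""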
        inputs = self.kwargs.get(input_param_name, [])
        type_error = TypeError(
            "Input of {0} layer should be Variable or sequence of Variable".
            format(self.layer_type))
        if isinstance(inputs, Variable):
            inputs = [inputs]
        elif not isinstance(inputs, list) and not isinstance(inputs, tuple):
            raise type_error
        else:
            for each in inputs:
                if not isinstance(each, Variable):
                    raise type_error
        return inputs

    def input(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        if len(inputs) != 1:
            raise ValueError("{0} layer only takes one input".format(
                self.layer_type))
        return inputs[0]

    @property
    def param_attr(self):
        return ParamAttr.to_attr(self.kwargs.get('param_attr', None))

    @property
    def bias_attr(self):
        return ParamAttr.to_attr(self.kwargs.get('bias_attr', None))

    def multiple_param_attr(self, length):
        param_attr = self.param_attr
        if isinstance(param_attr, ParamAttr):
            param_attr = [param_attr]

        if len(param_attr) != 1 and len(param_attr) != length:
            raise ValueError("parameter number mismatch")
        elif len(param_attr) == 1 and length != 1:
            tmp = [None] * length
            for i in xrange(length):
                tmp[i] = copy.deepcopy(param_attr[0])
            param_attr = tmp
        return param_attr

    def iter_inputs_and_params(self, input_param_name='input'):
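        """Yield (input Variable, ParamAttr) pairs, pairing every input of
        `input_param_name` with its parameter attribute."""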
        inputs = self.multiple_input(input_param_name)
        param_attrs = self.multiple_param_attr(len(inputs))
        for ipt, param_attr in itertools.izip(inputs, param_attrs):
            yield ipt, param_attr

    def input_dtype(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        dtype = None
        for each in inputs:
            if dtype is None:
                dtype = each.dtype
            elif dtype != each.dtype:
                raise ValueError("Data Type mismatch: %d to %d" %
                                 (dtype, each.dtype))
        return dtype

    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
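        """Create a parameter with the given attr, shape and dtype in the
        global blocks of both the startup program (where its initializer is
        attached) and the main program, returning the main-program Parameter.
        `is_bias` only selects the default initializer used when
        `default_initializer` is None."""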
        # Deepcopy the attr so that parameters can be shared in program
        assert isinstance(attr, ParamAttr)
        suffix = 'b' if is_bias else 'w'

        if default_initializer is None:
            if is_bias:
                attr.set_default_bias_initializer()
            else:
                attr.set_default_param_initializer()
        else:
            attr.set_default_initializer(default_initializer)
        if attr.name is None:
            attr.name = unique_name(".".join([self.name, suffix]))

        self.startup_program.global_block().create_parameter(
            dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
        return self.main_program.global_block().create_parameter(
            dtype=dtype, shape=shape, **attr.to_kwargs())

    def get_parameter(self, name):
        param = self.main_program.global_block().var(name)
        if not isinstance(param, Parameter):
            raise ValueError("no Parameter name %s found" % name)
        return param

    def create_tmp_variable(self, dtype, stop_gradient=False):
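        """Create a uniquely named, non-persistable variable in the current
        block, typically used to hold a layer's intermediate or output
        value."""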
        return self.main_program.current_block().create_var(
            name=unique_name(".".join([self.name, 'tmp'])),
            dtype=dtype,
            persistable=False,
            stop_gradient=stop_gradient)

    def create_variable(self, *args, **kwargs):
        return self.main_program.current_block().create_var(*args, **kwargs)

    def create_global_variable(self, persistable=False, *args, **kwargs):
        return self.main_program.global_block().create_var(
            *args, persistable=persistable, **kwargs)

    def set_variable_initializer(self, var, initializer):
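        """Attach `initializer` to `var` by re-declaring it as a persistable
        variable in the startup program's global block."""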
        assert isinstance(var, Variable)
        self.startup_program.global_block().create_var(
            name=var.name,
            type=var.type,
            dtype=var.dtype,
            shape=var.shape,
            persistable=True,
            initializer=initializer)

    def append_bias_op(self, input_var, dim_start=1, dim_end=None):
        """
        Append a bias operator and return its output. If the user did not set
        bias_attr, append_bias_op returns input_var unchanged.

        :param input_var: the input variable. len(input_var.shape) must be
            greater than or equal to 2.
        :param dim_start: index of the first input dimension covered by the
            bias; it is also used as the broadcast axis of the elementwise_add
            op.
        :param dim_end: the bias has shape input_var.shape[dim_start:dim_end]
            and is broadcast over the remaining dimensions, then added to
            input_var to produce the output.
        """
        size = list(input_var.shape[dim_start:dim_end])
        bias_attr = self.bias_attr
        if not bias_attr:
            return input_var

        b = self.create_parameter(
            attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
        tmp = self.create_tmp_variable(dtype=input_var.dtype)
        self.append_op(
            type='elementwise_add',
            inputs={'X': [input_var],
                    'Y': [b]},
            outputs={'Out': [tmp]},
            attrs={'axis': dim_start})
        return tmp

    def append_activation(self, input_var):
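        """Apply the activation given by the `act` keyword argument (an op
        type string, or a dict holding a 'type' key plus op attributes) to
        input_var and return the result; if `act` is None, input_var is
        returned unchanged."""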
        act = self.kwargs.get('act', None)
        if act is None:
            return input_var
        if isinstance(act, basestring):
            act = {'type': act}
        tmp = self.create_tmp_variable(dtype=input_var.dtype)
        act_type = act.pop('type')
        self.append_op(
            type=act_type,
            inputs={"X": [input_var]},
            outputs={"Out": [tmp]},
            attrs=act)
        return tmp

    def _get_default_initializer(self, dtype):
        if dtype is None or dtype_is_floating(dtype) is True:
            return Xavier()
        else:
            # For integer and boolean types, initialize with all zeros
            return Constant()

    def is_instance(self, param_name, cls):
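        """Check that the `param_name` keyword argument is an instance of
        `cls`; raise TypeError otherwise."""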
        param = self.kwargs.get(param_name, None)
        if not isinstance(param, cls):
            raise TypeError(
                "The input {0} parameter of method {1} must be {2}".format(
                    param_name, self.layer_type, cls.__name__))