layer.py 11.2 KB
Newer Older
Q
qiaolongfei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
"""
`paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
we want to make Paddle a plain Python package. The model config package defines
how to configure a neural network topology in Paddle Python code.

The primary usage is shown below.

..  code-block:: python

    import paddle

    img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
    hidden = paddle.layer.fc(input=img, size=200)
    prediction = paddle.layer.fc(input=hidden, size=10,
                                 act=paddle.activation.Softmax())

    # use prediction instance where needed.
    parameters = paddle.parameters.create(prediction)
"""
Q
qiaolongfei 已提交
33
import collections
X
xuwei06 已提交
34
import copy
X
xuwei06 已提交
35
import re
X
xuwei06 已提交
36 37 38
import paddle.trainer_config_helpers.layers as v1_layers
import paddle.trainer.config_parser as cp
from paddle.proto.ModelConfig_pb2 import ModelConfig, SubModelConfig
X
xuwei06 已提交
39 40
from config_base import __convert_to_v2__
import config_base
Q
qiaolongfei 已提交
41

X
xuwei06 已提交
42
# Names exported explicitly; the module-level loop over ``v1_layers.__all__``
# appends the converted v1 layer names to this list.
__all__ = ['data', 'parse_network']
Q
qiaolongfei 已提交
43

X
xuwei06 已提交
44

X
xuwei06 已提交
45
def __need_to_keep__(name):
    """Tell whether a v1 name is exported to v2 without being renamed.

    ``__convert_name__`` returns such names unchanged.

    :param name: name taken from the v1 layers module.
    :return: True if ``name`` is one of the names listed below.
    :rtype: bool
    """
    return name in (
        'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
        'layer_support', 'BaseGeneratedInput')
Q
qiaolongfei 已提交
50 51


X
xuwei06 已提交
52
def __need_to_wrap__(name):
    """Tell whether a v1 object should be wrapped by ``__convert_to_v2__``.

    The module-level export loop wraps callable v1 objects for which this
    returns True and re-exports everything else as is.

    :param name: name taken from the v1 layers module.
    :return: False for the names listed below, True otherwise.
    :rtype: bool
    """
    return name not in ('AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput')
Q
qiaolongfei 已提交
54 55


X
xuwei06 已提交
56
def __convert_name__(inname):
    """Map a v1 layer helper name to the name it is exported under in v2.

    The rules are applied in this order:

    1. names accepted by ``__need_to_keep__`` are returned unchanged;
    2. ``maxid_layer`` becomes ``max_id``;
    3. ``hsigmoid`` and names ending in ``memory``, ``_seq`` or ``_sim`` are
       returned unchanged;
    4. the three cross-entropy helpers get a ``_cost`` suffix;
    5. names already ending in ``_cost`` are returned unchanged;
    6. a trailing ``_layer`` is stripped;
    7. anything else is returned unchanged.

    :param inname: name taken from the v1 layers module.
    :return: the v2 name.
    :rtype: str
    """
    if __need_to_keep__(inname):
        return inname
    if inname == 'maxid_layer':
        return 'max_id'
    if inname == 'hsigmoid' or inname.endswith(('memory', '_seq', '_sim')):
        return inname
    if inname in ('cross_entropy', 'multi_binary_label_cross_entropy',
                  'cross_entropy_with_selfnorm'):
        return inname + "_cost"
    if inname.endswith('_cost'):
        return inname
    if inname.endswith("_layer"):
        return inname[:-len("_layer")]
    return inname
Y
Yu Yang 已提交
75 76


X
xuwei06 已提交
77 78 79 80
# Re-export everything listed in ``v1_layers.__all__`` under its v2 name.
# Callable objects accepted by ``__need_to_wrap__`` are wrapped with
# ``__convert_to_v2__``; everything else is bound as is. Each new name is also
# appended to this module's ``__all__``.
for name in v1_layers.__all__:
    obj = getattr(v1_layers, name)
    new_name = __convert_name__(name)
    if callable(obj) and __need_to_wrap__(name):
        globals()[new_name] = __convert_to_v2__(obj, new_name, __name__)
    else:
        globals()[new_name] = obj
    __all__.append(new_name)


def __data_layer__(name, type, **kwargs):
    # Create the v1 data layer sized by ``type.dim`` and record the v2 input
    # type on the resulting layer object before handing it back.
    layer = v1_layers.data_layer(name, type.dim, **kwargs)
    layer.data_type = type
    return layer
Q
qiaolongfei 已提交
91

X
xuwei06 已提交
92

X
xuwei06 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105
def __map_data_docstr__(doc):
    """Rewrite the v1 ``data_layer`` docstring so that it describes v2 ``data``.

    Three textual substitutions are applied:

    * the ``data = ...(...)`` example line is replaced by the v2 call;
    * the ``:param size:`` field becomes ``:param type:``;
    * the ``:type size:`` field becomes ``:type type:`` so that the type
      annotation stays attached to the renamed parameter.

    :param doc: the original docstring, or None when docstrings have been
        stripped (for example under ``python -OO``).
    :return: the rewritten docstring, or None if ``doc`` is None.
    """
    if doc is None:
        # re.sub() raises TypeError on None; there is nothing to rewrite.
        return None

    doc = re.sub(r'(data = [^\)]+)\).*',
                 "data = paddle.layer.data(name=\"input\", "
                 "type=paddle.data_type.dense_vector(1000))", doc)

    doc = re.sub(r':param size:.*', ':param type: Data type of this data layer',
                 doc)
    doc = re.sub(r':type size:.*', ":type type: paddle.v2.data_type.InputType",
                 doc)
    return doc


# Reuse the v1 ``data_layer`` docstring, rewritten by ``__map_data_docstr__``,
# as the documentation of ``__data_layer__``.
__data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__)
Q
qiaolongfei 已提交
106

X
xuwei06 已提交
107
# Public ``data`` layer: ``__data_layer__`` passed through ``__convert_to_v2__``.
# NOTE(review): the second argument here is the literal 'name', whereas the
# export loop over ``v1_layers.__all__`` passes the exported name in that
# position -- confirm against config_base whether 'data' was intended.
data = __convert_to_v2__(__data_layer__, 'name', __name__)
Q
qiaolongfei 已提交
108 109


110
def __get_used_layers__(output_layers):
    """Collect the names of everything reachable from ``output_layers``.

    Starting from each output layer's ``full_name``, the walk follows the
    ``inputs`` of the entries in ``cp.g_layer_map`` plus extra dependencies
    derived from the non-root sub-models of ``cp.g_config.model_config``
    (in-links, out-links, memories and the generator's eos layer). ``print``
    layers whose inputs were all reached are added at the end.

    :param output_layers: iterable of layers exposing ``full_name``.
    :return: the names visited by the walk.
    :rtype: set
    :raises KeyError: if a visited name is missing from ``cp.g_layer_map``.
    """
    layer_names = set()
    parents = {}

    def add_parent(child, parent):
        parents.setdefault(child, []).append(parent)

    def add_additional_parents():
        for sub_model in cp.g_config.model_config.sub_models:
            if sub_model.name == 'root':
                continue
            for link in sub_model.in_links:
                add_parent(link.link_name, link.layer_name)
                add_parent(sub_model.name, link.layer_name)
            for link in sub_model.out_links:
                add_parent(link.link_name, link.layer_name)
                add_parent(link.link_name, sub_model.name)
            for mem in sub_model.memories:
                if mem.boot_layer_name:
                    add_parent(mem.layer_name, mem.boot_layer_name)
                add_parent(mem.link_name, mem.layer_name)

            if sub_model.HasField('generator'):
                # according to the implementation of text generation
                # in recurrent layer group, the generated word must be
                # the first out link
                add_parent(sub_model.out_links[0].layer_name,
                           sub_model.generator.eos_layer_name)

    def dfs_travel(start_name):
        # Depth-first walk with an explicit stack, so that a very deep
        # topology cannot exceed the interpreter's recursion limit.
        pending = [start_name]
        while pending:
            layer_name = pending.pop()
            if layer_name in layer_names:
                continue
            layer_names.add(layer_name)
            layer = cp.g_layer_map[layer_name]

            pending.extend(inp.input_layer_name for inp in layer.inputs)
            pending.extend(parents.get(layer.name, ()))

    add_additional_parents()

    for layer in output_layers:
        dfs_travel(layer.full_name)

    # print layer needs to be specially handled because no other
    # layer depends on it. It is used to print the result of some
    # layers when running the model for debug purpose. So we explicitly
    # add a print layer to the topology if its input is in the topology.
    for layer in cp.g_config.model_config.layers:
        if layer.type != 'print':
            continue
        if all(inp.input_layer_name in layer_names for inp in layer.inputs):
            layer_names.add(layer.name)

    return layer_names


176
def __get_used_parameters__(layer_names, sub_models):
    """Collect the names of the parameters referenced by the given layers.

    :param layer_names: names to look up in ``cp.g_layer_map``; each entry
        contributes the ``input_parameter_name`` of its inputs and its
        ``bias_parameter_name`` when these are non-empty.
    :param sub_models: sub-model configs; each memory that has the
        ``boot_bias_parameter_name`` field set contributes that name.
    :return: the collected parameter names.
    :rtype: set
    """
    parameter_names = set()
    for name in layer_names:
        layer = cp.g_layer_map[name]
        for inp in layer.inputs:
            if inp.input_parameter_name:
                parameter_names.add(inp.input_parameter_name)
        if layer.bias_parameter_name:
            parameter_names.add(layer.bias_parameter_name)

    for sub_model in sub_models:
        for mem in sub_model.memories:
            if mem.HasField("boot_bias_parameter_name"):
                parameter_names.add(mem.boot_bias_parameter_name)

    return parameter_names


def __get_used_submodels__(layer_names):
    """Return the set of configured sub-model names found in ``layer_names``."""
    configured = cp.g_config.model_config.sub_models
    return {sm.name for sm in configured if sm.name in layer_names}


X
xuwei06 已提交
202 203 204 205 206 207 208 209 210
def __get_submodel_data_out_links__():
    """Return the out-link names, over all sub-models, that are data layers.

    A link counts when the ``cp.g_layer_map`` entry for its ``link_name`` has
    type ``'data'``.
    """
    out_link_names = (link.link_name
                      for sm in cp.g_config.model_config.sub_models
                      for link in sm.out_links)
    return {
        link_name for link_name in out_link_names
        if cp.g_layer_map[link_name].type == 'data'
    }


X
xuwei06 已提交
211 212 213 214 215 216 217 218 219 220 221 222 223
def __get_used_evaluators__(layer_names):
    """Return the names of evaluators whose input layers are all in use.

    An evaluator from ``cp.g_config.model_config.evaluators`` is selected when
    every entry of its ``input_layers`` is contained in ``layer_names``.
    """
    return {
        evaluator.name
        for evaluator in cp.g_config.model_config.evaluators
        if all(inp in layer_names for inp in evaluator.input_layers)
    }


X
xuwei06 已提交
224 225
def __trim_submodel__(old_submodel, layer_names, input_layer_names,
                      output_layer_names, evaluator_names):
    """Build a copy of ``old_submodel`` restricted to the layers in use.

    :param old_submodel: the sub-model config to trim; it is not modified.
    :param layer_names: names of the layers that are kept.
    :param input_layer_names: names of the network's input layers.
    :param output_layer_names: names of the network's output layers.
    :param evaluator_names: names of the evaluators that are kept.
    :return: a new ``SubModelConfig``. Layer, evaluator, memory and link
        entries are filtered by the given name sets; the in-link referenced
        by ``target_inlinkid`` is always kept and the id is re-indexed to its
        position in the filtered list.
    """
    submodel = SubModelConfig()
    submodel.name = old_submodel.name
    submodel.layer_names.extend(
        [n for n in old_submodel.layer_names if n in layer_names])
    # Input/output names are picked from the already trimmed layer list.
    submodel.input_layer_names.extend(
        [n for n in submodel.layer_names if n in input_layer_names])
    submodel.output_layer_names.extend(
        [n for n in submodel.layer_names if n in output_layer_names])
    submodel.evaluator_names.extend(
        [n for n in old_submodel.evaluator_names if n in evaluator_names])

    submodel.is_recurrent_layer_group = old_submodel.is_recurrent_layer_group
    submodel.reversed = old_submodel.reversed

    submodel.memories.extend(
        [m for m in old_submodel.memories if m.link_name in layer_names])

    has_target = old_submodel.HasField('target_inlinkid')
    old_target = old_submodel.target_inlinkid if has_target else -1
    new_target = old_target
    kept_in_links = []
    for i, link in enumerate(old_submodel.in_links):
        if link.link_name in layer_names or i == old_target:
            kept_in_links.append(link)
            if i == old_target:
                # Index of the target link inside the filtered list.
                new_target = len(kept_in_links) - 1
    submodel.in_links.extend(kept_in_links)

    submodel.out_links.extend(
        [l for l in old_submodel.out_links if l.link_name in layer_names])
    if old_submodel.HasField('generator'):
        submodel.generator.CopyFrom(old_submodel.generator)

    if has_target:
        submodel.target_inlinkid = new_target
    return submodel
Q
qiaolongfei 已提交
261 262


X
xuwei06 已提交
263 264 265
def parse_network(output_layers, extra_layers=None):
    """Build a ``ModelConfig`` restricted to what the given layers need.

    Layers, evaluators, sub-models and parameters of
    ``cp.g_config.model_config`` are copied into a new ``ModelConfig`` only
    when they are used by ``output_layers`` or ``extra_layers``.

    :param output_layers: a layer or a sequence of layers; their
        ``full_name`` values become the ``output_layer_names`` of the result.
    :param extra_layers: optional layer or sequence of layers that are kept
        in the topology without being listed as outputs.
    :return: the trimmed model config.
    :rtype: ModelConfig
    """
    if not isinstance(output_layers, collections.Sequence):
        output_layers = [output_layers]
    if extra_layers is None:
        extra_layers = []
    elif not isinstance(extra_layers, collections.Sequence):
        extra_layers = [extra_layers]

    layer_names = __get_used_layers__(list(output_layers) + list(extra_layers))
    submodel_names = __get_used_submodels__(layer_names)
    # The root sub-model is always kept.
    submodel_names.add('root')
    evaluator_names = __get_used_evaluators__(layer_names)
    data_out_links = __get_submodel_data_out_links__()
    input_layer_names = set()
    output_layer_names = set()

    model_config = ModelConfig()
    model_config.type = cp.g_config.model_config.type

    for layer in output_layers:
        model_config.output_layer_names.append(layer.full_name)
        output_layer_names.add(layer.full_name)

    for l in cp.g_config.model_config.layers:
        if l.name not in layer_names:
            continue
        model_config.layers.extend([l])
        if l.type != 'data':
            continue
        if l.name in data_out_links:
            # In text generation, the outlink to save the generated word
            # indices is a data_layer defined in recurrent_group. This
            # data_layer is sure to be the output of the network in text
            # generation task, so this statement excludes such a special
            # data_layer from being inputs of the network, otherwise an error
            # will occur during data feeding.
            continue
        model_config.input_layer_names.append(l.name)
        input_layer_names.add(l.name)

    for e in cp.g_config.model_config.evaluators:
        if e.name in evaluator_names:
            model_config.evaluators.extend([e])

    for s in cp.g_config.model_config.sub_models:
        if s.name in submodel_names:
            trimmed = __trim_submodel__(s, layer_names, input_layer_names,
                                        output_layer_names, evaluator_names)
            model_config.sub_models.extend([trimmed])

    # Parameters are selected after trimming so that only the memories of the
    # kept sub-models contribute their boot bias parameters.
    parameter_names = __get_used_parameters__(layer_names,
                                              model_config.sub_models)

    for p in cp.g_config.model_config.parameters:
        if p.name in parameter_names:
            model_config.parameters.extend([p])

    return model_config
Y
Yu Yang 已提交
323 324


X
xuwei06 已提交
325
def get_layer(name):
    """Look up ``name`` in ``config_base.__layer_map__``.

    :param name: key to look up.
    :return: whatever ``config_base.__layer_map__.get(name)`` yields;
        presumably ``None`` for an unknown name, assuming the map is a plain
        dict -- confirm against config_base.
    """
    return config_base.__layer_map__.get(name)