# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

import math

# Data sources: the train/test file lists are handed to the `process` object
# of the `dataprovider` Python module.
define_py_data_sources2(
    train_list="data/train.list",
    test_list="data/test.list",
    module="dataprovider",
    obj="process")

# Optimisation settings: momentum optimizer with L2 regularization, model
# averaging and a decaying learning rate.
batch_size = 16
settings(
    learning_method=MomentumOptimizer(),
    batch_size=batch_size,
    # The L2 coefficient is scaled by the batch size: 16 * 1e-5 = 1.6e-4.
    regularization=L2Regularization(batch_size * 1e-5),
    model_average=ModelAverage(0.5),
    learning_rate=2e-3,
    learning_rate_decay_a=5e-7,
    learning_rate_decay_b=0.5,
)

# Model dimensions and switches.
word_dim = 128  # size of the word embedding layer
hidden_dim = 128  # size of the two hidden mixed layers
with_rnn = True  # False builds the network without the recurrent layers

# Initial standard deviation 1/sqrt(hidden_dim), wrapped in a reusable
# parameter attribute.
initial_std = 1 / math.sqrt(hidden_dim)
param_attr = ParamAttr(initial_std=initial_std)

# Layer attribute that places a layer on device -1.
# NOTE(review): presumably -1 means CPU, as the variable name suggests.
cpu_layer_attr = ExtraLayerAttribute(device=-1)

# Layers that do not carry an explicit device attribute use device 0.
default_device(0)

# Size of the label input and of the CRF input/decoding layers.
# NOTE(review): 23 is consistent with 11 chunk types under an IOB scheme
# (2 * 11 + 1) -- confirm against the data provider.
num_label_types = 23

# Input layers.
# NOTE(review): the sizes are presumably the dimensions produced by the data
# provider for each slot -- confirm against dataprovider.py.
# `features` is declared here and passed to inputs(), but no layer in this
# configuration consumes it.
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
# The label input carries the explicit device=-1 layer attribute.
chunk = data_layer(
    name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)

# Word embedding of size word_dim; its parameter is created with
# initial_std=0 (no random spread at initialization).
emb = embedding_layer(
    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))

# First hidden layer: a mixed layer fed by a full-matrix projection of the
# word embedding and a table projection of the `pos` input.
hidden1 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[
        full_matrix_projection(emb),
        table_projection(pos, param_attr=param_attr),
    ])

# Optional recurrent layer over hidden1 (no `reverse` flag given), with ReLU
# activation and a recurrent weight created with initial_std=0.
if with_rnn:
    rnn1 = recurrent_layer(
        act=ReluActivation(),
        bias_attr=True,
        input=hidden1,
        param_attr=ParamAttr(initial_std=0),
    )

# Second hidden layer: always fed by hidden1; when with_rnn is set it also
# receives a projection of rnn1 (created with initial_std=0).
hidden2 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[full_matrix_projection(hidden1)] +
    ([full_matrix_projection(
        rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []),
)

# Optional second recurrent layer over hidden2. It is built with
# reverse=True, unlike the recurrent layer over hidden1.
if with_rnn:
    rnn2 = recurrent_layer(
        reverse=True,
        act=ReluActivation(),
        bias_attr=True,
        input=hidden2,
        param_attr=ParamAttr(initial_std=0),
    )

# Per-label scores fed to the CRF: a bias-free mixed layer of size
# num_label_types over hidden2, plus a projection of rnn2 when with_rnn is set.
crf_input = mixed_layer(
    size=num_label_types,
    bias_attr=False,
    input=[full_matrix_projection(hidden2), ] +
    ([full_matrix_projection(
        rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []),
)

# CRF layer over crf_input with `chunk` as the label; this is the layer passed
# to outputs(). Its parameter is explicitly named "crfw" and is created with
# initial_std=0. The layer carries the device=-1 attribute.
crf = crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(
        name="crfw", initial_std=0),
    layer_attr=cpu_layer_attr,
)

# CRF decoding over the same crf_input, used by the evaluators. It refers to
# the parameter name "crfw", the same name given to the crf layer's parameter.
# NOTE(review): presumably this shares the trained CRF weights by name --
# confirm against the Paddle parameter-sharing rules.
crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"),
    layer_attr=cpu_layer_attr,
)

# Evaluator named "error" that sums the output of crf_decoding.
# NOTE(review): with a label supplied, crf_decoding presumably emits a
# per-position 0/1 error indicator, making this a count of tagging errors --
# confirm against the crf_decoding_layer documentation.
sum_evaluator(
    name="error",
    input=crf_decoding,
)

# Chunk-level evaluator named "chunk_f1": compares crf_decoding against the
# `chunk` labels using the IOB scheme with 11 chunk types.
chunk_evaluator(
    name="chunk_f1",
    input=crf_decoding,
    label=chunk,
    chunk_scheme="IOB",
    num_chunk_types=11,
)

# Declare the network's data inputs and its output (the CRF layer).
# NOTE(review): the order of inputs() presumably has to match the slot order
# produced by the data provider -- confirm before reordering.
inputs(word, pos, chunk, features)
outputs(crf)