# test_label_semantic_roles.py
import math
import sys

import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid as fluid

# Vocabularies returned by the CoNLL-05 dataset helper.
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

# Network hyper-parameters.
mark_dict_len = 2  # vocabulary size of the `mark` embedding table
word_dim = 32  # embedding width for word/context and predicate inputs
mark_dim = 5  # embedding width for the `mark` input
hidden_dim = 512  # `size` passed to every hidden fc and dynamic_lstm layer
depth = 8  # number of stacked dynamic_lstm layers built by db_lstm
mix_hidden_lr = 1e-3  # learning rate applied to the CRF parameter 'crfw'

# Training configuration.
IS_SPARSE = True  # is_sparse flag for the predicate and mark embeddings
PASS_NUM = 10  # number of passes over the reader in main()
BATCH_SIZE = 20  # samples per mini-batch

# Parameter name shared by all word/context embeddings; main() overwrites
# this parameter with the array loaded from conll05.get_embedding().
embedding_name = 'emb'


def load_parameter(file_name, h, w):
    """Load a float32 parameter matrix from a binary parameter file.

    The first 16 bytes of the file are treated as a header and discarded;
    everything after that is read as float32 values.

    Args:
        file_name: path of the binary parameter file.
        h: number of rows of the resulting matrix.
        w: number of columns of the resulting matrix.

    Returns:
        A ``numpy.ndarray`` of dtype float32 and shape ``(h, w)``.

    Raises:
        ValueError: if the payload does not contain exactly ``h * w`` values
            (raised by ``reshape``).
    """
    header_size = 16
    with open(file_name, 'rb') as param_file:
        # Advance past the header so fromfile starts at the payload.
        param_file.read(header_size)
        flat_values = np.fromfile(param_file, dtype=np.float32)
    return flat_values.reshape(h, w)


def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    """Build the stacked-LSTM feature network for semantic role labeling.

    Each of the eight inputs is embedded, projected by its own fc layer to
    ``hidden_dim`` and summed. The sum feeds a stack of ``depth``
    ``dynamic_lstm`` layers; every layer after the first receives the sum of
    fc projections of the previous mixed input and the previous LSTM output,
    and layers with an odd index are built with ``is_reverse=True``.

    Args:
        word: input variable for the word ids.
        predicate: input variable for the predicate (verb) ids.
        ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2: input variables for the five
            context-word ids; they share the word embedding table.
        mark: input variable for the mark ids.
        **ignored: any extra keyword arguments are accepted and discarded.

    Returns:
        The sum of two fc projections (size ``label_dict_len``) of the last
        mixed input and the last LSTM output.
    """
    # 8 features
    predicate_embedding = fluid.layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')

    mark_embedding = fluid.layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE)

    # The word and the five context words share one embedding parameter
    # (named by `embedding_name`) that is excluded from training.
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        fluid.layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr=fluid.ParamAttr(
                name=embedding_name, trainable=False)) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = fluid.layers.sums(input=hidden_0_layers)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
            fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        # Odd-indexed layers run over the sequence in reverse.
        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
        fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out


def to_lodtensor(data, place):
    """Pack a batch of variable-length sequences into one fluid LoDTensor.

    Args:
        data: re-iterable collection of sequences (e.g. a list of lists);
            the sequences are concatenated along axis 0 and cast to int64.
        place: device place forwarded to ``LoDTensor.set``.

    Returns:
        A ``fluid.LoDTensor`` holding the flattened data reshaped to one
        column, with a single-level LoD made of the cumulative sequence
        offsets.
    """
    # Cumulative offsets: sequence lengths [2, 3] give lod [0, 2, 5].
    lod = [0]
    for seq in data:
        lod.append(lod[-1] + len(seq))
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    """Build the SRL network, run one training step and print chunk metrics.

    The function declares the input layers, builds the db_lstm feature
    network with a linear-chain CRF cost, sets up SGD, CRF decoding and a
    chunk evaluator, loads the word embedding from
    ``conll05.get_embedding()`` and then starts the training loop.

    NOTE: the loop terminates the process with exit status 0 right after the
    first mini-batch ("exit early for CI"), so this function never returns
    when the reader yields at least one batch.
    """
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)
    # Pass the inputs explicitly instead of relying on local variable names.
    feature_out = db_lstm(
        word=word,
        predicate=predicate,
        ctx_n2=ctx_n2,
        ctx_n1=ctx_n1,
        ctx_0=ctx_0,
        ctx_p1=ctx_p1,
        ctx_p2=ctx_p2,
        mark=mark)
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(x=crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
    sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    # Decoding refers to the CRF parameter by the same name ('crfw') that the
    # cost layer uses.
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    # NOTE: the reader is built from the conll05 *test* split.
    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    # NOTE(review): feed_list order presumably mirrors the column order of
    # the conll05 reader samples -- confirm before reordering.
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    # Overwrite the word embedding parameter created by the startup program
    # with the array loaded from the conll05 embedding file.
    embedding_param = fluid.g_scope.find_var(embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)

    batch_id = 0
    for pass_id in range(PASS_NUM):
        chunk_evaluator.reset(exe)
        for data in train_data():
            cost, precision, recall, f1_score = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost] + chunk_evaluator.metrics)
            pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
                exe)

            if batch_id % 10 == 0:
                print("avg_cost:" + str(cost) + " precision:" + str(precision)
                      + " recall:" + str(recall) + " f1_score:" + str(f1_score)
                      + " pass_precision:" + str(pass_precision)
                      + " pass_recall:" + str(pass_recall)
                      + " pass_f1_score:" + str(pass_f1_score))

            # Incremented before the early exit so the counter stays correct
            # if the exit below is ever removed.
            batch_id = batch_id + 1

            # exit early for CI
            sys.exit(0)


# Run the network construction and training loop only when this file is
# executed as a script, not when it is imported.
if __name__ == '__main__':
    main()