test_label_semantic_roles.py 13.3 KB
Newer Older
1
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
D
dzhwinter 已提交
2
#
D
dzhwinter 已提交
3 4 5
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
D
dzhwinter 已提交
6
#
D
dzhwinter 已提交
7
#     http://www.apache.org/licenses/LICENSE-2.0
D
dzhwinter 已提交
8
#
D
dzhwinter 已提交
9 10 11 12 13 14
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import print_function

Y
Yu Yang 已提交
17
import contextlib
Q
Qiao Longfei 已提交
18
import math
Q
Qiao Longfei 已提交
19
import numpy as np
Y
Yu Yang 已提交
20 21 22 23
import os
import time
import unittest

24 25
import paddle
import paddle.dataset.conll05 as conll05
26
import paddle.fluid as fluid
Q
Qiao Longfei 已提交
27

P
pangyoki 已提交
28 29
# Switch Paddle to static-graph mode; everything below builds programs through
# the paddle.fluid layer/Program/Executor API.
paddle.enable_static()

Q
Qiao Longfei 已提交
30 31 32
# conll05.get_dict() supplies three dictionaries (words, verbs, labels). Their
# lengths size the embedding tables and the label-width output layers below.
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
L
Liu Yiqun 已提交
33
# Number of entries in the verb dictionary; sizes the predicate embedding
# table in db_lstm and bounds the random predicate ids generated in infer.
pred_dict_len = len(verb_dict)
Q
Qiao Longfei 已提交
34 35 36 37 38 39 40 41 42

# Network hyper-parameters consumed by db_lstm / train / infer.
mark_dict_len = 2  # rows of the mark embedding table; infer draws mark ids in [0, 1]
word_dim = 32  # embedding width for word, context and predicate embeddings
mark_dim = 5  # embedding width for the mark embedding
hidden_dim = 512  # `size` passed to the hidden fc and dynamic_lstm layers
depth = 8  # db_lstm builds one initial LSTM plus (depth - 1) stacked ones
mix_hidden_lr = 1e-3  # learning rate attached to the 'crfw' CRF parameter in train

# Passed as `is_sparse` to the predicate and mark embedding layers.
IS_SPARSE = True
43
# Upper bound on full passes over the training reader in train_loop.
PASS_NUM = 2
44
# Mini-batch size handed to paddle.batch when building the training reader.
BATCH_SIZE = 10
Q
Qiao Longfei 已提交
45 46 47 48 49 50 51 52 53 54

# Parameter name shared by the word/context embedding layers in db_lstm;
# train_loop looks the variable up by this name to overwrite it with the
# matrix read from conll05.get_embedding().
embedding_name = 'emb'


def load_parameter(file_name, h, w):
    """Read a float32 matrix of shape ``(h, w)`` from a binary parameter file.

    The first 16 bytes of the file are treated as a header and discarded;
    everything after them is interpreted as raw ``float32`` values.

    Args:
        file_name: Path of the binary file to read.
        h: Number of rows of the returned array.
        w: Number of columns of the returned array.

    Returns:
        A ``numpy.ndarray`` with dtype ``float32`` and shape ``(h, w)``.

    Raises:
        ValueError: From ``reshape`` when the payload does not contain
            exactly ``h * w`` values.
    """
    header_size = 16
    with open(file_name, 'rb') as param_file:
        # Consume the header so that fromfile starts at the payload.
        param_file.read(header_size)
        flat_values = np.fromfile(param_file, dtype=np.float32)
        matrix = flat_values.reshape(h, w)
    return matrix


Y
Yu Yang 已提交
55 56
def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    """Declare the stacked, alternating-direction LSTM feature network.

    Eight inputs are embedded (six word/context inputs sharing one frozen
    embedding parameter, plus the predicate and the mark), projected by fc
    layers and summed. The sum feeds a first LSTM, followed by
    ``depth - 1`` further LSTM layers whose direction alternates; each of
    those consumes the sum of fc projections of the previous mixed input
    and the previous LSTM output.

    Args:
        word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2: Inputs looked up in the
            shared word embedding named by ``embedding_name``.
        predicate: Input looked up in the predicate embedding ('vemb').
        mark: Input looked up in the mark embedding.
        **ignored: Extra keyword arguments are accepted and discarded.

    Returns:
        The sum of two tanh-activated fc layers of width ``label_dict_len``.
    """
    # 8 features. The predicate and mark embeddings are declared before the
    # word/context ones; the declaration order is kept as-is on purpose.
    pred_emb = fluid.layers.embedding(
        input=predicate,
        size=[pred_dict_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')
    mark_emb = fluid.layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE)

    embeddings = []
    for token_ids in (word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2):
        # All six lookups share one non-trainable parameter.
        shared_attr = fluid.ParamAttr(name=embedding_name, trainable=False)
        embeddings.append(
            fluid.layers.embedding(
                size=[word_dict_len, word_dim],
                input=token_ids,
                param_attr=shared_attr))
    embeddings += [pred_emb, mark_emb]

    projections = []
    for emb in embeddings:
        projections.append(fluid.layers.fc(input=emb, size=hidden_dim))
    first_hidden = fluid.layers.sums(input=projections)

    # Settings common to every LSTM layer in the stack.
    lstm_opts = dict(
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')
    first_lstm = fluid.layers.dynamic_lstm(input=first_hidden, **lstm_opts)

    # Stack L-LSTM and R-LSTM with direct edges: odd layers run reversed.
    prev_mix, prev_lstm = first_hidden, first_lstm
    for layer_idx in range(1, depth):
        from_mix = fluid.layers.fc(input=prev_mix, size=hidden_dim)
        from_lstm = fluid.layers.fc(input=prev_lstm, size=hidden_dim)
        mixed = fluid.layers.sums(input=[from_mix, from_lstm])

        stacked = fluid.layers.dynamic_lstm(
            input=mixed, is_reverse=(layer_idx % 2 == 1), **lstm_opts)

        prev_mix, prev_lstm = mixed, stacked

    out_from_mix = fluid.layers.fc(
        input=prev_mix, size=label_dict_len, act='tanh')
    out_from_lstm = fluid.layers.fc(
        input=prev_lstm, size=label_dict_len, act='tanh')
    return fluid.layers.sums(input=[out_from_mix, out_from_lstm])


武毅 已提交
122
def _required_env(name):
    """Return environment variable *name*, raising if it is not set.

    ``os.getenv`` yields ``None`` for a missing variable, which would
    otherwise surface later as an unrelated ``AttributeError``/``TypeError``.

    Raises:
        RuntimeError: If the variable is absent from the environment.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(
            "Environment variable %s must be set for distributed training" %
            name)
    return value


def train(use_cuda, save_dirname=None, is_local=True):
    """Build the SRL network, train it, and optionally export it for inference.

    Training stops at the first checked batch (every 10th) whose average
    cost is below 80.0. At that point the inference model is saved when
    ``save_dirname`` is given, and the function returns.

    Args:
        use_cuda: Run on ``fluid.CUDAPlace(0)`` when true, otherwise on CPU.
        save_dirname: Directory passed to ``fluid.io.save_inference_model``;
            nothing is saved when it is ``None``.
        is_local: When true, train the default main program in-process.
            Otherwise transpile for parameter-server training, configured
            through the ``PADDLE_*`` / ``POD_IP`` environment variables.

    Raises:
        RuntimeError: If all ``PASS_NUM`` passes finish without the cost
            threshold being reached, or if a required environment variable
            is missing in distributed mode.
    """

    def int64_sequence(name):
        # Every network input is a level-1 LoD sequence of int64 ids.
        return fluid.layers.data(
            name=name, shape=[1], dtype='int64', lod_level=1)

    # define network topology
    word = int64_sequence('word_data')
    predicate = int64_sequence('verb_data')
    ctx_n2 = int64_sequence('ctx_n2_data')
    ctx_n1 = int64_sequence('ctx_n1_data')
    ctx_0 = int64_sequence('ctx_0_data')
    ctx_p1 = int64_sequence('ctx_p1_data')
    ctx_p2 = int64_sequence('ctx_p2_data')
    mark = int64_sequence('mark_data')
    # Pass the inputs explicitly rather than via **locals(), which would also
    # forward unrelated locals and silently depend on variable names.
    feature_out = db_lstm(
        word=word,
        predicate=predicate,
        ctx_n2=ctx_n2,
        ctx_n1=ctx_n1,
        ctx_0=ctx_0,
        ctx_p1=ctx_p1,
        ctx_p2=ctx_p2,
        mark=mark)
    target = int64_sequence('target')
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.01,
            decay_steps=100000,
            decay_rate=0.5,
            staircase=True))
    sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    # The decode layer is declared for its effect on the program only; its
    # result is not fetched or saved yet (see the TODO in train_loop).
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    def train_loop(main_program):
        """Run the startup program, load the embedding, then train."""
        exe.run(fluid.default_startup_program())
        # Overwrite the shared (non-trainable) embedding parameter with the
        # matrix stored in the conll05 embedding file.
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        start_time = time.time()
        batch_id = 0
        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost))
                    if batch_id != 0:
                        elapsed = time.time() - start_time
                        print("second per batch: " + str(elapsed / batch_id))
                    # Set the threshold low to speed up the CI test
                    if float(cost) < 80.0:
                        if save_dirname is not None:
                            # TODO(liuyiqun): Change the target to crf_decode
                            fluid.io.save_inference_model(save_dirname, [
                                'word_data', 'verb_data', 'ctx_n2_data',
                                'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                                'ctx_p2_data', 'mark_data'
                            ], [feature_out], exe)
                        return

                batch_id += 1

        raise RuntimeError(
            "This model should save_inference_model and return, but not reach here, please check!"
        )

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = _required_env("PADDLE_PSERVER_IPS")  # ip,ip...
        pserver_endpoints = ",".join(
            ':'.join([ip, port])
            for ip in pserver_ips.split(","))  # ip:port,ip:port...
        trainers = int(_required_env("PADDLE_TRAINERS"))
        current_endpoint = _required_env("POD_IP") + ":" + port
        trainer_id = int(_required_env("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        # Any role other than PSERVER/TRAINER falls through without running.
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Q
Qiao Longfei 已提交
240 241


242 243 244 245 246 247 248
def infer(use_cuda, save_dirname=None):
    """Load the saved inference model and run it once on random input.

    Does nothing when ``save_dirname`` is ``None``. Otherwise the model is
    loaded into a fresh scope, one random LoDTensor is generated per feed
    target, and the sequence lengths and shape of the first result are
    printed.

    Args:
        use_cuda: Run on ``fluid.CUDAPlace(0)`` when true, otherwise on CPU.
        save_dirname: Directory previously written by
            ``fluid.io.save_inference_model``.
    """
    if save_dirname is None:
        return

    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # load_inference_model hands back the inference program, the names of
        # the variables that must be fed, and the variables to fetch.
        inference_program, feed_target_names, fetch_targets = (
            fluid.io.load_inference_model(save_dirname, exe))

        # Each input is a LoDTensor of word indices, so every element has
        # base shape [1]. recursive_sequence_lengths must be a list of lists;
        # the single level [[3, 4, 2]] describes three sentences of length
        # 3, 4 and 2.
        recursive_seq_lens = [[3, 4, 2]]
        base_shape = [1]

        def random_ids(vocab_size):
            # The range of random integers is [low, high].
            return fluid.create_random_int_lodtensor(
                recursive_seq_lens,
                base_shape,
                place,
                low=0,
                high=vocab_size - 1)

        # (expected feed name, id range) in feed order; the tensors are drawn
        # in exactly this order.
        feed_spec = [
            ('word_data', word_dict_len),
            ('verb_data', pred_dict_len),
            ('ctx_n2_data', word_dict_len),
            ('ctx_n1_data', word_dict_len),
            ('ctx_0_data', word_dict_len),
            ('ctx_p1_data', word_dict_len),
            ('ctx_p2_data', word_dict_len),
            ('mark_data', mark_dict_len),
        ]
        tensors = [random_ids(vocab_size) for _, vocab_size in feed_spec]

        # The loaded model must expose its feed targets in the saved order.
        for position, (expected_name, _) in enumerate(feed_spec):
            assert feed_target_names[position] == expected_name

        # Feed is a dictionary of {feed_target_name: feed_target_data}; the
        # results list lines up with fetch_targets.
        feed = {
            feed_target_names[position]: tensor
            for position, tensor in enumerate(tensors)
        }
        results = exe.run(inference_program,
                          feed=feed,
                          fetch_list=fetch_targets,
                          return_numpy=False)
        first_result = results[0]
        print(first_result.recursive_sequence_lengths())
        print("Inference Shape: ", np.array(first_result).shape)
347 348


武毅 已提交
349
def main(use_cuda, is_local=True):
    """Train the model and then run inference on the saved result.

    Returns immediately, doing nothing, when CUDA is requested but Paddle
    was not compiled with CUDA support.

    Args:
        use_cuda: Whether training and inference should run on the GPU.
        is_local: Forwarded to ``train`` to choose local or distributed mode.
    """
    if use_cuda:
        if not fluid.core.is_compiled_with_cuda():
            return

    # Directory for saving the trained model
    model_dir = "label_semantic_roles.inference.model"

    train(use_cuda, model_dir, is_local)
    infer(use_cuda, model_dir)


class TestLabelSemanticRoles(unittest.TestCase):
    """Runs the train-then-infer pipeline once on GPU and once on CPU."""

    @contextlib.contextmanager
    def scope_prog_guard(self):
        """Run the wrapped code with fresh main/startup programs and scope."""
        main_prog = fluid.Program()
        startup = fluid.Program()
        fresh_scope = fluid.core.Scope()
        with fluid.scope_guard(fresh_scope), \
                fluid.program_guard(main_prog, startup):
            yield

    def test_cuda(self):
        with self.scope_prog_guard():
            main(use_cuda=True)

    def test_cpu(self):
        with self.scope_prog_guard():
            main(use_cuda=False)


Q
Qiao Longfei 已提交
379
if __name__ == '__main__':
380
    unittest.main()