#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

Y
Yu Yang 已提交
17
import contextlib
Q
Qiao Longfei 已提交
18
import math
Q
Qiao Longfei 已提交
19
import numpy as np
Y
Yu Yang 已提交
20 21 22
import os
import time
import unittest
23
import tempfile
Y
Yu Yang 已提交
24

25 26
import paddle
import paddle.dataset.conll05 as conll05
27
import paddle.fluid as fluid
Q
Qiao Longfei 已提交
28

P
pangyoki 已提交
29 30
# Switch Paddle to static-graph mode before any fluid layer is built.
paddle.enable_static()

# Vocabularies returned by the CoNLL-05 dataset module, and their sizes.
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len, label_dict_len, pred_dict_len = (
    len(word_dict), len(label_dict), len(verb_dict))

# Model dimensions.
mark_dict_len = 2  # rows of the mark embedding table
word_dim = 32  # width of the word and predicate embeddings
mark_dim = 5  # width of the mark embedding
hidden_dim = 512  # size given to the fc and LSTM layers
depth = 8  # number of stacked LSTM layers
mix_hidden_lr = 1e-3  # learning rate attached to the CRF parameter

# Training settings.
IS_SPARSE = True  # forwarded as is_sparse to the predicate/mark embeddings
PASS_NUM = 2  # maximum number of passes over the training reader
BATCH_SIZE = 10

# Name of the word-embedding parameter shared by all word-index inputs.
embedding_name = 'emb'


def load_parameter(file_name, h, w):
    """Read a binary float32 parameter file into an ``(h, w)`` array.

    The first 16 bytes of the file are treated as a header and discarded;
    everything after them is interpreted as float32 values.

    Args:
        file_name: path of the parameter file.
        h: number of rows of the returned array.
        w: number of columns of the returned array.

    Returns:
        A float32 ``numpy.ndarray`` of shape ``(h, w)``.
    """
    header_bytes = 16
    with open(file_name, 'rb') as param_file:
        param_file.read(header_bytes)  # skip header.
        flat_values = np.fromfile(param_file, dtype=np.float32)
    return flat_values.reshape(h, w)


Y
Yu Yang 已提交
56 57
def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    """Build the stacked LSTM feature network over the eight input features.

    Each input is embedded, projected to ``hidden_dim`` with an fc layer and
    the projections are summed.  The sum feeds a first ``dynamic_lstm``;
    ``depth - 1`` further layers each mix the previous layer's two outputs
    through fc layers and run another ``dynamic_lstm``, with
    ``is_reverse=True`` on odd-numbered layers.

    Args:
        word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2: index inputs that all
            look up the non-trainable embedding named ``embedding_name``.
        predicate: index input embedded through the ``'vemb'`` parameter.
        mark: index input embedded through its own table.
        **ignored: any extra keyword arguments are accepted and discarded.

    Returns:
        The sum of two tanh-activated fc projections of size
        ``label_dict_len``.
    """
    # 8 features
    pred_emb = fluid.layers.embedding(input=predicate,
                                      size=[pred_dict_len, word_dim],
                                      dtype='float32',
                                      is_sparse=IS_SPARSE,
                                      param_attr='vemb')
    mark_emb = fluid.layers.embedding(input=mark,
                                      size=[mark_dict_len, mark_dim],
                                      dtype='float32',
                                      is_sparse=IS_SPARSE)

    # The six word-index inputs share one frozen embedding table by name.
    embedded = []
    for token_ids in (word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2):
        shared_attr = fluid.ParamAttr(name=embedding_name, trainable=False)
        embedded.append(
            fluid.layers.embedding(size=[word_dict_len, word_dim],
                                   input=token_ids,
                                   param_attr=shared_attr))
    embedded.append(pred_emb)
    embedded.append(mark_emb)

    projected = []
    for emb in embedded:
        projected.append(fluid.layers.fc(input=emb, size=hidden_dim))
    bottom = fluid.layers.sums(input=projected)

    # Settings common to every LSTM layer in the stack.
    lstm_opts = {
        'size': hidden_dim,
        'candidate_activation': 'relu',
        'gate_activation': 'sigmoid',
        'cell_activation': 'sigmoid',
    }
    bottom_lstm = fluid.layers.dynamic_lstm(input=bottom, **lstm_opts)

    # stack L-LSTM and R-LSTM with direct edges
    direct, recurrent = bottom, bottom_lstm
    for layer_idx in range(1, depth):
        from_direct = fluid.layers.fc(input=direct, size=hidden_dim)
        from_recurrent = fluid.layers.fc(input=recurrent, size=hidden_dim)
        mixed = fluid.layers.sums(input=[from_direct, from_recurrent])

        run_reversed = (layer_idx % 2) == 1
        recurrent = fluid.layers.dynamic_lstm(input=mixed,
                                              is_reverse=run_reversed,
                                              **lstm_opts)
        direct = mixed

    out_direct = fluid.layers.fc(input=direct,
                                 size=label_dict_len,
                                 act='tanh')
    out_recurrent = fluid.layers.fc(input=recurrent,
                                    size=label_dict_len,
                                    act='tanh')
    return fluid.layers.sums(input=[out_direct, out_recurrent])


武毅 已提交
119
def train(use_cuda, save_dirname=None, is_local=True):
    """Build the labelling network, train it with SGD and export it.

    Training stops at the first checked batch (the cost is checked every
    10th batch) whose average CRF cost is below 80.0.  At that point the
    inference model is written to ``save_dirname`` when one is given.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, otherwise on
            ``fluid.CPUPlace()``.
        save_dirname: directory handed to ``fluid.io.save_inference_model``;
            ``None`` skips the export.
        is_local: train in-process when true.  Otherwise the program is
            transpiled with ``fluid.DistributeTranspiler`` and the role,
            endpoints and ids are read from the ``PADDLE_*`` / ``POD_IP``
            environment variables.

    Raises:
        RuntimeError: if all ``PASS_NUM`` passes finish without the cost
            dropping below the threshold.
        KeyError: in non-local mode, if a required environment variable
            (``PADDLE_PSERVER_IPS``, ``PADDLE_TRAINERS``, ``POD_IP``,
            ``PADDLE_TRAINER_ID``) is not set.
    """

    def sequence_input(name):
        # Every input (and the label) is an int64 index sequence declared
        # with one LoD level.
        return fluid.layers.data(name=name,
                                 shape=[1],
                                 dtype='int64',
                                 lod_level=1)

    # define network topology
    word = sequence_input('word_data')
    predicate = sequence_input('verb_data')
    ctx_n2 = sequence_input('ctx_n2_data')
    ctx_n1 = sequence_input('ctx_n1_data')
    ctx_0 = sequence_input('ctx_0_data')
    ctx_p1 = sequence_input('ctx_p1_data')
    ctx_p2 = sequence_input('ctx_p2_data')
    mark = sequence_input('mark_data')
    # Inputs are passed by keyword so the call does not depend on the names
    # of this function's local variables.
    feature_out = db_lstm(word=word,
                          predicate=predicate,
                          ctx_n2=ctx_n2,
                          ctx_n1=ctx_n1,
                          ctx_0=ctx_0,
                          ctx_p1=ctx_p1,
                          ctx_p2=ctx_p2,
                          mark=mark)
    target = sequence_input('target')
    crf_cost = fluid.layers.linear_chain_crf(input=feature_out,
                                             label=target,
                                             param_attr=fluid.ParamAttr(
                                                 name='crfw',
                                                 learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(learning_rate=0.01,
                                                     decay_steps=100000,
                                                     decay_rate=0.5,
                                                     staircase=True))
    sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    # The decoding op is added to the program even though its output is not
    # fetched yet; it reuses the 'crfw' parameter of the CRF cost layer.
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    # Batches are drawn from the reader returned by conll05.test().
    train_data = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.conll05.test(), buf_size=8192),
                              batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[
        word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
    ],
                              place=place)
    exe = fluid.Executor(place)

    def train_loop(main_program):
        """Run the startup program, load the embedding and train."""
        exe.run(fluid.default_startup_program())
        # Overwrite the (non-trainable) word embedding with the values
        # stored in the file returned by conll05.get_embedding().
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        start_time = time.time()
        batch_id = 0
        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost))
                    if batch_id != 0:
                        print("second per batch: " +
                              str((time.time() - start_time) / batch_id))
                    # Set the threshold low to speed up the CI test
                    if float(cost) < 80.0:
                        if save_dirname is not None:
                            # TODO(liuyiqun): Change the target to crf_decode
                            fluid.io.save_inference_model(
                                save_dirname, [
                                    'word_data', 'verb_data', 'ctx_n2_data',
                                    'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                                    'ctx_p2_data', 'mark_data'
                                ], [feature_out], exe)
                        return

                batch_id = batch_id + 1

        raise RuntimeError(
            "This model should save_inference_model and return, but not reach here, please check!"
        )

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        # Required variables are read through os.environ so that a missing
        # one fails with a KeyError naming it, instead of a None dereference.
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.environ["PADDLE_PSERVER_IPS"]  # ip,ip...
        pserver_endpoints = ",".join(
            ':'.join([ip, port])
            for ip in pserver_ips.split(","))  # ip:port,ip:port...
        trainers = int(os.environ["PADDLE_TRAINERS"])
        current_endpoint = os.environ["POD_IP"] + ":" + port
        trainer_id = int(os.environ["PADDLE_TRAINER_ID"])
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        # Any role other than PSERVER or TRAINER does nothing.
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Q
Qiao Longfei 已提交
253 254


255 256 257 258 259 260 261
def infer(use_cuda, save_dirname=None):
    """Load the saved inference model and run it once on random input.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, otherwise on
            ``fluid.CPUPlace()``.
        save_dirname: directory given to ``fluid.io.load_inference_model``;
            when ``None`` the function returns immediately.
    """
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # load_inference_model returns the inference program, the names of
        # the variables to feed, and the variables to fetch results from.
        loaded = fluid.io.load_inference_model(save_dirname, exe)
        inference_program, feed_target_names, fetch_targets = loaded

        # Inputs are LoDTensors of word indices.  Each element has shape
        # [1] (a single index), and recursive_sequence_lengths [[3, 4, 2]]
        # describes one level of structure: three sequences of length 3, 4
        # and 2.  recursive_sequence_lengths must be a list of lists.
        recursive_seq_lens = [[3, 4, 2]]
        base_shape = [1]

        # (expected feed name, largest random index), in feed order.
        # The range of random integers is [low, high]
        feed_specs = [
            ('word_data', word_dict_len - 1),
            ('verb_data', pred_dict_len - 1),
            ('ctx_n2_data', word_dict_len - 1),
            ('ctx_n1_data', word_dict_len - 1),
            ('ctx_0_data', word_dict_len - 1),
            ('ctx_p1_data', word_dict_len - 1),
            ('ctx_p2_data', word_dict_len - 1),
            ('mark_data', mark_dict_len - 1),
        ]
        random_inputs = [
            fluid.create_random_int_lodtensor(recursive_seq_lens,
                                              base_shape,
                                              place,
                                              low=0,
                                              high=upper)
            for _, upper in feed_specs
        ]

        # The loaded feed names must match the expected ones, position by
        # position.
        for position, (expected_name, _) in enumerate(feed_specs):
            assert feed_target_names[position] == expected_name

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        feed = {
            feed_target_names[position]: tensor
            for position, tensor in enumerate(random_inputs)
        }
        results = exe.run(inference_program,
                          feed=feed,
                          fetch_list=fetch_targets,
                          return_numpy=False)
        first_result = results[0]
        print(first_result.recursive_sequence_lengths())
        np_data = np.array(first_result)
        print("Inference Shape: ", np_data.shape)
352 353


武毅 已提交
354
def main(use_cuda, is_local=True):
    """Train the model into a temporary directory, then run inference on it.

    Args:
        use_cuda: run on GPU when true.  If Paddle was not compiled with
            CUDA the function returns without doing anything.
        is_local: forwarded to ``train``.
    """
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    # The context manager removes the directory even if train() or infer()
    # raises, so a failing run does not leave the saved model behind.
    with tempfile.TemporaryDirectory() as temp_root:
        # Directory for saving the trained model
        save_dirname = os.path.join(temp_root,
                                    "label_semantic_roles.inference.model")

        train(use_cuda, save_dirname, is_local)
        infer(use_cuda, save_dirname)

368 369

class TestLabelSemanticRoles(unittest.TestCase):
    """Runs the train-then-infer pipeline on CPU and on CUDA."""

    @contextlib.contextmanager
    def scope_prog_guard(self):
        """Yield inside a new scope and a new (main, startup) program pair."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        fresh_scope = fluid.core.Scope()
        with fluid.scope_guard(fresh_scope), \
                fluid.program_guard(main_prog, startup_prog):
            yield

    def test_cpu(self):
        with self.scope_prog_guard():
            main(use_cuda=False)

    def test_cuda(self):
        with self.scope_prog_guard():
            main(use_cuda=True)


Q
Qiao Longfei 已提交
389
if __name__ == '__main__':
    # Run the test cases of this module when executed as a script.
    unittest.main()