import argparse
import numpy as np

import paddle
import paddle.fluid as fluid

import model
from model import wrap_encoder as encoder
from model import wrap_decoder as decoder
from model import fast_decode as fast_decoder
from config import *
from train import pad_batch_data
import reader


def parse_args():
    parser = argparse.ArgumentParser("Inference for Transformer.")
    parser.add_argument(
        "--src_vocab_fpath",
        type=str,
        required=True,
        help="The path of vocabulary file of source language.")
    parser.add_argument(
        "--trg_vocab_fpath",
        type=str,
        required=True,
        help="The path of vocabulary file of target language.")
    parser.add_argument(
        "--test_file_pattern",
        type=str,
        required=True,
        help="The pattern to match test data files.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=50,
        help="The number of examples in one run for sequence generation.")
    parser.add_argument(
        "--pool_size",
        type=int,
        default=10000,
        help="The buffer size to pool data.")
    parser.add_argument(
        "--special_token",
        type=str,
        default=["<s>", "<e>", "<unk>"],
        nargs=3,
        help="The <bos>, <eos> and <unk> tokens in the dictionary.")
    parser.add_argument(
        'opts',
        help='See config.py for all options',
        default=None,
        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    # Append args related to dict
    src_dict = reader.DataReader.load_dict(args.src_vocab_fpath)
    trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath)
    dict_args = [
        "src_vocab_size", str(len(src_dict)), "trg_vocab_size",
        str(len(trg_dict)), "bos_idx", str(src_dict[args.special_token[0]]),
        "eos_idx", str(src_dict[args.special_token[1]]), "unk_idx",
        str(src_dict[args.special_token[2]])
    ]
    merge_cfg_from_list(args.opts + dict_args,
                        [InferTaskConfig, ModelHyperParams])
    return args


def translate_batch(exe,
                    src_words,
                    encoder,
                    enc_in_names,
                    enc_out_names,
                    decoder,
                    dec_in_names,
                    dec_out_names,
                    beam_size,
                    max_length,
                    n_best,
                    batch_size,
                    n_head,
                    d_model,
                    src_pad_idx,
                    trg_pad_idx,
                    bos_idx,
                    eos_idx,
                    unk_idx,
                    output_unk=True):
    """
    Run the encoder program once and run the decoder program multiple times to
    implement beam search externally. This is deprecated since a faster beam
    search decoder based solely on Fluid operators has been added.
    """
    # Prepare data for encoder and run the encoder.
    enc_in_data = pad_batch_data(
        src_words,
        src_pad_idx,
        n_head,
        is_target=False,
        is_label=False,
        return_attn_bias=True,
        return_max_len=False)
    # Append the data shape input to reshape the output of the embedding layer.
    enc_in_data = enc_in_data + [
        np.array(
            [-1, enc_in_data[2].shape[-1], d_model], dtype="int32")
    ]
    # Append the shape inputs to reshape before and after softmax in encoder
    # self attention.
    enc_in_data = enc_in_data + [
        np.array(
            [-1, enc_in_data[2].shape[-1]], dtype="int32"), np.array(
                enc_in_data[2].shape, dtype="int32")
    ]
    enc_output = exe.run(encoder,
                         feed=dict(zip(enc_in_names, enc_in_data)),
                         fetch_list=enc_out_names)[0]
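    # A sketch of the expected layout (an assumption based on the reshapes
    # below): enc_output holds the encoder states as [batch_size, src_max_len,
    # d_model] and is tiled beam_size times per instance in init_dec_in_data.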

    # Beam Search.
    # To store the beam info.
    scores = np.zeros((batch_size, beam_size), dtype="float32")
    prev_branchs = [[] for i in range(batch_size)]
    next_ids = [[] for i in range(batch_size)]
    # Use beam_inst_map to map the beam index to the instance index in the
    # batch, since the size of the fed batch changes during decoding.
    beam_inst_map = {
        beam_idx: inst_idx
        for inst_idx, beam_idx in enumerate(range(batch_size))
    }
    # Use active_beams to record the indices of unfinished instances.
    active_beams = list(range(batch_size))
    def beam_backtrace(prev_branchs, next_ids, n_best=beam_size):
        """
        Decode and select n_best sequences for one instance by backtrace.
        """
        seqs = []
        for i in range(n_best):
            k = i
            seq = []
            for j in range(len(prev_branchs) - 1, -1, -1):
                seq.append(next_ids[j][k])
                k = prev_branchs[j][k]
            seq = seq[::-1]
            # Add the <bos>, since next_ids does not include the <bos>.
            seq = [bos_idx] + seq
            seqs.append(seq)
        return seqs

    def init_dec_in_data(batch_size, beam_size, enc_in_data, enc_output):
        """
        Initialize the input data for decoder.
        """
        trg_words = np.array(
            [[bos_idx]] * batch_size * beam_size, dtype="int64")
        trg_pos = np.array([[1]] * batch_size * beam_size, dtype="int64")
        src_max_length = enc_in_data[2].shape[-1]
        src_slf_attn_bias = enc_in_data[2]
        trg_max_len = 1
        # This is used to remove attention on subsequent words.
        trg_slf_attn_bias = np.ones((batch_size * beam_size, trg_max_len,
                                     trg_max_len))
        trg_slf_attn_bias = np.triu(trg_slf_attn_bias, 1).reshape(
            [-1, 1, trg_max_len, trg_max_len])
        trg_slf_attn_bias = (np.tile(trg_slf_attn_bias, [1, n_head, 1, 1]) *
                             [-1e9]).astype("float32")
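        # As an illustration: for a target length of 3, np.triu(ones, 1) *
        # -1e9 puts -1e9 strictly above the diagonal and 0 elsewhere, so
        # position t can only attend to positions <= t once this bias is
        # added to the attention logits.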
        # This is used to remove attention on the paddings of source sequences.
        trg_src_attn_bias = np.tile(
            src_slf_attn_bias[:, :, ::src_max_length, :][:, np.newaxis],
            [1, beam_size, 1, trg_max_len, 1]).reshape([
                -1, src_slf_attn_bias.shape[1], trg_max_len,
                src_slf_attn_bias.shape[-1]
            ])
        # Append the shape input to reshape the output of the embedding layer.
        trg_data_shape = np.array(
            [batch_size * beam_size, trg_max_len, d_model], dtype="int32")
        # Append the shape inputs to reshape before and after softmax in
        # decoder self attention.
        trg_slf_attn_pre_softmax_shape = np.array(
            [-1, trg_slf_attn_bias.shape[-1]], dtype="int32")
        trg_slf_attn_post_softmax_shape = np.array(
            trg_slf_attn_bias.shape, dtype="int32")
        # Append the shape inputs to reshape before and after softmax in
        # encoder-decoder attention.
        trg_src_attn_pre_softmax_shape = np.array(
            [-1, trg_src_attn_bias.shape[-1]], dtype="int32")
        trg_src_attn_post_softmax_shape = np.array(
            trg_src_attn_bias.shape, dtype="int32")
        enc_output = np.tile(
            enc_output[:, np.newaxis], [1, beam_size, 1, 1]).reshape(
                [-1, enc_output.shape[-2], enc_output.shape[-1]])
        return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
            trg_data_shape, trg_slf_attn_pre_softmax_shape, \
            trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
            trg_src_attn_post_softmax_shape, enc_output
    def update_dec_in_data(dec_in_data, next_ids, active_beams, beam_inst_map):
        """
        Update the input data of decoder mainly by slicing from the previous
        input data and dropping the finished instance beams.
        """
        trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
            trg_data_shape, trg_slf_attn_pre_softmax_shape, \
            trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
            trg_src_attn_post_softmax_shape, enc_output = dec_in_data
        trg_cur_len = trg_slf_attn_bias.shape[-1] + 1
        trg_words = np.array(
            [
                beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx])
                for beam_idx in active_beams
            ],
            dtype="int64")
        trg_words = trg_words.reshape([-1, 1])
        trg_pos = np.array(
            [range(1, trg_cur_len + 1)] * len(active_beams) * beam_size,
            dtype="int64").reshape([-1, 1])
        active_beams = [beam_inst_map[beam_idx] for beam_idx in active_beams]
        active_beams_indice = (
            (np.array(active_beams) * beam_size)[:, np.newaxis] +
            np.array(range(beam_size))[np.newaxis, :]).flatten()
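        # E.g., with active_beams = [0, 2] and beam_size = 3, this yields the
        # row indices [0, 1, 2, 6, 7, 8] into the flattened beam dimension.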
        # This is used to remove attention on subsequent words.
        trg_slf_attn_bias = np.ones((len(active_beams) * beam_size, trg_cur_len,
                                     trg_cur_len))
        trg_slf_attn_bias = np.triu(trg_slf_attn_bias, 1).reshape(
            [-1, 1, trg_cur_len, trg_cur_len])
        trg_slf_attn_bias = (np.tile(trg_slf_attn_bias, [1, n_head, 1, 1]) *
                             [-1e9]).astype("float32")
        # This is used to remove attention on the paddings of source sequences.
        trg_src_attn_bias = np.tile(trg_src_attn_bias[
            active_beams_indice, :, ::trg_src_attn_bias.shape[2], :],
                                    [1, 1, trg_cur_len, 1])
        # Append the shape input to reshape the output of the embedding layer.
        trg_data_shape = np.array(
            [len(active_beams) * beam_size, trg_cur_len, d_model],
            dtype="int32")
        # Append the shape inputs to reshape before and after softmax in
        # decoder self attention.
        trg_slf_attn_pre_softmax_shape = np.array(
            [-1, trg_slf_attn_bias.shape[-1]], dtype="int32")
        trg_slf_attn_post_softmax_shape = np.array(
            trg_slf_attn_bias.shape, dtype="int32")
        # Append the shape inputs to reshape before and after softmax in
        # encoder-decoder attention.
        trg_src_attn_pre_softmax_shape = np.array(
            [-1, trg_src_attn_bias.shape[-1]], dtype="int32")
        trg_src_attn_post_softmax_shape = np.array(
            trg_src_attn_bias.shape, dtype="int32")
        enc_output = enc_output[active_beams_indice, :, :]
        return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
            trg_data_shape, trg_slf_attn_pre_softmax_shape, \
            trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
            trg_src_attn_post_softmax_shape, enc_output

    dec_in_data = init_dec_in_data(batch_size, beam_size, enc_in_data,
                                   enc_output)
    for i in range(max_length):
        predict_all = exe.run(decoder,
                              feed=dict(zip(dec_in_names, dec_in_data)),
                              fetch_list=dec_out_names)[0]
        predict_all = np.log(
            predict_all.reshape([len(beam_inst_map) * beam_size, i + 1, -1])
            [:, -1, :])
        predict_all = (predict_all + scores[active_beams].reshape(
            [len(beam_inst_map) * beam_size, -1])).reshape(
                [len(beam_inst_map), beam_size, -1])
        if not output_unk:  # To exclude the <unk> token.
            predict_all[:, :, unk_idx] = -1e9
        active_beams = []
        for beam_idx in range(batch_size):
            if beam_idx not in beam_inst_map:
                continue
            inst_idx = beam_inst_map[beam_idx]
            predict = (predict_all[inst_idx, :, :]
                       if i != 0 else predict_all[inst_idx, 0, :]).flatten()
            top_k_indice = np.argpartition(predict, -beam_size)[-beam_size:]
            top_scores_ids = top_k_indice[np.argsort(predict[top_k_indice])
                                          [::-1]]
            top_scores = predict[top_scores_ids]
            scores[beam_idx] = top_scores
            prev_branchs[beam_idx].append(top_scores_ids //
                                          predict_all.shape[-1])
            next_ids[beam_idx].append(top_scores_ids % predict_all.shape[-1])
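            # The flattened index decomposes as (branch, word): e.g., with a
            # vocabulary of size V, flat index 3 * V + 42 (hypothetical) means
            # this candidate extends branch 3 of the beam with word id 42.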
            if next_ids[beam_idx][-1][0] != eos_idx:
                active_beams.append(beam_idx)
        if len(active_beams) == 0:
            break
        dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams,
                                         beam_inst_map)
        beam_inst_map = {
            beam_idx: inst_idx
            for inst_idx, beam_idx in enumerate(active_beams)
        }

    # Decode beams and select n_best sequences for each instance by backtrace.
    seqs = [
        beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best)
        for beam_idx in range(batch_size)
    ]

    return seqs, scores[:, :n_best].tolist()


def post_process_seq(seq,
                     bos_idx=ModelHyperParams.bos_idx,
                     eos_idx=ModelHyperParams.eos_idx,
                     output_bos=InferTaskConfig.output_bos,
                     output_eos=InferTaskConfig.output_eos):
    """
    Post-process the beam-search decoded sequence: truncate at the first
    <eos> and optionally strip the <bos> and <eos> tokens.
    """
    eos_pos = len(seq) - 1
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    seq = seq[:eos_pos + 1]
    return [
        idx for idx in seq
        if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx)
    ]


def py_infer(test_data, trg_idx2word):
    """
    Inference by beam search implemented in Python, while the calculations
    from symbols to probabilities are executed by Fluid operators.
    """
    place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    encoder_program = fluid.Program()
    with fluid.program_guard(main_program=encoder_program):
        enc_output = encoder(
            ModelHyperParams.src_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.dropout, ModelHyperParams.weight_sharing)

    decoder_program = fluid.Program()
    with fluid.program_guard(main_program=decoder_program):
        predict = decoder(
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.dropout, ModelHyperParams.weight_sharing)

    # Load model parameters of encoder and decoder separately from the saved
    # transformer model.
    encoder_var_names = []
    for op in encoder_program.block(0).ops:
        encoder_var_names += op.input_arg_names
    encoder_param_names = [
        var_name for var_name in encoder_var_names
        if isinstance(
            encoder_program.block(0).var(var_name), fluid.framework.Parameter)
    ]
    encoder_params = [
        encoder_program.block(0).var(var_name)
        for var_name in encoder_param_names
    ]
    decoder_var_names = []
    for op in decoder_program.block(0).ops:
        decoder_var_names += op.input_arg_names
    decoder_param_names = [
        var_name for var_name in decoder_var_names
        if isinstance(
            decoder_program.block(0).var(var_name), fluid.framework.Parameter)
    ]
    decoder_params = [
        decoder_program.block(0).var(var_name)
        for var_name in decoder_param_names
    ]
    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=encoder_params)
    fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=decoder_params)

    # This is used here to set dropout to the test mode.
    encoder_program = encoder_program.inference_optimize()
    decoder_program = decoder_program.inference_optimize()
    for batch_id, data in enumerate(test_data.batch_generator()):
        batch_seqs, batch_scores = translate_batch(
            exe,
            [item[0] for item in data],
            encoder_program,
            encoder_data_input_fields + encoder_util_input_fields,
            [enc_output.name],
            decoder_program,
            decoder_data_input_fields[:-1] + decoder_util_input_fields +
            (decoder_data_input_fields[-1], ),
            [predict.name],
            InferTaskConfig.beam_size,
            InferTaskConfig.max_out_len,
            InferTaskConfig.n_best,
            len(data),
            ModelHyperParams.n_head,
            ModelHyperParams.d_model,
            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
            ModelHyperParams.bos_idx,
            ModelHyperParams.eos_idx,
            ModelHyperParams.unk_idx,
            output_unk=InferTaskConfig.output_unk)
        for i in range(len(batch_seqs)):
            # Post-process the beam-search decoded sequences.
            seqs = [post_process_seq(seq) for seq in batch_seqs[i]]
            scores = batch_scores[i]
            for seq in seqs:
                print(" ".join([trg_idx2word[idx] for idx in seq]))


def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx,
                        bos_idx, n_head, d_model, place):
    """
    Put all the padded data needed by the beam search decoder into a dict.
    """
    src_word, src_pos, src_slf_attn_bias, src_max_len = pad_batch_data(
        [inst[0] for inst in insts], src_pad_idx, n_head, is_target=False)
    # start tokens
    trg_word = np.asarray([[bos_idx]] * len(insts), dtype="int64")
    trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :],
                                [1, 1, 1, 1]).astype("float32")
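    # The ::src_max_len step slices a single row from the [batch_size, n_head,
    # src_max_len, src_max_len] self-attention bias, leaving a [batch_size,
    # n_head, 1, src_max_len] mask over the source paddings for the single
    # initial decoder step.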

    # These shape tensors are used in reshape_op.
    src_data_shape = np.array([-1, src_max_len, d_model], dtype="int32")
    trg_data_shape = np.array([-1, 1, d_model], dtype="int32")
    src_slf_attn_pre_softmax_shape = np.array(
        [-1, src_slf_attn_bias.shape[-1]], dtype="int32")
    src_slf_attn_post_softmax_shape = np.array(
        [-1] + list(src_slf_attn_bias.shape[1:]), dtype="int32")
    trg_slf_attn_pre_softmax_shape = np.array(
        [-1, 1], dtype="int32")  # only the first time step
    trg_slf_attn_post_softmax_shape = np.array(
        [-1, n_head, 1, 1], dtype="int32")  # only the first time step
    trg_src_attn_pre_softmax_shape = np.array(
        [-1, trg_src_attn_bias.shape[-1]], dtype="int32")
    trg_src_attn_post_softmax_shape = np.array(
        [-1] + list(trg_src_attn_bias.shape[1:]), dtype="int32")
    # These inputs are used to change the shapes in the loop of while op.
    attn_pre_softmax_shape_delta = np.array([0, 1], dtype="int32")
    attn_post_softmax_shape_delta = np.array([0, 0, 0, 1], dtype="int32")
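    # These deltas presumably let the decoder's while op grow the softmax
    # shape tensors by one target position per decoding step as the generated
    # sequence lengthens.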

    def to_lodtensor(data, place, lod=None):
        data_tensor = fluid.LoDTensor()
        data_tensor.set(data, place)
        if lod is not None:
            data_tensor.set_lod(lod)
        return data_tensor

    # beamsearch_op must use tensors with lod
    init_score = to_lodtensor(
        np.zeros_like(
            trg_word, dtype="float32"), place,
        [list(range(trg_word.shape[0] + 1))] * 2)
    trg_word = to_lodtensor(trg_word, place,
                            [list(range(trg_word.shape[0] + 1))] * 2)
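    # The two-level LoD marks every row as its own length-1 candidate: for a
    # batch of 3 start tokens it is [[0, 1, 2, 3], [0, 1, 2, 3]], i.e., three
    # single-token hypotheses, one per instance.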

    data_input_dict = dict(
        zip(data_input_names, [
            src_word, src_pos, src_slf_attn_bias, trg_word, init_score,
            trg_src_attn_bias
        ]))
    util_input_dict = dict(
        zip(util_input_names, [
            src_data_shape, src_slf_attn_pre_softmax_shape,
            src_slf_attn_post_softmax_shape, trg_data_shape,
            trg_slf_attn_pre_softmax_shape, trg_slf_attn_post_softmax_shape,
            trg_src_attn_pre_softmax_shape, trg_src_attn_post_softmax_shape,
            attn_pre_softmax_shape_delta, attn_post_softmax_shape_delta
        ]))

    input_dict = dict(data_input_dict)
    input_dict.update(util_input_dict)
    return input_dict


def fast_infer(test_data, trg_idx2word):
    """
    Inference by beam search decoder based solely on Fluid operators.
    """
    place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    out_ids, out_scores = fast_decoder(
        ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
        ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
        ModelHyperParams.n_head, ModelHyperParams.d_key,
        ModelHyperParams.d_value, ModelHyperParams.d_model,
        ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
        ModelHyperParams.weight_sharing, InferTaskConfig.beam_size,
        InferTaskConfig.max_out_len, ModelHyperParams.eos_idx)

    fluid.io.load_vars(
        exe,
        InferTaskConfig.model_path,
        vars=[
            var for var in fluid.default_main_program().list_vars()
            if isinstance(var, fluid.framework.Parameter)
        ])

    # This is used here to set dropout to the test mode.
    infer_program = fluid.default_main_program().inference_optimize()

    for batch_id, data in enumerate(test_data.batch_generator()):
        data_input = prepare_batch_input(
            data, encoder_data_input_fields + fast_decoder_data_input_fields,
            encoder_util_input_fields + fast_decoder_util_input_fields,
            ModelHyperParams.eos_idx, ModelHyperParams.bos_idx,
            ModelHyperParams.n_head, ModelHyperParams.d_model, place)
        seq_ids, seq_scores = exe.run(infer_program,
                                      feed=data_input,
                                      fetch_list=[out_ids, out_scores],
                                      return_numpy=False)
        # How to parse the results:
        #   Suppose the lod of seq_ids is:
        #     [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
        #   then from lod[0]:
        #     there are 2 source sentences, beam width is 3.
        #   from lod[1]:
        #     the first source sentence has 3 hyps; the lengths are 12, 12, 16
        #     the second source sentence has 3 hyps; the lengths are 14, 13, 15
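        # In code: np.diff(seq_ids.lod()[1]) on the example LoD above yields
        # [12, 12, 16, 14, 13, 15], the six hypothesis lengths.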
        hyps = [[] for i in range(len(data))]
        scores = [[] for i in range(len(data))]
        for i in range(len(seq_ids.lod()[0]) - 1):  # for each source sentence
            start = seq_ids.lod()[0][i]
            end = seq_ids.lod()[0][i + 1]
            for j in range(end - start):  # for each candidate
                sub_start = seq_ids.lod()[1][start + j]
                sub_end = seq_ids.lod()[1][start + j + 1]
                hyps[i].append(" ".join([
                    trg_idx2word[idx]
                    for idx in post_process_seq(
                        np.array(seq_ids)[sub_start:sub_end])
                ]))
                scores[i].append(np.array(seq_scores)[sub_end - 1])
                print(hyps[i][-1])
                if len(hyps[i]) >= InferTaskConfig.n_best:
                    break


def infer(args, inferencer=fast_infer):
    place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
    test_data = reader.DataReader(
        src_vocab_fpath=args.src_vocab_fpath,
        trg_vocab_fpath=args.trg_vocab_fpath,
        fpattern=args.test_file_pattern,
        batch_size=args.batch_size,
        use_token_batch=False,
        pool_size=args.pool_size,
        sort_type=reader.SortType.NONE,
        shuffle=False,
        shuffle_batch=False,
        start_mark=args.special_token[0],
        end_mark=args.special_token[1],
        unk_mark=args.special_token[2],
        # Reserve two positions for the start and end tokens.
        max_length=ModelHyperParams.max_length - 2,
        clip_last_batch=False)
    trg_idx2word = test_data.load_dict(
        dict_path=args.trg_vocab_fpath, reverse=True)
    inferencer(test_data, trg_idx2word)


if __name__ == "__main__":
    args = parse_args()
    infer(args)