infer.py 2.2 KB
Newer Older
C
caoying03 已提交
1
import os
P
pakchoi 已提交
2
import gzip
3
import click
C
caoying03 已提交
4
import numpy as np
P
pakchoi 已提交
5

C
caoying03 已提交
6 7
import paddle.v2 as paddle
from network_conf import ngram_lm
P
pakchoi 已提交
8 9


C
caoying03 已提交
10 11 12 13 14 15
def infer_a_batch(inferer, test_batch, id_to_word):
    """Run the model over one batch and print a prediction per example.

    For each example, prints a tab-separated line: the probability of
    the most likely next word, that word, and the space-joined context
    words the prediction was made from.

    :param inferer: paddle.inference.Inference wrapping the trained model.
    :param test_batch: list of examples, each a sequence of word ids
        forming the input context.
    :param id_to_word: dict mapping word id -> word string.
    """
    probs = inferer.infer(input=test_batch)
    for context, prob in zip(test_batch, probs):
        # argsort()[-1] is the index of the largest probability, i.e.
        # the id of the most likely next word.
        best_id = prob.argsort()[-1]
        print("%.4f\t%s\t%s" % (prob[best_id], id_to_word[best_id],
                                " ".join([id_to_word[w] for w in context])))
P
pakchoi 已提交
16 17


18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
@click.command("infer")
@click.option(
    "--model_path",
    default="",
    help="The path of the trained model for generation.")
@click.option(
    "--batch_size",
    default=1,
    help="The number of testing examples in one forward batch in inferring.")
@click.option(
    "--use_gpu", default=False, help="Whether to use GPU in inference or not.")
@click.option(
    "--trainer_count",
    default=1,
    help="The number of trainers (threads) used in inference.")
def infer(model_path, batch_size, use_gpu, trainer_count):
    """Load a trained n-gram LM and print predictions for the test set.

    Reads the imikolov test set, feeds 4-word contexts through the
    trained model in batches of ``batch_size``, and prints one
    prediction line per example (see ``infer_a_batch``).
    """
    assert os.path.exists(model_path), "The trained model does not exist."
    assert (batch_size and trainer_count and batch_size >= trainer_count), (
        "batch_size and trainer_count must both be greater than 0. "
        "And batch_size must be equal to "
        "or greater than trainer_count.")

    # Build the vocabulary and its inverse mapping (id -> word) so
    # predicted ids can be turned back into readable words.
    word_to_id = paddle.dataset.imikolov.build_dict()
    id_to_word = dict((v, k) for k, v in word_to_id.items())
    dict_size = len(word_to_id)

    paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)

    # Load the trained model parameters.
    with gzip.open(model_path) as f:
        parameters = paddle.parameters.Parameters.from_tar(f)
    prediction_layer = ngram_lm(
        is_train=False, hidden_size=128, emb_size=512, dict_size=dict_size)
    inferer = paddle.inference.Inference(
        output_layer=prediction_layer, parameters=parameters)

    # Group test examples into batches; each example keeps only its
    # first 4 words (the context) and drops the label word.
    test_batch = []
    for item in paddle.dataset.imikolov.test(word_to_id, 5)():
        test_batch.append(item[:4])
        if len(test_batch) == batch_size:
            infer_a_batch(inferer, test_batch, id_to_word)
            # Bug fix: reset the batch buffer. The original code reset
            # an unrelated leftover variable (infer_data), so the batch
            # never emptied and earlier examples were re-inferred with
            # every subsequent "batch".
            test_batch = []

    # Flush the final, partially filled batch.
    if test_batch:
        infer_a_batch(inferer, test_batch, id_to_word)


# Script entry point: dispatch to the click command.
if __name__ == "__main__":
    infer()