infer.py 1.6 KB
Newer Older
C
caoying03 已提交
1
#!/usr/bin/env python
P
pakchoi 已提交
2
# -*- encoding:utf-8 -*-
C
caoying03 已提交
3
import os
P
pakchoi 已提交
4
import gzip
C
caoying03 已提交
5
import numpy as np
P
pakchoi 已提交
6

C
caoying03 已提交
7 8
import paddle.v2 as paddle
from network_conf import ngram_lm
P
pakchoi 已提交
9 10


C
caoying03 已提交
11 12 13 14 15 16
def infer_a_batch(inferer, test_batch, id_to_word):
    """Predict the next word for every sample in a batch and print it.

    For each sample one tab-separated line is printed: the highest score
    (four decimals), the word with that score, and the sample's input
    words joined by spaces.

    Args:
        inferer: Object whose ``infer(input=...)`` returns one score array
            per sample (presumably a paddle Inference instance -- confirm
            against the caller).
        test_batch: Sequence of samples, each an iterable of word ids.
        id_to_word: Mapping from word id to the word string.
    """
    all_scores = inferer.infer(input=test_batch)
    for context_ids, scores in zip(test_batch, all_scores):
        # The last position of the ascending argsort is the top-scoring id.
        best_id = scores.argsort()[-1]
        best_score = scores[best_id]
        best_word = id_to_word[best_id]
        context = " ".join(id_to_word[word_id] for word_id in context_ids)
        print("%.4f\t%s\t%s" % (best_score, best_word, context))
P
pakchoi 已提交
17 18


C
caoying03 已提交
19 20 21 22 23
def infer(model_path, batch_size):
    """Run the trained n-gram language model over the imikolov test set.

    Loads the parameters from the gzip-compressed tar archive at
    ``model_path``, builds the prediction network with ``ngram_lm`` and
    passes the test samples to ``infer_a_batch`` in groups of at most
    ``batch_size``.

    Args:
        model_path: Path to the gzip-compressed parameter archive.
        batch_size: Number of samples handed to the inferer per call.

    Raises:
        AssertionError: If ``model_path`` does not exist.
    """
    assert os.path.exists(model_path), "the trained model does not exist."
    word_to_id = paddle.dataset.imikolov.build_dict()
    id_to_word = dict((v, k) for k, v in word_to_id.items())
    dict_size = len(word_to_id)

    paddle.init(use_gpu=False, trainer_count=1)

    # load the trained model.
    with gzip.open(model_path) as f:
        parameters = paddle.parameters.Parameters.from_tar(f)
    # NOTE(review): hidden_size/emb_size presumably have to match the values
    # used when the model was trained -- confirm against the training script.
    prediction_layer = ngram_lm(
        is_train=False, hidden_size=128, emb_size=512, dict_size=dict_size)
    inferer = paddle.inference.Inference(
        output_layer=prediction_layer, parameters=parameters)

    test_batch = []
    for item in paddle.dataset.imikolov.test(word_to_id, 5)():
        # Only the first four ids of each sample are fed to the model.
        test_batch.append(item[:4])
        if len(test_batch) == batch_size:
            infer_a_batch(inferer, test_batch, id_to_word)
            # Start a fresh batch. Without this reset the list keeps growing,
            # the size check never fires again, and the final flush below
            # re-infers every sample in one oversized batch.
            test_batch = []

    # Flush the samples left over when the data does not divide evenly.
    if test_batch:
        infer_a_batch(inferer, test_batch, id_to_word)


if __name__ == "__main__":
    # Script entry point: run inference with a fixed model archive and
    # batches of ten samples.
    infer(
        model_path="models/model_pass_00000_00020.tar.gz",
        batch_size=10)