infer.py 3.3 KB
Newer Older
C
caoying03 已提交
1 2 3
import os
import logging
import gzip
4
import click
5 6

import paddle.v2 as paddle
C
caoying03 已提交
7 8 9 10
from network_conf import ngram_lm

logger = logging.getLogger("paddle")
logger.setLevel(logging.WARNING)
11 12


C
caoying03 已提交
13
def decode_result(infer_res, dict_size):
    """Decode hierarchical-softmax probabilities into class labels.

    Inferring probabilities are organized as a complete binary tree.
    The actual labels are leaves (indices are counted from the class
    number). This function travels the path encoded in the inferring
    results: if the probability is > 0.5 it goes to the right child,
    otherwise to the left child.

    :param infer_res: 2-D array of inferring probabilities, one row
        per input instance.
    :param dict_size: the class number (size of the word dictionary).
    :return: list of decoded class labels, one per input row.
    """
    predict_lbls = []
    infer_res = infer_res > 0.5  # boolean decisions: True means "go right"
    for probs in infer_res:
        idx = 0
        result = 1
        while idx < len(probs):
            # Append one path bit per visited node: 1 for right, 0 for left.
            result <<= 1
            if probs[idx]:
                result |= 1
                idx = idx * 2 + 2  # right child
            else:
                idx = idx * 2 + 1  # left child
        # Leaf codes start at dict_size; shift back to an actual label.
        predict_lbls.append(result - dict_size)
    return predict_lbls


C
caoying03 已提交
43
def infer_a_batch(batch_ins, idx_word_dict, dict_size, inferer):
    """Run inference on one batch and print each prediction.

    Output format: word1 word2 word3 word4 -> predicted word

    :param batch_ins: list of input instances (word-index sequences).
    :param idx_word_dict: mapping from word index back to word string.
    :param dict_size: the class number, used to decode leaf labels.
    :param inferer: a paddle Inference object wrapping the trained model.
    """
    probabilities = inferer.infer(input=batch_ins)
    labels = decode_result(probabilities, dict_size)
    # Translate every decoded label into its word.
    predicted_words = [idx_word_dict[label] for label in labels]

    for instance, word in zip(batch_ins, predicted_words):
        context = " ".join(idx_word_dict[w] for w in instance)
        print(context + " -> " + word)

54

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
@click.command("infer")
@click.option(
    "--model_path",
    default="",
    help="The path of the trained model for generation.")
@click.option(
    "--batch_size",
    default=1,
    help="The number of testing examples in one forward batch in inferring.")
@click.option(
    "--use_gpu", default=False, help="Whether to use GPU in inference or not.")
@click.option(
    "--trainer_count",
    default=1,
    help="The number of trainers used in inference.")
def infer(model_path, batch_size, use_gpu, trainer_count):
    """Load a trained n-gram LM and print next-word predictions.

    Runs the imikolov test set through the trained model batch by batch.

    :param model_path: path of the trained model (gzipped parameter tar).
    :param batch_size: number of test examples per forward batch.
    :param use_gpu: whether to run inference on GPU.
    :param trainer_count: number of trainer devices/threads to use.
    """
    assert os.path.exists(model_path), "The trained model does not exist."
    assert (batch_size and trainer_count and batch_size >= trainer_count), (
        "batch_size and trainer_count must both be greater than 0. "
        "And batch_size must be equal to "
        "or greater than trainer_count.")

    paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)
    # Rebuild the dictionary exactly as in training so word indices
    # line up with the trained parameters.
    word_dict = paddle.dataset.imikolov.build_dict(min_word_freq=2)
    dict_size = len(word_dict)
    prediction_layer = ngram_lm(
        is_train=False, hidden_size=256, embed_size=32, dict_size=dict_size)

    with gzip.open(model_path, "r") as f:
        parameters = paddle.parameters.Parameters.from_tar(f)

    inferer = paddle.inference.Inference(
        output_layer=prediction_layer, parameters=parameters)
    # Invert word -> index so predictions can be decoded back to words.
    idx_word_dict = dict((v, k) for k, v in word_dict.items())

    # Group the test stream into batches; ins[:-1] drops the gold label,
    # keeping only the context words fed to the model.
    batch_ins = []
    for ins in paddle.dataset.imikolov.test(word_dict, 5)():
        batch_ins.append(ins[:-1])
        if len(batch_ins) == batch_size:
            infer_a_batch(batch_ins, idx_word_dict, dict_size, inferer)
            batch_ins = []

    # Flush the final, possibly partial batch.
    if batch_ins:
        infer_a_batch(batch_ins, idx_word_dict, dict_size, inferer)


C
caoying03 已提交
101
# Run the click command-line entry point when executed as a script.
if __name__ == "__main__":
    infer()