infer.py 2.2 KB
Newer Older
C
caoying03 已提交
1
import os
P
pakchoi 已提交
2
import gzip
3
import click
C
caoying03 已提交
4
import numpy as np
P
pakchoi 已提交
5

C
caoying03 已提交
6 7
import paddle.v2 as paddle
from network_conf import ngram_lm
P
pakchoi 已提交
8 9


C
caoying03 已提交
10 11 12 13 14 15
def infer_a_batch(inferer, test_batch, id_to_word):
    """Run the model over one batch and print a prediction per example.

    For each example, prints a tab-separated line: the probability of
    the most likely next word, that word, and the space-joined context
    words the prediction was made from.

    :param inferer: paddle.inference.Inference wrapping the trained model.
    :param test_batch: list of examples, each a sequence of word ids
        forming the input context.
    :param id_to_word: dict mapping word id -> word string.
    """
    probs = inferer.infer(input=test_batch)
    for context, prob in zip(test_batch, probs):
        # argsort()[-1] is the index of the largest probability, i.e.
        # the id of the most likely next word.
        best_id = prob.argsort()[-1]
        print("%.4f\t%s\t%s" % (prob[best_id], id_to_word[best_id],
                                " ".join([id_to_word[w] for w in context])))
P
pakchoi 已提交
16 17


18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
@click.command("infer")
@click.option(
    "--model_path",
    default="",
    help="The path of the trained model for generation.")
@click.option(
    "--batch_size",
    default=1,
    help="The number of testing examples in one forward batch in inferring.")
@click.option(
    "--use_gpu", default=False, help="Whether to use GPU in inference or not.")
@click.option(
    "--trainer_count",
    default=1,
    help="The number of trainers (threads) used in inference.")
def infer(model_path, batch_size, use_gpu, trainer_count):
    """Load a trained n-gram LM and print predictions for the test set.

    Reads the imikolov test set, feeds 4-word contexts through the
    trained model in batches of ``batch_size``, and prints one
    prediction line per example (see ``infer_a_batch``).
    """
    assert os.path.exists(model_path), "The trained model does not exist."
    assert (batch_size and trainer_count and batch_size >= trainer_count), (
        "batch_size and trainer_count must both be greater than 0. "
        "And batch_size must be equal to "
        "or greater than trainer_count.")

    # Build the vocabulary and its inverse mapping (id -> word) so
    # predicted ids can be turned back into readable words.
    word_to_id = paddle.dataset.imikolov.build_dict()
    id_to_word = dict((v, k) for k, v in word_to_id.items())
    dict_size = len(word_to_id)

    paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)

    # Load the trained model parameters.
    with gzip.open(model_path) as f:
        parameters = paddle.parameters.Parameters.from_tar(f)
    prediction_layer = ngram_lm(
        is_train=False, hidden_size=128, emb_size=512, dict_size=dict_size)
    inferer = paddle.inference.Inference(
        output_layer=prediction_layer, parameters=parameters)

    # Group test examples into batches; each example keeps only its
    # first 4 words (the context) and drops the label word.
    test_batch = []
    for item in paddle.dataset.imikolov.test(word_to_id, 5)():
        test_batch.append(item[:4])
        if len(test_batch) == batch_size:
            infer_a_batch(inferer, test_batch, id_to_word)
            # Bug fix: reset the batch buffer. The original code reset
            # an unrelated leftover variable (infer_data), so the batch
            # never emptied and earlier examples were re-inferred with
            # every subsequent "batch".
            test_batch = []

    # Flush the final, partially filled batch.
    if test_batch:
        infer_a_batch(inferer, test_batch, id_to_word)


# Script entry point: dispatch to the click command.
if __name__ == "__main__":
    infer()