From d5365bb715c4438d05bc3c80a750c3448e09b347 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 6 Mar 2017 11:19:17 +0800 Subject: [PATCH] Add input data interface for inference --- demo/mnist/api_train_v2.py | 19 ++++---- doc/api/v2/run_logic.rst | 8 ++++ python/paddle/v2/inference.py | 88 ++++++++++++++++++++++++++++++++--- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 4fb1808ca11..81b330a6052 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -90,7 +90,7 @@ def main(): print "Pass %d, Batch %d, Cost %f, %s" % ( event.pass_id, event.batch_id, event.cost, event.metrics) if isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=paddle.reader.batched( + result = trainer.test(reader=paddle.batch( paddle.dataset.mnist.test(), batch_size=128)) print "Test with Pass %d, Cost %f, %s\n" % ( event.pass_id, result.cost, result.metrics) @@ -110,17 +110,16 @@ def main(): print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) + test_creator = paddle.dataset.mnist.test() + test_data = [] + for item in test_creator(): + test_data.append(item[0]) + if len(test_data) == 100: + break + # output is a softmax layer. It returns probabilities. # Shape should be (100, 10) - probs = paddle.infer( - output=predict, - parameters=parameters, - reader=paddle.batch( - paddle.reader.firstn( - paddle.reader.map_readers(lambda item: (item[0], ), - paddle.dataset.mnist.test()), - n=100), - batch_size=32)) + probs = paddle.infer(output=predict, parameters=parameters, input=test_data) print probs.shape diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst index 904d45966df..0f807873ff9 100644 --- a/doc/api/v2/run_logic.rst +++ b/doc/api/v2/run_logic.rst @@ -2,6 +2,7 @@ Trainer API ########### + ========== Parameters ========== @@ -24,3 +25,10 @@ Event .. automodule:: paddle.v2.event :members: + + +========= +Inference +========= + +.. autofunction:: paddle.v2.infer \ No newline at end of file diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 476fd3fa452..7c079a0d32d 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -1,9 +1,9 @@ +import numpy import py_paddle.swig_paddle as api - +import collections import topology +import minibatch from data_feeder import DataFeeder -import itertools -import numpy __all__ = ['infer'] @@ -21,9 +21,39 @@ class Inference(object): self.__gradient_machine__ = gm self.__data_types__ = topo.data_type() - def iter_infer(self, reader, reader_dict=None): + def iter_infer(self, + input=None, + batch_size=None, + reader=None, + reader_dict=None): if reader_dict is None: reader_dict = self.default_reader_dict() + + if reader is None: + assert input is not None and isinstance(input, collections.Iterable) + if not isinstance(input, collections.Iterable): + raise TypeError("When reader is None, input should be whole " + "inference data and should be iterable") + + if batch_size is None: + if not hasattr(input, '__len__'): + raise ValueError("Should set batch size when input data " + "don't contain length.") + batch_size = len(input) + + def __reader_impl__(): + for each_sample in input: + if len(reader_dict) == 1: + yield [each_sample] + else: + yield each_sample + + reader = minibatch.batch(__reader_impl__, batch_size=batch_size) + else: + if input is not None: + raise ValueError("User should set either input or reader, " + "should not set them both.") + feeder = DataFeeder(self.__data_types__, reader_dict) self.__gradient_machine__.start() for data_batch in reader(): @@ -54,6 +84,52 @@ class Inference(object): return reader_dict -def infer(output, parameters, reader, reader_dict=None, field='value'): +def infer(output, + parameters, + input=None, + batch_size=None, + reader=None, + reader_dict=None, + field='value'): + """ + Infer a neural network by given neural network output and parameters. The + user should pass either a batch of input data or reader method. + + Example usages: + + .. code-block:: python + + result = paddle.infer(prediction, parameters, input=SomeData, + batch_size=32) + print result + + :param output: output of the neural network that would be inferred + :type output: paddle.v2.config_base.Layer + :param parameters: parameters of the neural network. + :type parameters: paddle.v2.parameters.Parameters + :param input: input data batch. Should be a python iterable object, and each + element is the data batch. + :type input: collections.Iterable + :param batch_size: the batch size when perform inference. Default is the + length of input. + :type batch_size: int + :param reader: input data reader creator in batch. If this field is set, the + `input` and `batch_size` will be ignored. + :type reader: callable + :param reader_dict: Reader dictionary. Default could generate from input + value. + :param field: The prediction field. It should in [`value`, `ids`]. `value` + means return the prediction probabilities, `ids` means return + the prediction labels. Default is `value` + :type field: str + :return: a numpy array + :rtype: numpy.ndarray + """ + inferer = Inference(output=output, parameters=parameters) - return inferer.infer(field=field, reader=reader, reader_dict=reader_dict) + return inferer.infer( + field=field, + input=input, + batch_size=batch_size, + reader=reader, + reader_dict=reader_dict) -- GitLab