Add input data interface for inference

d5365bb7 · Yu Yang · 5f2cbce4 · d5365bb7 · d5365bb7 · d5365bb7
Showing 3 changed files with 99 additions and 16 deletions

demo/mnist/api_train_v2.py demo/mnist/api_train_v2.py +9 -10

doc/api/v2/run_logic.rst doc/api/v2/run_logic.rst +8 -0

python/paddle/v2/inference.py python/paddle/v2/inference.py +82 -6

未找到文件。
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -90,7 +90,7 @@ def main():
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
        if isinstance(event, paddle.event.EndPass):
-            result = trainer.test(reader=paddle.reader.batched(
+            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
                event.pass_id, result.cost, result.metrics)
@@ -110,17 +110,16 @@ def main():
    print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
    print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)

+    test_creator = paddle.dataset.mnist.test()
+    test_data = []
+    for item in test_creator():
+        test_data.append(item[0])
+        if len(test_data) == 100:
+            break
+
    # output is a softmax layer. It returns probabilities.
    # Shape should be (100, 10)
-    probs = paddle.infer(
-        output=predict,
-        parameters=parameters,
-        reader=paddle.batch(
-            paddle.reader.firstn(
-                paddle.reader.map_readers(lambda item: (item[0], ),
-                                          paddle.dataset.mnist.test()),
-                n=100),
-            batch_size=32))
+    probs = paddle.infer(output=predict, parameters=parameters, input=test_data)
    print probs.shape



--- a/doc/api/v2/run_logic.rst
+++ b/doc/api/v2/run_logic.rst
@@ -2,6 +2,7 @@
 Trainer API
 ###########

+
 ==========
 Parameters
 ==========
@@ -24,3 +25,10 @@ Event

 ..	automodule:: paddle.v2.event
 	:members:
+
+
+=========
+Inference
+=========
+
+..	autofunction:: paddle.v2.infer
\ No newline at end of file
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
+import numpy
 import py_paddle.swig_paddle as api
-
+import collections
 import topology
+import minibatch
 from data_feeder import DataFeeder
-import itertools
-import numpy

 __all__ = ['infer']

@@ -21,9 +21,39 @@ class Inference(object):
        self.__gradient_machine__ = gm
        self.__data_types__ = topo.data_type()

-    def iter_infer(self, reader, reader_dict=None):
+    def iter_infer(self,
+                   input=None,
+                   batch_size=None,
+                   reader=None,
+                   reader_dict=None):
        if reader_dict is None:
            reader_dict = self.default_reader_dict()
+
+        if reader is None:
+            assert input is not None and isinstance(input, collections.Iterable)
+            if not isinstance(input, collections.Iterable):
+                raise TypeError("When reader is None, input should be whole "
+                                "inference data and should be iterable")
+
+            if batch_size is None:
+                if not hasattr(input, '__len__'):
+                    raise ValueError("Should set batch size when input data "
+                                     "don't contain length.")
+                batch_size = len(input)
+
+            def __reader_impl__():
+                for each_sample in input:
+                    if len(reader_dict) == 1:
+                        yield [each_sample]
+                    else:
+                        yield each_sample
+
+            reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
+        else:
+            if input is not None:
+                raise ValueError("User should set either input or reader, "
+                                 "should not set them both.")
+
        feeder = DataFeeder(self.__data_types__, reader_dict)
        self.__gradient_machine__.start()
        for data_batch in reader():
@@ -54,6 +84,52 @@ class Inference(object):
        return reader_dict


-def infer(output, parameters, reader, reader_dict=None, field='value'):
+def infer(output,
+          parameters,
+          input=None,
+          batch_size=None,
+          reader=None,
+          reader_dict=None,
+          field='value'):
+    """
+    Run inference on a neural network given its output layer and parameters.
+    The user should pass either the input data or a reader, but not both.
+
+    Example usages:
+
+    ..  code-block:: python
+
+        result = paddle.infer(prediction, parameters, input=SomeData,
+                              batch_size=32)
+        print result
+
+    :param output: output of the neural network that would be inferred
+    :type output: paddle.v2.config_base.Layer
+    :param parameters: parameters of the neural network.
+    :type parameters: paddle.v2.parameters.Parameters
+    :param input: input data. Should be a python iterable object, and each
+                  element is one data sample.
+    :type input: collections.Iterable
+    :param batch_size: the batch size used when performing inference. Default
+                       is the length of input.
+    :type batch_size: int
+    :param reader: input data reader creator in batch. Must not be set together
+                   with `input`; when set, `batch_size` is not used.
+    :type reader: callable
+    :param reader_dict: Reader dictionary. Default could generate from input
+                        value.
+    :param field: The prediction field. It should be one of [`value`, `ids`]. `value`
+                  means return the prediction probabilities, `ids` means return
+                  the prediction labels. Default is `value`
+    :type field: str
+    :return: a numpy array
+    :rtype: numpy.ndarray
+    """
+
    inferer = Inference(output=output, parameters=parameters)
-    return inferer.infer(field=field, reader=reader, reader_dict=reader_dict)
+    return inferer.infer(
+        field=field,
+        input=input,
+        batch_size=batch_size,
+        reader=reader,
+        reader_dict=reader_dict)