diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index a59b30ccdb2eddea6680d6ad5c790c857b9c5141..06beb7024d1fd07dc327cb4c09d74e1b89a7b8ff 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -44,6 +44,19 @@ def main():
             batch_size=32),
         event_handler=event_handler)
 
+    # output is a softmax layer. It returns probabilities.
+    # Shape should be (100, 10).
+    probs = paddle.infer(
+        output=inference,
+        parameters=parameters,
+        reader=paddle.reader.batched(
+            paddle.reader.firstn(
+                paddle.reader.map_readers(lambda item: (item[0], ),
+                                          paddle.dataset.mnist.test()),
+                n=100),
+            batch_size=32))
+    print probs.shape
+
 
 if __name__ == '__main__':
     main()
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index f055c845c7c42b3831973ac89d3e90b6969add07..d548d1adaafacdb097dbe476fdc76651c9f46b6b 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -24,13 +24,14 @@
 from . import dataset
 from . import reader
 import attr
 import pooling
+import inferencer
 import networks
 import py_paddle.swig_paddle as api
 
 __all__ = [
     'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
     'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
-    'topology', 'networks'
+    'topology', 'networks', 'inferencer', 'infer'
 ]
 
@@ -40,3 +41,6 @@ def init(**kwargs):
         args.append('--%s=%s' % (key, str(kwargs[key])))
 
     api.initPaddle(*args)
+
+
+infer = inferencer.infer
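The dataset/mnist.py change below wraps the read loop in try/finally so the zcat subprocesses are terminated even when a consumer stops iterating early; that is exactly what the new firstn decorator (end of this patch) does by breaking out of the loop. A minimal sketch of the pattern, using a hypothetical lines_reader over plain cat rather than the MNIST pipeline:

import subprocess

def lines_reader(path):
    # A reader-style generator that owns a subprocess. The finally
    # block runs on normal exhaustion *and* on GeneratorExit, which
    # Python raises inside the generator when the consumer closes
    # (or garbage-collects) it before EOF.
    p = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
    try:
        for line in p.stdout:
            yield line
    finally:
        p.terminate()

g = lines_reader('/etc/hosts')
for i, line in enumerate(g):
    if i == 2:
        break       # stop early, as firstn() does
g.close()           # runs the finally block; the subprocess is terminated

In CPython, simply dropping the last reference to the generator also triggers close() via the garbage collector, which is the question the TODO on firstn raises.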
diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py
index 1512a3c3189de4e54f8502cfadf450b0710a246e..ebcdff78b317ceb4811048ac78982e072962fa9c 100644
--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -35,24 +35,25 @@ def reader_creator(image_filename, label_filename, buffer_size):
         l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE)
         l.stdout.read(8)  # skip some magic bytes
 
-        while True:
-            labels = numpy.fromfile(
-                l.stdout, 'ubyte', count=buffer_size).astype("int")
+        try:  # the consumer may stop iterating early; always clean up.
+            while True:
+                labels = numpy.fromfile(
+                    l.stdout, 'ubyte', count=buffer_size).astype("int")
 
-            if labels.size != buffer_size:
-                break  # numpy.fromfile returns empty slice after EOF.
+                if labels.size != buffer_size:
+                    break  # numpy.fromfile returns empty slice after EOF.
 
-            images = numpy.fromfile(
-                m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
-                    (buffer_size, 28 * 28)).astype('float32')
+                images = numpy.fromfile(
+                    m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
+                        (buffer_size, 28 * 28)).astype('float32')
 
-            images = images / 255.0 * 2.0 - 1.0
+                images = images / 255.0 * 2.0 - 1.0
 
-            for i in xrange(buffer_size):
-                yield images[i, :], int(labels[i])
-
-        m.terminate()
-        l.terminate()
+                for i in xrange(buffer_size):
+                    yield images[i, :], int(labels[i])
+        finally:
+            m.terminate()
+            l.terminate()
 
     return reader
 
diff --git a/python/paddle/v2/inferencer.py b/python/paddle/v2/inferencer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac03b016c9b8bfbc586072855402ed3a373e9b54
--- /dev/null
+++ b/python/paddle/v2/inferencer.py
@@ -0,0 +1,59 @@
+import py_paddle.swig_paddle as api
+
+import topology
+from data_feeder import DataFeeder
+import itertools
+import numpy
+
+__all__ = ['Inference', 'infer']
+
+
+class Inference(object):
+    def __init__(self, output, parameters):
+        topo = topology.Topology(output)
+        gm = api.GradientMachine.createFromConfigProto(
+            topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
+        for param in gm.getParameters():
+            val = param.getBuf(api.PARAMETER_VALUE)
+            name = param.getName()
+            assert isinstance(val, api.Vector)
+            val.copyFromNumpyArray(parameters.get(name).flatten())
+        self.__gradient_machine__ = gm
+        self.__data_types__ = topo.data_type()
+
+    def iter_infer(self, reader, reader_dict=None):
+        if reader_dict is None:
+            reader_dict = self.default_reader_dict()
+        feeder = DataFeeder(self.__data_types__, reader_dict)
+        self.__gradient_machine__.start()
+        for data_batch in reader():
+            yield self.__gradient_machine__.forwardTest(feeder(data_batch))
+        self.__gradient_machine__.finish()
+
+    def iter_infer_field(self, field, **kwargs):
+        for result in self.iter_infer(**kwargs):
+            yield [each_result[field] for each_result in result]
+
+    def infer(self, field='value', **kwargs):
+        retv = None
+        for result in self.iter_infer_field(field=field, **kwargs):
+            if retv is None:
+                retv = [[] for _ in xrange(len(result))]  # independent lists
+            for i, item in enumerate(result):
+                retv[i].append(item)
+        retv = [numpy.concatenate(out) for out in retv]
+        if len(retv) == 1:
+            return retv[0]
+        else:
+            return retv
+
+    def default_reader_dict(self):
+        reader_dict = dict()
+        for i, tp in enumerate(self.__data_types__):
+            reader_dict[tp[0]] = i
+        return reader_dict
+
+
+def infer(output, parameters, reader, reader_dict=None, field='value'):
+    inferer = Inference(output=output, parameters=parameters)
+    return inferer.infer(field=field, reader=reader, reader_dict=reader_dict)
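Inference.infer accumulates one list per network output across batches and concatenates each list into a single array at the end. The accumulator is built with a list comprehension because [[]] * n would produce n references to one shared list, so every output would collect every item. A quick standalone demonstration of both behaviors:

import numpy

aliased = [[]] * 2                 # two names for the SAME list object
aliased[0].append(1)
print(aliased)                     # [[1], [1]] -- both slots changed

separate = [[] for _ in range(2)]  # two independent lists
separate[0].append(1)
print(separate)                    # [[1], []]

# Concatenating per-batch outputs, as Inference.infer does: three
# batches of 32 + 32 + 36 samples become one (100, 10) array,
# matching the shape noted in the demo above.
batches = [numpy.zeros((32, 10)), numpy.zeros((32, 10)),
           numpy.zeros((36, 10))]
print(numpy.concatenate(batches).shape)    # (100, 10)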
+ """ + + # TODO(yuyang18): Check if just drop the reader, could clean the opened + # resource or not? + + def firstn_reader(): + for i, item in enumerate(reader()): + if i == n: + break + yield item + + return firstn_reader