提交 e7ca8b27 编写于 作者: W wangkuiyi 提交者: GitHub

Merge pull request #1499 from reyoung/feature/inferencer

Complete inferencer
......@@ -44,6 +44,19 @@ def main():
batch_size=32),
event_handler=event_handler)
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.reader.batched(
paddle.reader.firstn(
paddle.reader.map_readers(lambda item: (item[0], ),
paddle.dataset.mnist.test()),
n=100),
batch_size=32))
print probs.shape
if __name__ == '__main__':
main()
......@@ -24,13 +24,14 @@ from . import dataset
from . import reader
import attr
import pooling
import inferencer
import networks
import py_paddle.swig_paddle as api
__all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
'topology', 'networks'
'topology', 'networks', 'inferencer', 'infer'
]
......@@ -40,3 +41,6 @@ def init(**kwargs):
args.append('--%s=%s' % (key, str(kwargs[key])))
api.initPaddle(*args)
infer = inferencer.infer
......@@ -35,24 +35,25 @@ def reader_creator(image_filename, label_filename, buffer_size):
l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE)
l.stdout.read(8) # skip some magic bytes
while True:
labels = numpy.fromfile(
l.stdout, 'ubyte', count=buffer_size).astype("int")
try: # reader could be break.
while True:
labels = numpy.fromfile(
l.stdout, 'ubyte', count=buffer_size).astype("int")
if labels.size != buffer_size:
break # numpy.fromfile returns empty slice after EOF.
if labels.size != buffer_size:
break # numpy.fromfile returns empty slice after EOF.
images = numpy.fromfile(
m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
(buffer_size, 28 * 28)).astype('float32')
images = numpy.fromfile(
m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
(buffer_size, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
images = images / 255.0 * 2.0 - 1.0
for i in xrange(buffer_size):
yield images[i, :], int(labels[i])
m.terminate()
l.terminate()
for i in xrange(buffer_size):
yield images[i, :], int(labels[i])
finally:
m.terminate()
l.terminate()
return reader
......
import py_paddle.swig_paddle as api
import topology
from data_feeder import DataFeeder
import itertools
import numpy
__all__ = ['Inference', 'infer']
class Inference(object):
def __init__(self, output, parameters):
topo = topology.Topology(output)
gm = api.GradientMachine.createFromConfigProto(
topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
for param in gm.getParameters():
val = param.getBuf(api.PARAMETER_VALUE)
name = param.getName()
assert isinstance(val, api.Vector)
val.copyFromNumpyArray(parameters.get(name).flatten())
self.__gradient_machine__ = gm
self.__data_types__ = topo.data_type()
def iter_infer(self, reader, reader_dict=None):
if reader_dict is None:
reader_dict = self.default_reader_dict()
feeder = DataFeeder(self.__data_types__, reader_dict)
self.__gradient_machine__.start()
for data_batch in reader():
yield self.__gradient_machine__.forwardTest(feeder(data_batch))
self.__gradient_machine__.finish()
def iter_infer_field(self, field, **kwargs):
for result in self.iter_infer(**kwargs):
yield [each_result[field] for each_result in result]
def infer(self, field='value', **kwargs):
retv = None
for result in self.iter_infer_field(field=field, **kwargs):
if retv is None:
retv = [[]] * len(result)
for i, item in enumerate(result):
retv[i].append(item)
retv = [numpy.concatenate(out) for out in retv]
if len(retv) == 1:
return retv[0]
else:
return retv
def default_reader_dict(self):
reader_dict = dict()
for i, tp in enumerate(self.__data_types__):
reader_dict[tp[0]] = i
return reader_dict
def infer(output, parameters, reader, reader_dict=None, field='value'):
inferer = Inference(output=output, parameters=parameters)
return inferer.infer(field=field, reader=reader, reader_dict=reader_dict)
......@@ -14,13 +14,13 @@
__all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'batched'
'ComposeNotAligned', 'batched', 'firstn'
]
from Queue import Queue
from threading import Thread
import itertools
import random
from Queue import Queue
from threading import Thread
def map_readers(func, *readers):
......@@ -213,3 +213,20 @@ def batched(reader, batch_size):
yield batch
return batched_reader
def firstn(reader, n):
"""
Limit the max number of samples that reader could return.
"""
# TODO(yuyang18): Check if just drop the reader, could clean the opened
# resource or not?
def firstn_reader():
for i, item in enumerate(reader()):
if i == n:
break
yield item
return firstn_reader
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册