提交 ff4e0463 作者: wangyang59

improve demo/mnist dataProvider speed

上级 5ac16e5e
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
import numpy
# Define a py data provider # Define a py data provider
@provider( @provider(
input_types={'pixel': dense_vector(28 * 28), input_types={'pixel': dense_vector(28 * 28),
'label': integer_value(10)}) 'label': integer_value(10)},
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename): # settings is not used currently. def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte" imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte" labelf = filename + "-labels-idx1-ubyte"
...@@ -20,12 +21,12 @@ def process(settings, filename): # settings is not used currently. ...@@ -20,12 +21,12 @@ def process(settings, filename): # settings is not used currently.
else: else:
n = 10000 n = 10000
for i in range(n): images = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28)).astype('float32')
label = ord(l.read(1)) images = images / 255.0 * 2.0 - 1.0
pixels = [] labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for j in range(28 * 28):
pixels.append(float(ord(f.read(1))) / 255.0) for i in xrange(n):
yield {"pixel": pixels, 'label': label} yield {"pixel": images[i, :], 'label': labels[i]}
f.close() f.close()
l.close() l.close()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手引导
客服
返回顶部