未验证 提交 d55cfc60 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #11640 from wojtuss/wojtuss/cycle-cifar-flowers

added cycling the cifar and flowers datasets
......@@ -43,7 +43,7 @@ CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
def reader_creator(filename, sub_name):
def reader_creator(filename, sub_name, cycle=False):
def read_batch(batch):
data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None))
......@@ -56,10 +56,13 @@ def reader_creator(filename, sub_name):
names = (each_item.name for each_item in f
if sub_name in each_item.name)
while True:
for name in names:
batch = cPickle.load(f.extractfile(name))
for item in read_batch(batch):
yield item
if not cycle:
break
return reader
......@@ -94,34 +97,40 @@ def test100():
'test')
def train10():
def train10(cycle=False):
"""
CIFAR-10 training set creator.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: Training reader creator
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch')
'data_batch',
cycle=cycle)
def test10():
def test10(cycle=False):
"""
CIFAR-10 test set creator.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: Test reader creator.
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch')
'test_batch',
cycle=cycle)
def fetch():
......
......@@ -76,7 +76,8 @@ def reader_creator(data_file,
dataset_name,
mapper,
buffered_size=1024,
use_xmap=True):
use_xmap=True,
cycle=False):
'''
1. read images from tar file and
merge images into batch files in 102flowers.tgz_batch/
......@@ -96,6 +97,8 @@ def reader_creator(data_file,
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: data reader
:rtype: callable
'''
......@@ -108,6 +111,7 @@ def reader_creator(data_file,
file_list = batch_images_from_tar(data_file, dataset_name, img2label)
def reader():
while True:
for file in open(file_list):
file = file.strip()
batch = None
......@@ -117,6 +121,8 @@ def reader_creator(data_file,
labels = batch['label']
for sample, label in itertools.izip(data, batch['label']):
yield sample, int(label) - 1
if not cycle:
break
if use_xmap:
cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
......@@ -125,7 +131,7 @@ def reader_creator(data_file,
return map_readers(mapper, reader)
def train(mapper=train_mapper, buffered_size=1024, use_xmap=True):
def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
'''
Create flowers training set reader.
It returns a reader, each sample in the reader is
......@@ -138,17 +144,23 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True):
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: train data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), TRAIN_FLAG, mapper,
buffered_size, use_xmap)
download(SETID_URL, 'flowers', SETID_MD5),
TRAIN_FLAG,
mapper,
buffered_size,
use_xmap,
cycle=cycle)
def test(mapper=test_mapper, buffered_size=1024, use_xmap=True):
def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
'''
Create flowers test set reader.
It returns a reader, each sample in the reader is
......@@ -161,14 +173,20 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True):
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:param cycle: whether to cycle through the dataset
:type cycle: bool
:return: test data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), TEST_FLAG, mapper,
buffered_size, use_xmap)
download(SETID_URL, 'flowers', SETID_MD5),
TEST_FLAG,
mapper,
buffered_size,
use_xmap,
cycle=cycle)
def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册