提交 05204af1 编写于 作者: Q qingqing01 提交者: Yu Yang

Fix memory leak in image classification demo, which is caused by dataprovider (#323)

* the memory leak is inside one pass.
上级 bd50f93e
......@@ -5,3 +5,5 @@ plot.png
train.log
image_provider_copy_1.py
*pyc
train.list
test.list
文件模式从 100644 更改为 100755
......@@ -58,14 +58,19 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.logger.info('DataProvider Initialization finished')
@provider(init_hook=hook)
def processData(settings, file_name):
@provider(init_hook=hook, min_pool_size=0)
def processData(settings, file_list):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
file_name: the batch file name.
file_list: the batch file list.
"""
data = cPickle.load(io.open(file_name, 'rb'))
with open(file_list, 'r') as fdata:
lines = [line.strip() for line in fdata]
random.shuffle(lines)
for file_name in lines:
with io.open(file_name.strip(), 'rb') as file:
data = cPickle.load(file)
indexes = list(range(len(data['images'])))
if settings.is_train:
random.shuffle(indexes)
......@@ -78,4 +83,4 @@ def processData(settings, file_name):
settings.img_size, settings.is_train,
settings.color)
label = data['labels'][i]
yield img_feat.tolist(), int(label)
yield img_feat.astype('float32'), int(label)
......@@ -35,6 +35,8 @@ if __name__ == '__main__':
data_creator = ImageClassificationDatasetCreater(data_dir,
processed_image_size,
color)
data_creator.train_list_name = "train.txt"
data_creator.test_list_name = "test.txt"
data_creator.num_per_batch = 1000
data_creator.overwrite = True
data_creator.create_batches()
......@@ -17,3 +17,6 @@ set -e
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
echo "data/cifar-out/batches/train.txt" > train.list
echo "data/cifar-out/batches/test.txt" > test.list
......@@ -25,8 +25,8 @@ if not is_predict:
'img_size': 32,'num_classes': 10,
'use_jpeg': 1,'color': "color"}
define_py_data_sources2(train_list=data_dir+"train.list",
test_list=data_dir+'test.list',
define_py_data_sources2(train_list="train.list",
test_list="train.list",
module='image_provider',
obj='processData',
args=args)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册