# reader.py -- image-list readers for training and testing with paddle.v2
import random
from paddle.v2.image import load_and_transform
import paddle.v2 as paddle
from multiprocessing import cpu_count


def train_mapper(sample):
    '''
    Turn one training sample (image path, label) into model input.

    The image is loaded from its path, run through
    paddle.image.simple_transform with arguments (256, 224, True), and
    returned as a flattened float32 array together with the untouched label.
    '''
    image_path, label = sample
    pixels = paddle.image.simple_transform(
        paddle.image.load_image(image_path), 256, 224, True)
    flat = pixels.flatten()
    return flat.astype('float32'), label


def test_mapper(sample):
    '''
    Turn one test sample (image path, label) into model input.

    The image is loaded from its path, run through
    paddle.image.simple_transform with arguments (256, 224, False), and
    returned as a flattened float32 array together with the untouched label.

    :param sample: an (image path, label) pair.
    :return: (flattened float32 image array, label).
    '''
    img, label = sample
    img = paddle.image.load_image(img)
    # The fourth argument is the training flag. It must be False here so
    # that evaluation does not get the random augmentation meant for
    # training (which would make test results vary from run to run).
    img = paddle.image.simple_transform(img, 256, 224, False)
    return img.flatten().astype('float32'), label


def train_reader(train_list, buffered_size=1024):
    '''
    Build the training-set reader from a list file.

    Every non-blank line of the list file must hold an image path and an
    integer label separated by a single tab character.

    :param train_list: path of the training list file.
    :param buffered_size: buffer size passed to paddle.reader.xmap_readers.
    :return: the result of paddle.reader.xmap_readers, which applies
             train_mapper to each (image path, label) sample using
             cpu_count() workers.
    :raises ValueError: (when the reader is consumed) if a non-blank line
             does not split into exactly two tab-separated fields or the
             label is not an integer.
    '''
    def reader():
        with open(train_list, 'r') as f:
            # Stream the file line by line instead of loading it whole.
            for line in f:
                line = line.strip()
                # A blank line (typically a trailing newline at the end of
                # the list) carries no sample; skip it rather than crash
                # on the unpacking below.
                if not line:
                    continue
                img_path, lab = line.split('\t')
                yield img_path, int(lab)

    return paddle.reader.xmap_readers(train_mapper, reader,
                                      cpu_count(), buffered_size)


def test_reader(test_list, buffered_size=1024):
    '''
    Build the test-set reader from a list file.

    Every non-blank line of the list file must hold an image path and an
    integer label separated by a single tab character.

    :param test_list: path of the test list file.
    :param buffered_size: buffer size passed to paddle.reader.xmap_readers.
    :return: the result of paddle.reader.xmap_readers, which applies
             test_mapper to each (image path, label) sample using
             cpu_count() workers.
    :raises ValueError: (when the reader is consumed) if a non-blank line
             does not split into exactly two tab-separated fields or the
             label is not an integer.
    '''
    def reader():
        with open(test_list, 'r') as f:
            # Stream the file line by line instead of loading it whole.
            for line in f:
                line = line.strip()
                # A blank line (typically a trailing newline at the end of
                # the list) carries no sample; skip it rather than crash
                # on the unpacking below.
                if not line:
                    continue
                img_path, lab = line.split('\t')
                yield img_path, int(lab)

    return paddle.reader.xmap_readers(test_mapper, reader,
                                      cpu_count(), buffered_size)


if __name__ == '__main__':
    # Smoke test: print the flattened image length of every sample in the
    # training and test lists found in the current directory.
    # train_reader/test_reader return a reader callable, so it has to be
    # invoked to obtain the iterable of (image, label) samples.
    for im in train_reader('train.list')():
        print(len(im[0]))
    # The test list goes through test_reader so that test_mapper is used.
    for im in test_reader('test.list')():
        print(len(im[0]))