在训练时使用reader读取数据很慢
Created by: yeyupiaoling
环境
- PaddlePaddle 1.2.0
- Python 3.5
- Ubuntu 16.04
- 模型:MobileNet V1
问题
我使用以下的Python代码读取图像数据,但是每一次加载数据都很慢,比训练时间还长,这是什么原因呢?是否能解决?
# Preprocessing for a single training image.
def train_mapper(sample):
    """Load one image from disk and apply the training-time augmentations.

    Args:
        sample: ``(image_path, label)`` pair, as yielded by the inner reader
            of ``train_reader``.

    Returns:
        ``(img, label)`` where ``img`` is a ``float32`` array in CHW layout
        with the channel axis reordered to BGR and values divided by 255,
        and ``label`` is converted to ``int``.
    """
    path, label = sample
    resize_size = cfg.READER.RESIZE_SIZE
    crop_size = cfg.READER.CROP_SIZE

    img = Image.open(path)
    try:
        # Grayscale / palette images would otherwise yield a 2-D array and
        # break the HWC -> CHW transpose below, so force three channels.
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # Bring every image to the same square size.
        img = img.resize((resize_size, resize_size), Image.ANTIALIAS)
    except Exception:
        # Best effort, as in the original: report the offending file and
        # keep going.  Only ``Exception`` is caught so that Ctrl-C and
        # interpreter shutdown are no longer swallowed.
        print(path)

    # Random horizontal flip.
    r1 = random.random()
    if r1 > 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Random vertical flip.
    r2 = random.random()
    if r2 > 0.5:
        img = img.transpose(Image.FLIP_TOP_BOTTOM)

    # Random small rotation, between -3 and 3 degrees, canvas size kept.
    r3 = random.randint(-3, 3)
    img = img.rotate(r3, expand=False)

    # Random crop of crop_size x crop_size inside the resized image.
    max_offset = int(resize_size - crop_size)
    r4 = random.randint(0, max_offset)
    r5 = random.randint(0, max_offset)
    box = (r4, r5, r4 + crop_size, r5 + crop_size)
    img = img.crop(box)

    # Convert the image to a numpy array.
    img = np.array(img).astype(np.float32)
    # HWC -> CHW.
    img = img.transpose((2, 0, 1))
    # Reorder channels RGB -> BGR and scale by 1/255.
    img = img[(2, 1, 0), :, :] / 255.0
    return img, int(label)
# Build the reader used for training.
def train_reader(train_list_path):
    """Create a reader that yields preprocessed training samples.

    Each non-blank line of the list file is expected to be
    ``<relative image path>\\t<label>``; image paths are resolved relative to
    the directory containing the list file.

    Args:
        train_list_path: Path to the tab-separated training list file.

    Returns:
        The reader produced by ``paddle.reader.xmap_readers``, which applies
        ``train_mapper`` to every ``(image_path, label)`` pair.  The third and
        fourth arguments are ``cpu_count()`` and ``1024`` (worker count and
        buffer size according to the PaddlePaddle API documentation).
    """
    father_path = os.path.dirname(train_list_path)

    def reader():
        with open(train_list_path, 'r') as f:
            # Iterate lazily instead of loading the whole list with
            # readlines().
            for line in f:
                # A blank line (e.g. at the end of the file) would make the
                # two-value unpack below raise ValueError; skip it.
                if not line.strip():
                    continue
                img, label = line.split('\t')
                img = os.path.join(father_path, img)
                # The label is passed through untouched (trailing newline
                # included); train_mapper's int() tolerates the whitespace.
                yield img, label

    return paddle.reader.xmap_readers(train_mapper, reader, cpu_count(), 1024)