diff --git a/fluid/image_classification/reader.py b/fluid/image_classification/reader.py index 6f17cd8c8b7ac497939cc98da7d24c457f23af66..ce992c7e99cbd7522d525837cda311e69b28b6cd 100644 --- a/fluid/image_classification/reader.py +++ b/fluid/image_classification/reader.py @@ -1,4 +1,5 @@ import os +import math import random import functools import numpy as np @@ -7,10 +8,6 @@ from PIL import Image, ImageEnhance random.seed(0) -_R_MEAN = 123.0 -_G_MEAN = 117.0 -_B_MEAN = 104.0 - DATA_DIM = 224 THREAD = 8 @@ -20,7 +17,8 @@ DATA_DIR = 'ILSVRC2012' TRAIN_LIST = 'ILSVRC2012/train_list.txt' TEST_LIST = 'ILSVRC2012/test_list.txt' -img_mean = np.array([_R_MEAN, _G_MEAN, _B_MEAN]).reshape((3, 1, 1)) +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) def resize_short(img, target_size): @@ -46,6 +44,30 @@ def crop_image(img, target_size, center): return img +def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]): + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + bound = min((float(img.size[0]) / img.size[1]) / (w**2), + (float(img.size[1]) / img.size[0]) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img.size[0] * img.size[1] * random.uniform(scale_min, + scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img.size[0] - w) + j = random.randint(0, img.size[1] - h) + + img = img.crop((i, j, i + w, j + h)) + img = img.resize((size, size), Image.LANCZOS) + return img + + def rotate_image(img): angle = random.randint(-10, 10) img = img.rotate(angle) @@ -75,26 +97,28 @@ def distort_color(img): return img -def process_image(sample, mode): +def process_image(sample, mode, color_jitter, rotate): img_path = sample[0] img = Image.open(img_path) if mode == 'train': - img = resize_short(img, DATA_DIM + 32) - img = rotate_image(img) + if rotate: img = rotate_image(img) + img = random_crop(img, DATA_DIM) else: img = resize_short(img, DATA_DIM) - img = crop_image(img, target_size=DATA_DIM, center=(mode != 'train')) + img = crop_image(img, target_size=DATA_DIM, center=True) if mode == 'train': - img = distort_color(img) + if color_jitter: + img = distort_color(img) if random.randint(0, 1) == 1: img = img.transpose(Image.FLIP_LEFT_RIGHT) if img.mode != 'RGB': img = img.convert('RGB') - img = np.array(img).astype('float32').transpose((2, 0, 1)) + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 img -= img_mean + img /= img_std if mode == 'train' or mode == 'test': return img, sample[1] @@ -102,7 +126,11 @@ def process_image(sample, mode): return img -def _reader_creator(file_list, mode, shuffle=False): +def _reader_creator(file_list, + mode, + shuffle=False, + color_jitter=False, + rotate=False): def reader(): with open(file_list) as flist: lines = [line.strip() for line in flist] @@ -117,13 +145,15 @@ def _reader_creator(file_list, mode, shuffle=False): img_path = os.path.join(DATA_DIR, line) yield [img_path] - mapper = functools.partial(process_image, mode=mode) + mapper = functools.partial( + process_image, mode=mode, color_jitter=color_jitter, rotate=rotate) return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) def train(): - return _reader_creator(TRAIN_LIST, 'train', shuffle=True) + return _reader_creator( + TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True) def test(): diff --git a/fluid/image_classification/se_resnext.py b/fluid/image_classification/se_resnext.py index 46b938f1f696f6801faa116e62663d3fdc0b5afc..4dc1263f58ae17d30d01cd8c02fbebe816678371 100644 --- a/fluid/image_classification/se_resnext.py +++ b/fluid/image_classification/se_resnext.py @@ -35,7 +35,11 @@ def squeeze_excitation(input, num_channels, reduction_ratio): def shortcut(input, ch_out, stride): ch_in = input.shape[1] if ch_in != ch_out: - return conv_bn_layer(input, ch_out, 3, stride) + if stride == 1: + filter_size = 1 + else: + filter_size = 3 + return conv_bn_layer(input, ch_out, filter_size, stride) else: return input @@ -109,9 +113,9 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'): avg_cost = fluid.layers.mean(x=cost) optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate / batch_size, + learning_rate=learning_rate, momentum=0.9, - regularization=fluid.regularizer.L2Decay(1e-4 * batch_size)) + regularization=fluid.regularizer.L2Decay(1e-4)) opts = optimizer.minimize(avg_cost) accuracy = fluid.evaluator.Accuracy(input=out, label=label) @@ -125,8 +129,8 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - train_reader = paddle.batch(datareader.train(), batch_size=batch_size) - test_reader = paddle.batch(datareader.test(), batch_size=batch_size) + train_reader = paddle.batch(reader.train(), batch_size=batch_size) + test_reader = paddle.batch(reader.test(), batch_size=batch_size) feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) for pass_id in range(num_passes): @@ -153,4 +157,4 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'): if __name__ == '__main__': - train(learning_rate=0.1, batch_size=7, num_passes=100) + train(learning_rate=0.1, batch_size=8, num_passes=100)