Merge pull request #585 from will-am/add_rotate_augmentation

Add rotate augmentation in SE-ResNeXt

Merge pull request #585 from will-am/add_rotate_augmentation
Add rotate augmentation in SE-ResNeXt
f2f6daa2 · Wang Meng · GitHub · 47f591da · da613567 · f2f6daa2
隐藏空白更改
内联 | 并排

Showing 2 changed files with 58 additions and 17 deletions

fluid/image_classification/reader.py +50 -13

fluid/image_classification/se_resnext.py +8 -4

未找到文件。
--- a/fluid/image_classification/reader.py
+++ b/fluid/image_classification/reader.py
 import os
+import math
 import random
 import functools
 import numpy as np
@@ -7,10 +8,6 @@ from PIL import Image, ImageEnhance

 random.seed(0)

-_R_MEAN = 123.0
-_G_MEAN = 117.0
-_B_MEAN = 104.0
-
 DATA_DIM = 224

 THREAD = 8
@@ -20,7 +17,8 @@ DATA_DIR = 'ILSVRC2012'
 TRAIN_LIST = 'ILSVRC2012/train_list.txt'
 TEST_LIST = 'ILSVRC2012/test_list.txt'

-img_mean = np.array([_R_MEAN, _G_MEAN, _B_MEAN]).reshape((3, 1, 1))
+img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))


 def resize_short(img, target_size):
@@ -46,6 +44,36 @@ def crop_image(img, target_size, center):
    return img


+def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
+    aspect_ratio = math.sqrt(random.uniform(*ratio))
+    w = 1. * aspect_ratio
+    h = 1. / aspect_ratio
+
+    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+                (float(img.size[1]) / img.size[0]) / (h**2))
+    scale_max = min(scale[1], bound)
+    scale_min = min(scale[0], bound)
+
+    target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
+                                                             scale_max)
+    target_size = math.sqrt(target_area)
+    w = int(target_size * w)
+    h = int(target_size * h)
+
+    i = random.randint(0, img.size[0] - w)
+    j = random.randint(0, img.size[1] - h)
+
+    img = img.crop((i, j, i + w, j + h))
+    img = img.resize((size, size), Image.LANCZOS)
+    return img
+
+
+def rotate_image(img):
+    angle = random.randint(-10, 10)
+    img = img.rotate(angle)
+    return img
+
+
 def distort_color(img):
    def random_brightness(img, lower=0.5, upper=1.5):
        e = random.uniform(lower, upper)
@@ -69,25 +97,28 @@ def distort_color(img):
    return img


-def process_image(sample, mode):
+def process_image(sample, mode, color_jitter, rotate):
    img_path = sample[0]

    img = Image.open(img_path)
    if mode == 'train':
-        img = resize_short(img, DATA_DIM + 32)
+        if rotate: img = rotate_image(img)
+        img = random_crop(img, DATA_DIM)
    else:
        img = resize_short(img, DATA_DIM)
-    img = crop_image(img, target_size=DATA_DIM, center=(mode != 'train'))
+        img = crop_image(img, target_size=DATA_DIM, center=True)
    if mode == 'train':
-        img = distort_color(img)
+        if color_jitter:
+            img = distort_color(img)
        if random.randint(0, 1) == 1:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

    if img.mode != 'RGB':
        img = img.convert('RGB')

-    img = np.array(img).astype('float32').transpose((2, 0, 1))
+    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
    img -= img_mean
+    img /= img_std

    if mode == 'train' or mode == 'test':
        return img, sample[1]
@@ -95,7 +126,11 @@ def process_image(sample, mode):
        return img


-def _reader_creator(file_list, mode, shuffle=False):
+def _reader_creator(file_list,
+                    mode,
+                    shuffle=False,
+                    color_jitter=False,
+                    rotate=False):
    def reader():
        with open(file_list) as flist:
            lines = [line.strip() for line in flist]
@@ -110,13 +145,15 @@ def _reader_creator(file_list, mode, shuffle=False):
                    img_path = os.path.join(DATA_DIR, line)
                    yield [img_path]

-    mapper = functools.partial(process_image, mode=mode)
+    mapper = functools.partial(
+        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)

    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)


 def train():
-    return _reader_creator(TRAIN_LIST, 'train', shuffle=True)
+    return _reader_creator(
+        TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True)


 def test():

--- a/fluid/image_classification/se_resnext.py
+++ b/fluid/image_classification/se_resnext.py
@@ -35,7 +35,11 @@ def squeeze_excitation(input, num_channels, reduction_ratio):
 def shortcut(input, ch_out, stride):
    ch_in = input.shape[1]
    if ch_in != ch_out:
-        return conv_bn_layer(input, ch_out, 3, stride)
+        if stride == 1:
+            filter_size = 1
+        else:
+            filter_size = 3
+        return conv_bn_layer(input, ch_out, filter_size, stride)
    else:
        return input

@@ -109,9 +113,9 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Momentum(
-        learning_rate=learning_rate / batch_size,
+        learning_rate=learning_rate,
        momentum=0.9,
-        regularization=fluid.regularizer.L2Decay(1e-4 * batch_size))
+        regularization=fluid.regularizer.L2Decay(1e-4))
    opts = optimizer.minimize(avg_cost)
    accuracy = fluid.evaluator.Accuracy(input=out, label=label)

@@ -153,4 +157,4 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):


 if __name__ == '__main__':
-    train(learning_rate=0.1, batch_size=7, num_passes=100)
+    train(learning_rate=0.1, batch_size=8, num_passes=100)