# imagenet_dataset.py

import os
import cv2
import math
import random
import numpy as np
from paddle.fluid.io import Dataset

from datasets.folder import DatasetFolder


def center_crop_resize(img):
    """Center-crop a square region of an image and resize it to 224x224.

    The crop side is 224/256 of the shorter image side (truncated to an
    integer) and the crop window is centered in the image.

    Args:
        img: image array indexed as (height, width, channel); it is sliced
            along three axes, so a 2-D grayscale array is not accepted.

    Returns:
        The cropped region resized to 224x224 with bilinear interpolation.
    """
    h, w = img.shape[:2]
    c = int(224 / 256 * min((h, w)))
    i = (h + 1 - c) // 2
    j = (w + 1 - c) // 2
    img = img[i:i + c, j:j + c, :]
    # Pass the interpolation flag by keyword: the positional parameters of
    # cv2.resize after dsize are (dst, fx, fy, interpolation), so the former
    # positional call sent 0 to dst and INTER_LINEAR to fy.
    return cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)


def random_crop_resize(img):
    """Take a random-area, random-aspect-ratio crop and resize it to 224x224.

    Up to 10 attempts are made to sample a crop covering 8%-100% of the
    image area with an aspect ratio drawn log-uniformly from [3/4, 4/3].
    The first candidate that fits inside the image is used; if none fits,
    the result of ``center_crop_resize`` is returned instead.

    Args:
        img: image array indexed as (height, width, channel).

    Returns:
        The selected crop resized to 224x224 with bilinear interpolation.
    """
    height, width = img.shape[:2]
    area = height * width
    # Loop-invariant bounds of the log-uniform aspect-ratio distribution.
    log_ratio = (math.log(3 / 4), math.log(4 / 3))

    for _ in range(10):
        target_area = random.uniform(0.08, 1.) * area
        aspect_ratio = math.exp(random.uniform(*log_ratio))

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        # Reject zero-sized candidates too: an empty crop cannot be resized.
        if 0 < w <= width and 0 < h <= height:
            i = random.randint(0, height - h)
            j = random.randint(0, width - w)
            crop = img[i:i + h, j:j + w, :]
            # interpolation must be given by keyword; positionally the
            # arguments after dsize are (dst, fx, fy, interpolation).
            return cv2.resize(
                crop, (224, 224), interpolation=cv2.INTER_LINEAR)

    return center_crop_resize(img)


def random_flip(img):
    """Mirror the image along its second axis with probability 1/2.

    Args:
        img: array indexed along three axes (rows, columns, channels).

    Returns:
        Either ``img`` itself or a column-reversed view of it.
    """
    # randint(0, 2) yields 0 or 1; a draw of 1 selects the mirrored view.
    should_flip = np.random.randint(0, 2) == 1
    return img[:, ::-1, :] if should_flip else img


def normalize_permute(img):
    """Convert an HWC BGR image into a normalized float32 CHW RGB array.

    Args:
        img: array of shape (H, W, 3) in BGR channel order. The per-channel
            statistics below are defined for exactly three channels.

    Returns:
        A float32 array of shape (3, H, W) in RGB order where each channel
        is ``(value - mean) / std``. The input array is not modified
        because ``astype`` produces a copy.
    """
    # transpose to CHW, then reverse the channel axis to go from BGR to RGB
    img = img.astype(np.float32).transpose((2, 0, 1))[::-1, ...]
    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
    std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
    invstd = 1. / std
    # Broadcast the per-channel statistics over H and W and apply them in
    # place on the float32 copy made above.
    img -= mean[:, np.newaxis, np.newaxis]
    img *= invstd[:, np.newaxis, np.newaxis]
    return img


def compose(functions):
    """Build a callable that runs ``functions`` in order on a sample's image.

    Args:
        functions: iterable of one-argument callables, applied first to last.

    Returns:
        A function taking an ``(img, label)`` pair and returning
        ``(transformed_img, label)``; the label is passed through untouched.
    """

    def process(sample):
        data, target = sample
        for step in functions:
            # each step consumes the output of the previous one
            data = step(data)
        return data, target

    return process


def image_folder(path):
    """List image files under ``path``, labelled by their class directory.

    Every immediate sub-directory of ``path`` is a class; classes are
    numbered by the sorted order of their directory names. Each class
    directory is walked recursively and files whose extension
    (case-insensitive) is a known image type are collected.

    Args:
        path: root directory containing one sub-directory per class.

    Returns:
        A list of ``(file_path, [class_index])`` tuples ordered by class,
        then by directory, then by file name.
    """
    valid_ext = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.webp')
    class_names = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry)))

    samples = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(path, class_name)
        for root, _, fnames in sorted(os.walk(class_dir)):
            candidates = (
                os.path.join(root, fname) for fname in sorted(fnames))
            samples.extend(
                (p, [label]) for p in candidates
                if os.path.splitext(p)[1].lower() in valid_ext)
    return samples


class ImageNetDataset(DatasetFolder):
    """Folder-backed dataset that loads and preprocesses images on access.

    In ``'train'`` mode each image is read, randomly cropped and resized,
    randomly flipped, then normalized. In any other mode the random steps
    are replaced by a deterministic center crop.

    NOTE(review): ``self.samples`` is not assigned in this class; it is
    presumably populated by ``DatasetFolder.__init__`` -- confirm.
    """

    def __init__(self, path, mode='train'):
        """Initialise the base dataset from ``path`` and build the transform.

        Args:
            path: forwarded unchanged to ``DatasetFolder.__init__``.
            mode: ``'train'`` selects the randomized pipeline; any other
                value selects the center-crop pipeline.
        """
        super(ImageNetDataset, self).__init__(path)
        self.mode = mode
        if self.mode == 'train':
            spatial_ops = [random_crop_resize, random_flip]
        else:
            spatial_ops = [center_crop_resize]
        # Every pipeline starts by reading the file and ends by normalizing.
        self.transform = compose(
            [cv2.imread] + spatial_ops + [normalize_permute])

    def __getitem__(self, idx):
        """Return the transformed sample stored at position ``idx``."""
        sample = self.samples[idx]
        return self.transform(sample)

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.samples)