class ImageNetDataset(DatasetFolder):
    """ImageNet dataset built on top of ``DatasetFolder``.

    Applies the standard ImageNet preprocessing pipeline: in ``'train'``
    mode a random-resized crop to 224 plus a random horizontal flip; in
    any other mode a resize of the shorter edge to 256 followed by a
    224 center crop. Both pipelines permute HWC -> CHW and normalize
    with per-channel statistics on the 0-255 pixel scale.

    Args:
        path (str): Root directory laid out one-subfolder-per-class, as
            expected by ``DatasetFolder``.
        mode (str): ``'train'`` selects the training transforms; any
            other value selects the evaluation transforms.
    """

    def __init__(self, path, mode='train'):
        super(ImageNetDataset, self).__init__(path)
        self.mode = mode

        # Mean/std are expressed on the raw 0-255 scale, matching the
        # float32 image produced in __getitem__ (no /255 rescaling).
        normalize = transforms.Normalize(
            mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
        if self.mode == 'train':
            self.transform = transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.Permute(mode='CHW'), normalize
            ])
        else:
            self.transform = transforms.Compose([
                transforms.Resize(256), transforms.CenterCrop(224),
                transforms.Permute(mode='CHW'), normalize
            ])

    def __getitem__(self, idx):
        """Load sample ``idx``; returns (transformed CHW image, [label])."""
        img_path, label = self.samples[idx]
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of
        # raising; surface the offending path instead of letting the
        # later .astype call fail with AttributeError on NoneType.
        if img is None:
            raise IOError('Failed to read image: {}'.format(img_path))
        return self.transform(img.astype(np.float32)), [label]

    def __len__(self):
        return len(self.samples)
def flip(image, code):
    """Flip ``image`` using ``cv2.flip``.

    Args:
        image: Input image with (H, W, C) layout.
        code: Flip type; -1 flips both axes, 0 flips vertically,
            1 flips horizontally.
    """
    return cv2.flip(image, flipCode=code)


def resize(img, size, interpolation=cv2.INTER_LINEAR):
    """Resize ``img`` to the requested size.

    Args:
        img: Input data (image or mask) with (H, W, C) layout.
        size: Either an (height, width) pair used verbatim, or an int
            giving the target length of the shorter edge (the aspect
            ratio is preserved).
        interpolation: A cv2 interpolation flag, or a sequence of flags
            from which one is chosen at random.
    """
    if isinstance(interpolation, Sequence):
        interpolation = random.choice(interpolation)

    if not isinstance(size, int):
        # Explicit (h, w) target; note cv2.resize expects (w, h).
        return cv2.resize(img, size[::-1], interpolation=interpolation)

    h, w = img.shape[:2]
    # Shorter edge already matches -> nothing to do.
    if (w <= h and w == size) or (h <= w and h == size):
        return img
    if w < h:
        target = (size, int(size * h / w))
    else:
        target = (int(size * w / h), size)
    return cv2.resize(img, target, interpolation=interpolation)
class Compose(object):
    """Chain several transforms into one callable.

    Args:
        transforms (list of ``Transform`` objects): transforms applied
            in order to each input.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for transform in self.transforms:
            img = transform(img)
        return img

    def __repr__(self):
        pieces = [self.__class__.__name__ + '(']
        pieces.extend('    {0}'.format(t) for t in self.transforms)
        return '\n'.join(pieces) + '\n)'


class Resize(object):
    """Resize the input image to the given size.

    Args:
        size (int|list|tuple): Desired output size. A sequence (h, w)
            is used directly; an int is matched to the smaller edge of
            the image (i.e. if height > width, the image is rescaled to
            (size * height / width, size)).
        interpolation (int): cv2 interpolation flag. Default:
            cv2.INTER_LINEAR.
    """

    def __init__(self, size, interpolation=cv2.INTER_LINEAR):
        size_ok = isinstance(size, int) or (isinstance(size, Iterable) and
                                            len(size) == 2)
        assert size_ok
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        """Return the rescaled image."""
        return F.resize(img, self.size, self.interpolation)
class RandomResizedCrop(object):
    """Crop a random-area, random-aspect patch and resize it.

    A patch covering a ``scale`` fraction of the original area, with an
    aspect ratio drawn from ``ratio``, is cropped and then resized to
    ``output_size``. If no valid patch is found within a fixed number of
    attempts, a central crop clamped to the aspect-ratio range is used.

    Args:
        output_size (int|list|tuple): Target size (height, width); an
            int produces a square output.
        scale (list|tuple): Area range of the crop. Default: (0.08, 1.0).
        ratio (list|tuple): Aspect-ratio range. Default: (0.75, 1.33).
        interpolation (int): cv2 interpolation flag.
    """

    def __init__(self,
                 output_size,
                 scale=(0.08, 1.0),
                 ratio=(3. / 4, 4. / 3),
                 interpolation=cv2.INTER_LINEAR):
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        self.output_size = output_size
        assert (scale[0] <= scale[1]), "scale should be of kind (min, max)"
        assert (ratio[0] <= ratio[1]), "ratio should be of kind (min, max)"
        self.scale = scale
        self.ratio = ratio
        self.interpolation = interpolation

    def _get_params(self, image, attempts=10):
        """Pick crop params (x, y, w, h) by rejection sampling."""
        height, width, _ = image.shape
        area = height * width
        # Sample aspect ratios log-uniformly so e.g. 3/4 and 4/3 are
        # equally likely.
        log_ratio = tuple(math.log(r) for r in self.ratio)

        for _ in range(attempts):
            target_area = np.random.uniform(*self.scale) * area
            aspect_ratio = math.exp(np.random.uniform(*log_ratio))

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w <= width and 0 < h <= height:
                x = np.random.randint(0, width - w + 1)
                y = np.random.randint(0, height - h + 1)
                return x, y, w, h

        # Fallback: central crop, clamped so the aspect ratio stays
        # inside the configured range.
        in_ratio = float(width) / float(height)
        lo, hi = min(self.ratio), max(self.ratio)
        if in_ratio < lo:
            w = width
            h = int(round(w / lo))
        elif in_ratio > hi:
            h = height
            w = int(round(h * hi))
        else:  # whole image
            w, h = width, height
        return (width - w) // 2, (height - h) // 2, w, h

    def __call__(self, img):
        x, y, w, h = self._get_params(img)
        patch = img[y:y + h, x:x + w]
        return F.resize(patch, self.output_size, self.interpolation)
class CenterCropResize(object):
    """Center-crop with a padding margin, then resize to ``size``.

    Args:
        size (int|list|tuple): Target size (height, width); an int
            produces a square output.
        crop_padding (int): Padding used when deriving the crop side
            length. Default: 32.
        interpolation (int): cv2 interpolation flag. Default:
            cv2.INTER_LINEAR.
    """

    def __init__(self, size, crop_padding=32, interpolation=cv2.INTER_LINEAR):
        self.size = (size, size) if isinstance(size, int) else size
        self.crop_padding = crop_padding
        self.interpolation = interpolation

    def _get_params(self, img):
        """Return (crop_side, top, left) for the padded center crop."""
        h, w = img.shape[:2]
        size = min(self.size)
        # Crop side scales the shorter image edge by size/(size+padding).
        c = int(size / (size + self.crop_padding) * min((h, w)))
        return c, (h + 1 - c) // 2, (w + 1 - c) // 2

    def __call__(self, img):
        c, x, y = self._get_params(img)
        return F.resize(img[x:x + c, y:y + c, :], self.size,
                        self.interpolation)


class CenterCrop(object):
    """Crop the central ``output_size`` patch of the input.

    Args:
        output_size: Target size (height, width); an int produces a
            square crop.
    """

    def __init__(self, output_size):
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        self.output_size = output_size

    def _get_params(self, img):
        """Return the (left, top) corner of the centered crop."""
        th, tw = self.output_size
        h, w, _ = img.shape
        assert th <= h and tw <= w, "output size is bigger than image size"
        return int(round((w - tw) / 2.0)), int(round((h - th) / 2.0))

    def __call__(self, img):
        x, y = self._get_params(img)
        th, tw = self.output_size
        return img[y:y + th, x:x + tw]


class RandomHorizontalFlip(object):
    """Horizontally flip the input with probability ``prob``.

    Args:
        prob (float): Flip probability. Default: 0.5.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, img):
        if np.random.random() < self.prob:
            return F.flip(img, code=1)
        return img
class RandomVerticalFlip(object):
    """Vertically flip the input data randomly with a given probability.

    Args:
        prob (float): probability of the input data being flipped.
            Default: 0.5
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, img):
        if np.random.random() < self.prob:
            return F.flip(img, code=0)
        return img


class Normalize(object):
    """Normalize the input data with mean and standard deviation.

    Given mean ``(M1,...,Mn)`` and std ``(S1,..,Sn)`` for ``n`` channels,
    each channel is normalized as
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``.

    The statistics are reshaped to (C, 1, 1), so the input is expected
    in CHW layout (apply ``Permute`` first).

    Args:
        mean (int|float|list): Means for each channel; a scalar is
            broadcast to three channels.
        std (int|float|list): Standard deviations for each channel; a
            scalar is broadcast to three channels.
    """

    def __init__(self, mean=0.0, std=1.0):
        if isinstance(mean, numbers.Number):
            mean = [mean, mean, mean]

        if isinstance(std, numbers.Number):
            # BUG FIX: the original assigned ``mean = [std, std, std]``
            # here, which clobbered the mean and left ``std`` a scalar,
            # crashing on ``len(std)`` below for scalar std inputs.
            std = [std, std, std]

        self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
        self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)

    def __call__(self, img):
        return (img - self.mean) / self.std


class Permute(object):
    """Change input data to a target mode.

    Most transforms operate on HWC images while networks typically take
    CHW tensors. Note the ``[::-1]`` also reverses the channel axis,
    i.e. it converts cv2's BGR channel order to RGB.

    Args:
        mode: Output mode of input. Use "CHW" mode by default.
    """

    def __init__(self, mode="CHW"):
        assert mode in ["CHW"], "unsupported mode: {}".format(mode)
        self.mode = mode

    def __call__(self, img):
        if self.mode == "CHW":
            # HWC -> CHW, then reverse channels (BGR -> RGB).
            return img.transpose((2, 0, 1))[::-1, ...]
        return img
class GaussianNoise(object):
    """Add random gaussian noise to the input data.

    Noise is drawn from ``N(mean, std)``, scaled by 255, added to the
    image, and the result is clipped to [0, 255].

    Args:
        mean: Gaussian mean used to generate noise.
        std: Gaussian standard deviation used to generate noise.
    """

    def __init__(self, mean=0.0, std=1.0):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)

    def __call__(self, img):
        original_dtype = img.dtype
        noise = np.random.normal(self.mean, self.std, img.shape) * 255
        noisy = img + noise.astype(np.float32)
        return np.clip(noisy, 0, 255).astype(original_dtype)


class BrightnessTransform(object):
    """Adjust brightness of the image.

    Args:
        value: How much to adjust the brightness. Can be any
            non negative number. 0 gives the original image.
    """

    def __init__(self, value):
        if value < 0:
            raise ValueError("brightness value should be non-negative")
        self.value = value

    def __call__(self, img):
        if self.value == 0:
            return img

        original_dtype = img.dtype
        # Random gain drawn from [max(0, 1 - value), 1 + value].
        gain = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
        scaled = img.astype(np.float32) * gain
        return scaled.clip(0, 255).astype(original_dtype)


class ContrastTransform(object):
    """Adjust contrast of the image.

    Args:
        value: How much to adjust the contrast. Can be any
            non negative number. 0 gives the original image.
    """

    def __init__(self, value):
        if value < 0:
            raise ValueError("contrast value should be non-negative")
        self.value = value

    def __call__(self, img):
        if self.value == 0:
            return img

        original_dtype = img.dtype
        img = img.astype(np.float32)
        alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
        # Blend between the image and its mean gray level.
        gray_mean = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean()
        blended = img * alpha + gray_mean * (1 - alpha)
        return blended.clip(0, 255).astype(original_dtype)
class SaturationTransform(object):
    """Adjust saturation of the image.

    Args:
        value: How much to adjust the saturation. Can be any
            non negative number. 0 gives the original image.
    """

    def __init__(self, value):
        if value < 0:
            raise ValueError("saturation value should be non-negative")
        self.value = value

    def __call__(self, img):
        if self.value == 0:
            return img

        original_dtype = img.dtype
        img = img.astype(np.float32)
        alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
        # Blend each pixel with its grayscale value to move saturation.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[..., np.newaxis]
        blended = img * alpha + gray * (1 - alpha)
        return blended.clip(0, 255).astype(original_dtype)


class HueTransform(object):
    """Adjust hue of the image.

    Args:
        value: How much to adjust the hue. Can be any number
            between 0 and 0.5, 0 gives the original image.
    """

    def __init__(self, value):
        if value < 0 or value > 0.5:
            raise ValueError("hue value should be in [0.0, 0.5]")
        self.value = value

    def __call__(self, img):
        if self.value == 0:
            return img

        original_dtype = img.dtype
        hsv = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2HSV_FULL)
        hue, sat, val = cv2.split(hsv)

        shift = np.random.uniform(-self.value, self.value)
        # uint8 addition take cares of rotation across boundaries
        with np.errstate(over="ignore"):
            hue = hue.astype(np.uint8) + np.uint8(shift * 255)
        hsv = cv2.merge([hue, sat, val])
        return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR_FULL).astype(original_dtype)
class ColorJitter(object):
    """Randomly change the brightness, contrast, saturation and hue of an image.

    Args:
        brightness: How much to jitter brightness.
            Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
            or the given [min, max]. Should be non negative numbers.
        contrast: How much to jitter contrast.
            Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
            or the given [min, max]. Should be non negative numbers.
        saturation: How much to jitter saturation.
            Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
            or the given [min, max]. Should be non negative numbers.
        hue: How much to jitter hue.
            Chosen uniformly from [-hue, hue] or the given [min, max].
            Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        ops = []
        if brightness != 0:
            ops.append(BrightnessTransform(brightness))
        if contrast != 0:
            ops.append(ContrastTransform(contrast))
        if saturation != 0:
            ops.append(SaturationTransform(saturation))
        if hue != 0:
            ops.append(HueTransform(hue))

        # Apply the enabled jitters in a random order.
        random.shuffle(ops)
        self.transforms = Compose(ops)

    def __call__(self, img):
        return self.transforms(img)