# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import division from __future__ import print_function import cv2 import traceback import numpy as np import logging logger = logging.getLogger(__name__) __all__ = ['ColorDistort', 'RandomExpand', 'RandomCrop', 'RandomFlip', 'NormalizeBox', 'PadBox', 'RandomShape', 'NormalizeImage', 'BboxXYXY2XYWH', 'ResizeImage', 'Compose', 'BatchCompose'] class Compose(object): def __init__(self, transforms=[]): self.transforms = transforms def __call__(self, *data): for f in self.transforms: try: data = f(*data) except Exception as e: stack_info = traceback.format_exc() logger.info("fail to perform transform [{}] with error: " "{} and stack:\n{}".format(f, e, str(stack_info))) raise e return data class BatchCompose(object): def __init__(self, transforms=[]): self.transforms = transforms def __call__(self, data): for f in self.transforms: try: data = f(data) except Exception as e: stack_info = traceback.format_exc() logger.info("fail to perform batch transform [{}] with error: " "{} and stack:\n{}".format(f, e, str(stack_info))) raise e # sample list to batch data batch = list(zip(*data)) return batch class ColorDistort(object): """Random color distortion. Args: hue (list): hue settings. in [lower, upper, probability] format. saturation (list): saturation settings. in [lower, upper, probability] format. contrast (list): contrast settings. in [lower, upper, probability] format. brightness (list): brightness settings. in [lower, upper, probability] format. random_apply (bool): whether to apply in random (yolo) or fixed (SSD) order. """ def __init__(self, hue=[-18, 18, 0.5], saturation=[0.5, 1.5, 0.5], contrast=[0.5, 1.5, 0.5], brightness=[0.5, 1.5, 0.5], random_apply=True): self.hue = hue self.saturation = saturation self.contrast = contrast self.brightness = brightness self.random_apply = random_apply def apply_hue(self, img): low, high, prob = self.hue if np.random.uniform(0., 1.) < prob: return img img = img.astype(np.float32) # XXX works, but result differ from HSV version delta = np.random.uniform(low, high) u = np.cos(delta * np.pi) w = np.sin(delta * np.pi) bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]]) tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321], [0.211, -0.523, 0.311]]) ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647], [1.0, -1.107, 1.705]]) t = np.dot(np.dot(ityiq, bt), tyiq).T img = np.dot(img, t) return img def apply_saturation(self, img): low, high, prob = self.saturation if np.random.uniform(0., 1.) < prob: return img delta = np.random.uniform(low, high) img = img.astype(np.float32) gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32) gray = gray.sum(axis=2, keepdims=True) gray *= (1.0 - delta) img *= delta img += gray return img def apply_contrast(self, img): low, high, prob = self.contrast if np.random.uniform(0., 1.) < prob: return img delta = np.random.uniform(low, high) img = img.astype(np.float32) img *= delta return img def apply_brightness(self, img): low, high, prob = self.brightness if np.random.uniform(0., 1.) < prob: return img delta = np.random.uniform(low, high) img = img.astype(np.float32) img += delta return img def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): if self.random_apply: distortions = np.random.permutation([ self.apply_brightness, self.apply_contrast, self.apply_saturation, self.apply_hue ]) for func in distortions: im = func(im) return [im_info, im, gt_bbox, gt_class, gt_score] im = self.apply_brightness(im) if np.random.randint(0, 2): im = self.apply_contrast(im) im = self.apply_saturation(im) im = self.apply_hue(im) else: im = self.apply_saturation(im) im = self.apply_hue(im) im = self.apply_contrast(im) return [im_info, im, gt_bbox, gt_class, gt_score] class RandomExpand(object): """Random expand the canvas. Args: ratio (float): maximum expansion ratio. prob (float): probability to expand. fill_value (list): color value used to fill the canvas. in RGB order. """ def __init__(self, ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53]): assert ratio > 1.01, "expand ratio must be larger than 1.01" self.ratio = ratio self.prob = prob self.fill_value = fill_value def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): if np.random.uniform(0., 1.) < self.prob: return [im_info, im, gt_bbox, gt_class, gt_score] height, width, _ = im.shape expand_ratio = np.random.uniform(1., self.ratio) h = int(height * expand_ratio) w = int(width * expand_ratio) if not h > height or not w > width: return [im_info, im, gt_bbox, gt_class, gt_score] y = np.random.randint(0, h - height) x = np.random.randint(0, w - width) canvas = np.ones((h, w, 3), dtype=np.uint8) canvas *= np.array(self.fill_value, dtype=np.uint8) canvas[y:y + height, x:x + width, :] = im.astype(np.uint8) gt_bbox += np.array([x, y, x, y], dtype=np.float32) return [im_info, canvas, gt_bbox, gt_class, gt_score] class RandomCrop(): """Random crop image and bboxes. Args: aspect_ratio (list): aspect ratio of cropped region. in [min, max] format. thresholds (list): iou thresholds for decide a valid bbox crop. scaling (list): ratio between a cropped region and the original image. in [min, max] format. num_attempts (int): number of tries before giving up. allow_no_crop (bool): allow return without actually cropping them. cover_all_box (bool): ensure all bboxes are covered in the final crop. """ def __init__(self, aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3, .5, .7, .9], scaling=[.3, 1.], num_attempts=50, allow_no_crop=True, cover_all_box=False): self.aspect_ratio = aspect_ratio self.thresholds = thresholds self.scaling = scaling self.num_attempts = num_attempts self.allow_no_crop = allow_no_crop self.cover_all_box = cover_all_box def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): if len(gt_bbox) == 0: return [im_info, im, gt_bbox, gt_class, gt_score] # NOTE Original method attempts to generate one candidate for each # threshold then randomly sample one from the resulting list. # Here a short circuit approach is taken, i.e., randomly choose a # threshold and attempt to find a valid crop, and simply return the # first one found. # The probability is not exactly the same, kinda resembling the # "Monty Hall" problem. Actually carrying out the attempts will affect # observability (just like opening doors in the "Monty Hall" game). thresholds = list(self.thresholds) if self.allow_no_crop: thresholds.append('no_crop') np.random.shuffle(thresholds) for thresh in thresholds: if thresh == 'no_crop': return [im_info, im, gt_bbox, gt_class, gt_score] h, w, _ = im.shape found = False for i in range(self.num_attempts): scale = np.random.uniform(*self.scaling) min_ar, max_ar = self.aspect_ratio aspect_ratio = np.random.uniform( max(min_ar, scale**2), min(max_ar, scale**-2)) crop_h = int(h * scale / np.sqrt(aspect_ratio)) crop_w = int(w * scale * np.sqrt(aspect_ratio)) crop_y = np.random.randint(0, h - crop_h) crop_x = np.random.randint(0, w - crop_w) crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h] iou = self._iou_matrix( gt_bbox, np.array( [crop_box], dtype=np.float32)) if iou.max() < thresh: continue if self.cover_all_box and iou.min() < thresh: continue cropped_box, valid_ids = self._crop_box_with_center_constraint( gt_bbox, np.array( crop_box, dtype=np.float32)) if valid_ids.size > 0: found = True break if found: im = self._crop_image(im, crop_box) gt_bbox = np.take(cropped_box, valid_ids, axis=0) gt_class = np.take(gt_class, valid_ids, axis=0) gt_score = np.take(gt_score, valid_ids, axis=0) return [im_info, im, gt_bbox, gt_class, gt_score] return [im_info, im, gt_bbox, gt_class, gt_score] def _iou_matrix(self, a, b): tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2]) br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2) area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) area_o = (area_a[:, np.newaxis] + area_b - area_i) return area_i / (area_o + 1e-10) def _crop_box_with_center_constraint(self, box, crop): cropped_box = box.copy() cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2]) cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:]) cropped_box[:, :2] -= crop[:2] cropped_box[:, 2:] -= crop[:2] centers = (box[:, :2] + box[:, 2:]) / 2 valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1) valid = np.logical_and( valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1)) return cropped_box, np.where(valid)[0] def _crop_image(self, img, crop): x1, y1, x2, y2 = crop return img[y1:y2, x1:x2, :] class RandomFlip(): def __init__(self, prob=0.5, is_normalized=False): """ Args: prob (float): the probability of flipping image is_normalized (bool): whether the bbox scale to [0,1] """ self.prob = prob self.is_normalized = is_normalized if not (isinstance(self.prob, float) and isinstance(self.is_normalized, bool)): raise TypeError("{}: input type is invalid.".format(self)) def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): """Filp the image and bounding box. Operators: 1. Flip the image numpy. 2. Transform the bboxes' x coordinates. (Must judge whether the coordinates are normalized!) """ if not isinstance(im, np.ndarray): raise TypeError("{}: image is not a numpy array.".format(self)) if len(im.shape) != 3: raise ImageError("{}: image is not 3-dimensional.".format(self)) height, width, _ = im.shape if np.random.uniform(0, 1) < self.prob: im = im[:, ::-1, :] if gt_bbox.shape[0] > 0: oldx1 = gt_bbox[:, 0].copy() oldx2 = gt_bbox[:, 2].copy() if self.is_normalized: gt_bbox[:, 0] = 1 - oldx2 gt_bbox[:, 2] = 1 - oldx1 else: gt_bbox[:, 0] = width - oldx2 - 1 gt_bbox[:, 2] = width - oldx1 - 1 if gt_bbox.shape[0] != 0 and ( gt_bbox[:, 2] < gt_bbox[:, 0]).all(): m = "{}: invalid box, x2 should be greater than x1".format( self) raise ValueError(m) return [im_info, im, gt_bbox, gt_class, gt_score] class NormalizeBox(object): """Transform the bounding box's coornidates to [0,1].""" def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): height, width, _ = im.shape for i in range(gt_bbox.shape[0]): gt_bbox[i][0] = gt_bbox[i][0] / width gt_bbox[i][1] = gt_bbox[i][1] / height gt_bbox[i][2] = gt_bbox[i][2] / width gt_bbox[i][3] = gt_bbox[i][3] / height return [im_info, im, gt_bbox, gt_class, gt_score] class PadBox(object): def __init__(self, num_max_boxes=50): """ Pad zeros to bboxes if number of bboxes is less than num_max_boxes. Args: num_max_boxes (int): the max number of bboxes """ self.num_max_boxes = num_max_boxes def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): gt_num = min(self.num_max_boxes, len(gt_bbox)) num_max = self.num_max_boxes pad_bbox = np.zeros((num_max, 4), dtype=np.float32) if gt_num > 0: pad_bbox[:gt_num, :] = gt_bbox[:gt_num, :] gt_bbox = pad_bbox pad_class = np.zeros((num_max), dtype=np.int32) if gt_num > 0: pad_class[:gt_num] = gt_class[:gt_num, 0] gt_class = pad_class pad_score = np.zeros((num_max), dtype=np.float32) if gt_num > 0: pad_score[:gt_num] = gt_score[:gt_num, 0] gt_score = pad_score return [im_info, im, gt_bbox, gt_class, gt_score] class BboxXYXY2XYWH(object): """ Convert bbox XYXY format to XYWH format. """ def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2. return [im_info, im, gt_bbox, gt_class, gt_score] class RandomShape(object): """ Randomly reshape a batch. If random_inter is True, also randomly select one an interpolation algorithm [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]. If random_inter is False, use cv2.INTER_NEAREST. Args: sizes (list): list of int, random choose a size from these random_inter (bool): whether to randomly interpolation, defalut true. """ def __init__(self, sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_inter=True): self.sizes = sizes self.random_inter = random_inter self.interps = [ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4, ] if random_inter else [] def __call__(self, samples): shape = np.random.choice(self.sizes) method = np.random.choice(self.interps) if self.random_inter \ else cv2.INTER_NEAREST for i in range(len(samples)): im = samples[i][1] h, w = im.shape[:2] scale_x = float(shape) / w scale_y = float(shape) / h im = cv2.resize( im, None, None, fx=scale_x, fy=scale_y, interpolation=method) samples[i][1] = im return samples class NormalizeImage(object): def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], scale=True, channel_first=True): """ Args: mean (list): the pixel mean std (list): the pixel variance scale (bool): whether scale image to [0, 1] channel_first (bool): whehter change [h, w, c] to [c, h, w] """ self.mean = mean self.std = std self.scale = scale self.channel_first = channel_first if not (isinstance(self.mean, list) and isinstance(self.std, list) and isinstance(self.scale, bool)): raise TypeError("{}: input type is invalid.".format(self)) from functools import reduce if reduce(lambda x, y: x * y, self.std) == 0: raise ValueError('{}: std is invalid!'.format(self)) def __call__(self, samples): """Normalize the image. Operators: 1. (optional) Scale the image to [0,1] 2. Each pixel minus mean and is divided by std 3. (optional) permute channel """ for i in range(len(samples)): im = samples[i][1] im = im.astype(np.float32, copy=False) mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] if self.scale: im = im / 255.0 im -= mean im /= std if self.channel_first: im = im.transpose((2, 0, 1)) samples[i][1] = im return samples def _iou_matrix(a, b): tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2]) br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2) area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) area_o = (area_a[:, np.newaxis] + area_b - area_i) return area_i / (area_o + 1e-10) def _crop_box_with_center_constraint(box, crop): cropped_box = box.copy() cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2]) cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:]) cropped_box[:, :2] -= crop[:2] cropped_box[:, 2:] -= crop[:2] centers = (box[:, :2] + box[:, 2:]) / 2 valid = np.logical_and( crop[:2] <= centers, centers < crop[2:]).all(axis=1) valid = np.logical_and( valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1)) return cropped_box, np.where(valid)[0] def random_crop(inputs): aspect_ratios = [.5, 2.] thresholds = [.0, .1, .3, .5, .7, .9] scaling = [.3, 1.] img, img_ids, gt_box, gt_label = inputs h, w = img.shape[:2] if len(gt_box) == 0: return inputs np.random.shuffle(thresholds) for thresh in thresholds: found = False for i in range(50): scale = np.random.uniform(*scaling) min_ar, max_ar = aspect_ratios ar = np.random.uniform(max(min_ar, scale**2), min(max_ar, scale**-2)) crop_h = int(h * scale / np.sqrt(ar)) crop_w = int(w * scale * np.sqrt(ar)) crop_y = np.random.randint(0, h - crop_h) crop_x = np.random.randint(0, w - crop_w) crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h] iou = _iou_matrix(gt_box, np.array([crop_box], dtype=np.float32)) if iou.max() < thresh: continue cropped_box, valid_ids = _crop_box_with_center_constraint( gt_box, np.array(crop_box, dtype=np.float32)) if valid_ids.size > 0: found = True break if found: x1, y1, x2, y2 = crop_box img = img[y1:y2, x1:x2, :] gt_box = np.take(cropped_box, valid_ids, axis=0) gt_label = np.take(gt_label, valid_ids, axis=0) return img, img_ids, gt_box, gt_label return inputs class ResizeImage(object): def __init__(self, target_size=0, interp=cv2.INTER_CUBIC): """ Rescale image to the specified target size. If target_size is list, selected a scale randomly as the specified target size. Args: target_size (int|list): the target size of image's short side, multi-scale training is adopted when type is list. interp (int): the interpolation method """ self.interp = int(interp) if not (isinstance(target_size, int) or isinstance(target_size, list)): raise TypeError( "Type of target_size is invalid. Must be Integer or List, now is {}". format(type(target_size))) self.target_size = target_size def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): """ Resize the image numpy. """ if not isinstance(im, np.ndarray): raise TypeError("{}: image type is not numpy.".format(self)) if len(im.shape) != 3: raise ImageError('{}: image is not 3-dimensional.'.format(self)) im_shape = im.shape im_scale_x = float(self.target_size) / float(im_shape[1]) im_scale_y = float(self.target_size) / float(im_shape[0]) resize_w = self.target_size resize_h = self.target_size im = cv2.resize( im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) return [im_info, im, gt_bbox, gt_class, gt_score]