diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index ae2d799701cdc448202ce1ab2bef97358970666e..a5ea6fb8f6e824d70658dcd3d7943e13ee6f3ce1 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -253,8 +253,8 @@ class SniperDecodeCrop(BaseOperator): chip = sample['chip'] x1, y1, x2, y2 = [int(xi) for xi in chip] - im = im[max(y1, 0):min(y2, im.shape[0]), - max(x1, 0):min(x2, im.shape[1]), :] + im = im[max(y1, 0):min(y2, im.shape[0]), max(x1, 0):min(x2, im.shape[ + 1]), :] sample['image'] = im h = im.shape[0] @@ -2471,184 +2471,6 @@ class RandomResizeCrop(BaseOperator): return sample -class RandomPerspective(BaseOperator): - """ - Rotate, tranlate, scale, shear and perspect image and bboxes randomly, - refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py - - Args: - degree (int): rotation degree, uniformly sampled in [-degree, degree] - translate (float): translate fraction, translate_x and translate_y are uniformly sampled - in [0.5 - translate, 0.5 + translate] - scale (float): scale factor, uniformly sampled in [1 - scale, 1 + scale] - shear (int): shear degree, shear_x and shear_y are uniformly sampled in [-shear, shear] - perspective (float): perspective_x and perspective_y are uniformly sampled in [-perspective, perspective] - area_thr (float): the area threshold of bbox to be kept after transformation, default 0.25 - fill_value (tuple): value used in case of a constant border, default (114, 114, 114) - """ - - def __init__(self, - degree=10, - translate=0.1, - scale=0.1, - shear=10, - perspective=0.0, - border=[0, 0], - area_thr=0.25, - fill_value=(114, 114, 114)): - super(RandomPerspective, self).__init__() - self.degree = degree - self.translate = translate - self.scale = scale - self.shear = shear - self.perspective = perspective - self.border = border - self.area_thr = area_thr - self.fill_value = fill_value - - def apply(self, sample, context=None): - im = sample['image'] - height = im.shape[0] + self.border[0] * 2 - width = im.shape[1] + self.border[1] * 2 - - # center - C = np.eye(3) - C[0, 2] = -im.shape[1] / 2 - C[1, 2] = -im.shape[0] / 2 - - # perspective - P = np.eye(3) - P[2, 0] = random.uniform(-self.perspective, self.perspective) - P[2, 1] = random.uniform(-self.perspective, self.perspective) - - # Rotation and scale - R = np.eye(3) - a = random.uniform(-self.degree, self.degree) - s = random.uniform(1 - self.scale, 1 + self.scale) - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) - - # Shear - S = np.eye(3) - # shear x (deg) - S[0, 1] = math.tan( - random.uniform(-self.shear, self.shear) * math.pi / 180) - # shear y (deg) - S[1, 0] = math.tan( - random.uniform(-self.shear, self.shear) * math.pi / 180) - - # Translation - T = np.eye(3) - T[0, 2] = random.uniform(0.5 - self.translate, - 0.5 + self.translate) * width - T[1, 2] = random.uniform(0.5 - self.translate, - 0.5 + self.translate) * height - - # matmul - # M = T @ S @ R @ P @ C - M = np.eye(3) - for cM in [T, S, R, P, C]: - M = np.matmul(M, cM) - - if (self.border[0] != 0) or (self.border[1] != 0) or ( - M != np.eye(3)).any(): - if self.perspective: - im = cv2.warpPerspective( - im, M, dsize=(width, height), borderValue=self.fill_value) - else: - im = cv2.warpAffine( - im, - M[:2], - dsize=(width, height), - borderValue=self.fill_value) - - sample['image'] = im - if sample['gt_bbox'].shape[0] > 0: - sample = transform_bbox( - sample, - M, - width, - height, - area_thr=self.area_thr, - perspective=self.perspective) - - return sample - - -@register_op -class Mosaic(BaseOperator): - """ - Mosaic Data Augmentation, refer to https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py - - """ - - def __init__(self, - target_size, - mosaic_border=None, - fill_value=(114, 114, 114)): - super(Mosaic, self).__init__() - self.target_size = target_size - if mosaic_border is None: - mosaic_border = (-target_size // 2, -target_size // 2) - self.mosaic_border = mosaic_border - self.fill_value = fill_value - - def __call__(self, sample, context=None): - if not isinstance(sample, Sequence): - return sample - - s = self.target_size - yc, xc = [ - int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border - ] - boxes = [x['gt_bbox'] for x in sample] - labels = [x['gt_class'] for x in sample] - for i in range(len(sample)): - im = sample[i]['image'] - h, w, c = im.shape - - if i == 0: # top left - image = np.ones( - (s * 2, s * 2, c), dtype=np.uint8) * self.fill_value - # xmin, ymin, xmax, ymax (dst image) - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc - # xmin, ymin, xmax, ymax (src image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h - elif i == 1: # top right - x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc - x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h - elif i == 2: # bottom left - x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) - x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min( - y2a - y1a, h) - elif i == 3: # bottom right - x1a, y1a, x2a, y2a = xc, yc, min(xc + w, - s * 2), min(s * 2, yc + h) - x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - - image[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] - padw = x1a - x1b - padh = y1a - y1b - boxes[i] = boxes[i] + (padw, padh, padw, padh) - - boxes = np.concatenate(boxes, axis=0) - boxes = np.clip(boxes, 0, s * 2) - labels = np.concatenate(labels, axis=0) - if 'is_crowd' in sample[0]: - is_crowd = np.concatenate([x['is_crowd'] for x in sample], axis=0) - if 'difficult' in sample[0]: - difficult = np.concatenate([x['difficult'] for x in sample], axis=0) - sample = sample[0] - sample['image'] = image.astype(np.uint8) - sample['gt_bbox'] = boxes - sample['gt_class'] = labels - if 'is_crowd' in sample: - sample['is_crowd'] = is_crowd - if 'difficult' in sample: - sample['difficult'] = difficult - - return sample - - @register_op class RandomSelect(BaseOperator): """