From 1cae81443cddbfabc2c4b68761aed9a7398d9ef3 Mon Sep 17 00:00:00 2001
From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com>
Date: Tue, 29 Jun 2021 20:47:27 +0800
Subject: [PATCH] add mosaic data augmentation (#3185)

---
 ppdet/data/transform/op_helper.py |  59 ++++++++++
 ppdet/data/transform/operators.py | 184 +++++++++++++++++++++++++++++-
 2 files changed, 240 insertions(+), 3 deletions(-)

diff --git a/ppdet/data/transform/op_helper.py b/ppdet/data/transform/op_helper.py
index 02d219546..048bb29da 100644
--- a/ppdet/data/transform/op_helper.py
+++ b/ppdet/data/transform/op_helper.py
@@ -462,3 +462,62 @@ def gaussian2D(shape, sigma_x=1, sigma_y=1):
             sigma_y)))
     h[h < np.finfo(h.dtype).eps * h.max()] = 0
     return h
+
+
+def transform_bbox(sample,
+                   M,
+                   w,
+                   h,
+                   area_thr=0.25,
+                   wh_thr=2,
+                   ar_thr=20,
+                   perspective=False):
+    """
+    Transform bboxes according to the transformation matrix M, refer to
+    https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
+    """
+    bbox = sample['gt_bbox']
+    label = sample['gt_class']
+    # apply M to the four corners of every bbox
+    n = len(bbox)
+    xy = np.ones((n * 4, 3), dtype=np.float32)
+    xy[:, :2] = bbox[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
+    # xy = xy @ M.T
+    xy = np.matmul(xy, M.T)
+    if perspective:
+        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)
+    else:
+        xy = xy[:, :2].reshape(n, 8)
+    # take the axis-aligned hull of the warped corners as the new bbox
+    x = xy[:, [0, 2, 4, 6]]
+    y = xy[:, [1, 3, 5, 7]]
+    bbox = np.concatenate(
+        (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+    # clip boxes to the image and drop degenerate ones
+    mask = filter_bbox(bbox, w, h, area_thr, wh_thr, ar_thr)
+    sample['gt_bbox'] = bbox[mask]
+    sample['gt_class'] = sample['gt_class'][mask]
+    if 'is_crowd' in sample:
+        sample['is_crowd'] = sample['is_crowd'][mask]
+    if 'difficult' in sample:
+        sample['difficult'] = sample['difficult'][mask]
+    return sample
+
+
+def filter_bbox(bbox, w, h, area_thr=0.25, wh_thr=2, ar_thr=20):
+    """
+    Clip bboxes to [0, w] x [0, h] in place and return a mask of the boxes
+    worth keeping, refer to
+    https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
+    """
+    # clip boxes to the image
+    area1 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
+    bbox[:, [0, 2]] = bbox[:, [0, 2]].clip(0, w)
+    bbox[:, [1, 3]] = bbox[:, [1, 3]].clip(0, h)
+    # keep boxes that retain enough area and are neither too small
+    # nor too elongated after clipping
+    area2 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
+    area_ratio = area2 / (area1 + 1e-16)
+    wh = bbox[:, 2:4] - bbox[:, 0:2]
+    ar_ratio = np.maximum(wh[:, 1] / (wh[:, 0] + 1e-16),
+                          wh[:, 0] / (wh[:, 1] + 1e-16))
+    mask = (area_ratio > area_thr) & (
+        (wh > wh_thr).all(1)) & (ar_ratio < ar_thr)
+    return mask
diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py
index 3ea7da8aa..3d95dd1b8 100644
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -45,7 +45,7 @@ from .op_helper import (satisfy_sample_constraint, filter_and_process,
                         generate_sample_bbox, clip_bbox, data_anchor_sampling,
                         satisfy_sample_constraint_coverage, crop_image_sampling,
                         generate_sample_bbox_square, bbox_area_sampling,
-                        is_poly, gaussian_radius, draw_gaussian)
+                        is_poly, gaussian_radius, draw_gaussian, transform_bbox)
 
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@@ -1767,8 +1767,8 @@ class DebugVisibleImage(BaseOperator):
             raise TypeError("{}: input type is invalid.".format(self))
 
     def apply(self, sample, context=None):
-        image = Image.open(sample['im_file']).convert('RGB')
-        out_file_name = sample['im_file'].split('/')[-1]
+        image = Image.fromarray(sample['image'].astype(np.uint8))
+        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
         width = sample['w']
         height = sample['h']
         gt_bbox = sample['gt_bbox']
@@ -2348,5 +2348,183 @@ class RandomResizeCrop(BaseOperator):
                     for gt_segm in sample['gt_segm']
                 ]
                 sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
+
+        return sample
+
+
+@register_op
+class RandomPerspective(BaseOperator):
+    """
+    Rotate, translate, scale, shear and perspective-transform the image
+    and its bboxes randomly, refer to
+    https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
+
+    Args:
+        degree (int): rotation degree, uniformly sampled in [-degree, degree]
+        translate (float): translate fraction, translate_x and translate_y are uniformly sampled
+            in [0.5 - translate, 0.5 + translate]
+        scale (float): scale factor, uniformly sampled in [1 - scale, 1 + scale]
+        shear (int): shear degree, shear_x and shear_y are uniformly sampled in [-shear, shear]
+        perspective (float): perspective_x and perspective_y are uniformly sampled in [-perspective, perspective]
+        border (list): [height, width] border added to each side of the output
+            canvas; negative values shrink it, as when chained after Mosaic, default [0, 0]
+        area_thr (float): the area threshold of bbox to be kept after transformation, default 0.25
+        fill_value (tuple): value used in case of a constant border, default (114, 114, 114)
+    """
+
+    def __init__(self,
+                 degree=10,
+                 translate=0.1,
+                 scale=0.1,
+                 shear=10,
+                 perspective=0.0,
+                 border=[0, 0],
+                 area_thr=0.25,
+                 fill_value=(114, 114, 114)):
+        super(RandomPerspective, self).__init__()
+        self.degree = degree
+        self.translate = translate
+        self.scale = scale
+        self.shear = shear
+        self.perspective = perspective
+        self.border = border
+        self.area_thr = area_thr
+        self.fill_value = fill_value
+
+    def apply(self, sample, context=None):
+        im = sample['image']
+        height = im.shape[0] + self.border[0] * 2
+        width = im.shape[1] + self.border[1] * 2
+
+        # center: move the origin to the image center
+        C = np.eye(3)
+        C[0, 2] = -im.shape[1] / 2
+        C[1, 2] = -im.shape[0] / 2
+
+        # perspective
+        P = np.eye(3)
+        P[2, 0] = random.uniform(-self.perspective, self.perspective)
+        P[2, 1] = random.uniform(-self.perspective, self.perspective)
+
+        # Rotation and scale
+        R = np.eye(3)
+        a = random.uniform(-self.degree, self.degree)
+        s = random.uniform(1 - self.scale, 1 + self.scale)
+        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+        # Shear
+        S = np.eye(3)
+        # shear x (deg)
+        S[0, 1] = math.tan(
+            random.uniform(-self.shear, self.shear) * math.pi / 180)
+        # shear y (deg)
+        S[1, 0] = math.tan(
+            random.uniform(-self.shear, self.shear) * math.pi / 180)
+
+        # Translation
+        T = np.eye(3)
+        T[0, 2] = random.uniform(0.5 - self.translate,
+                                 0.5 + self.translate) * width
+        T[1, 2] = random.uniform(0.5 - self.translate,
+                                 0.5 + self.translate) * height
+
+        # compose the transforms right to left:
+        # M = T @ S @ R @ P @ C
+        M = np.eye(3)
+        for cM in [T, S, R, P, C]:
+            M = np.matmul(M, cM)
+
+        if (self.border[0] != 0) or (self.border[1] != 0) or (
+                M != np.eye(3)).any():
+            if self.perspective:
+                im = cv2.warpPerspective(
+                    im, M, dsize=(width, height), borderValue=self.fill_value)
+            else:
+                im = cv2.warpAffine(
+                    im,
+                    M[:2],
+                    dsize=(width, height),
+                    borderValue=self.fill_value)
+
+        sample['image'] = im
+        if sample['gt_bbox'].shape[0] > 0:
+            sample = transform_bbox(
+                sample,
+                M,
+                width,
+                height,
+                area_thr=self.area_thr,
+                perspective=self.perspective)
+
+        return sample
+
+
+@register_op
+class Mosaic(BaseOperator):
+    """
+    Mosaic data augmentation: stitch four images (and their bboxes) into
+    one image of size (2 * target_size, 2 * target_size), refer to
+    https://github.com/ultralytics/yolov5/blob/develop/utils/datasets.py
+
+    Args:
+        target_size (int): size of a single input image before stitching
+        mosaic_border (tuple): offsets used when sampling the mosaic
+            center, default (-target_size // 2, -target_size // 2)
+        fill_value (tuple): value used for canvas areas not covered by any
+            image, default (114, 114, 114)
+    """
+
+    def __init__(self,
+                 target_size,
+                 mosaic_border=None,
+                 fill_value=(114, 114, 114)):
+        super(Mosaic, self).__init__()
+        self.target_size = target_size
+        if mosaic_border is None:
+            mosaic_border = (-target_size // 2, -target_size // 2)
+        self.mosaic_border = mosaic_border
+        self.fill_value = fill_value
+
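+    # __call__ expects a list of exactly four samples. The mosaic canvas
+    # is (2s, 2s) with a randomly sampled center (xc, yc); image i is
+    # pasted into quadrant i (0: top left, 1: top right, 2: bottom left,
+    # 3: bottom right). (x1a, y1a, x2a, y2a) is the paste region on the
+    # canvas, (x1b, y1b, x2b, y2b) the matching crop from the source
+    # image, and gt boxes are shifted by the paste offset (padw, padh).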
+    def __call__(self, sample, context=None):
+        if not isinstance(sample, Sequence):
+            return sample
+
+        s = self.target_size
+        # mosaic center, sampled inside the borders of the 2s x 2s canvas
+        yc, xc = [
+            int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border
+        ]
+        boxes = [x['gt_bbox'] for x in sample]
+        labels = [x['gt_class'] for x in sample]
+        for i in range(len(sample)):
+            im = sample[i]['image']
+            h, w, c = im.shape
+
+            if i == 0:  # top left
+                image = np.ones(
+                    (s * 2, s * 2, c), dtype=np.uint8) * self.fill_value
+                # xmin, ymin, xmax, ymax (dst image)
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
+                # xmin, ymin, xmax, ymax (src image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w,
+                                                 s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            image[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]
+            padw = x1a - x1b
+            padh = y1a - y1b
+            boxes[i] = boxes[i] + (padw, padh, padw, padh)
+
+        # boxes fully outside the canvas become degenerate after clipping;
+        # a following RandomPerspective removes them via filter_bbox
+        boxes = np.concatenate(boxes, axis=0)
+        boxes = np.clip(boxes, 0, s * 2)
+        labels = np.concatenate(labels, axis=0)
+        if 'is_crowd' in sample[0]:
+            is_crowd = np.concatenate([x['is_crowd'] for x in sample], axis=0)
+        if 'difficult' in sample[0]:
+            difficult = np.concatenate(
+                [x['difficult'] for x in sample], axis=0)
+        # merge everything into the first sample
+        sample = sample[0]
+        sample['image'] = image.astype(np.uint8)
+        sample['gt_bbox'] = boxes
+        sample['gt_class'] = labels
+        if 'is_crowd' in sample:
+            sample['is_crowd'] = is_crowd
+        if 'difficult' in sample:
+            sample['difficult'] = difficult
 
         return sample
-- 
GitLab
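Usage sketch (editor's note, not part of the patch): Mosaic consumes a list
of four samples and returns a single sample on a (2 * target_size,
2 * target_size) canvas; RandomPerspective is typically chained after it
with border set to mosaic_border so the warped output comes back down to
target_size, mirroring the yolov5 pipeline the code cites. The snippet
below shows how the two operators might be exercised directly; the dummy
sample fields and the 640 target size are illustrative assumptions, not
values taken from the patch:

    import numpy as np
    from ppdet.data.transform.operators import Mosaic, RandomPerspective

    def make_sample(w=480, h=360):
        # minimal fields read by Mosaic / RandomPerspective in this patch
        return {
            'image': np.random.randint(0, 256, (h, w, 3), dtype=np.uint8),
            'gt_bbox': np.array([[10., 20., 200., 180.]], dtype=np.float32),
            'gt_class': np.array([[0]], dtype=np.int32),
        }

    mosaic = Mosaic(target_size=640)  # mosaic_border defaults to (-320, -320)
    # border=[-320, -320] maps the 1280 x 1280 canvas back to 640 x 640,
    # following the yolov5 convention of passing mosaic_border here
    perspective = RandomPerspective(border=[-320, -320])

    merged = mosaic([make_sample() for _ in range(4)])
    out = perspective(merged)
    print(out['image'].shape, out['gt_bbox'].shape)  # (640, 640, 3), (n, 4)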