finish data preprocess ops

8ce95746 · wangxinxin08 · 2ae4ac30 · 8ce95746 · 8ce95746 · 8ce95746
4 changed files
--- a/configs/yolov5/yolov5_reader.yml
+++ b/configs/yolov5/yolov5_reader.yml
@@ -12,22 +12,27 @@ TrainReader:
  sample_transforms:
    - !DecodeImage
      to_rgb: True
-      # with_mosaic: True
-    # - !MosaicImage
-    #   offset: 0.3
-    #   mosaic_scale: [0.8, 1.0]
-    #   sample_scale: [0.8, 1.0]
-    #   sample_flip: 0.5
-    #   use_cv2: true
-    #   interp: 2
-    - !NormalizeBox {}
+      with_mosaic: True
+    - !Mosaic
+      target_size: 640
+    - !RandomPerspective
+      degree: 0
+      translate: 0.1
+      scale: 0.5
+      shear: 0.0
+      perspective: 0.0
+      border: [-320, -320]
+    - !RandomFlipImage
+      prob: 0.5
+      is_normalized: false
+    - !RandomHSV
+      hgain: 0.015
+      sgain: 0.7
+      vgain: 0.4
    - !PadBox
      num_max_boxes: 50
    - !BboxXYXY2XYWH {}
  batch_transforms:
-    - !RandomShape
-      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640]
-      random_inter: True
    - !NormalizeImage
      mean: [0.0, 0.0, 0.0]
      std: [1.0, 1.0, 1.0]
@@ -37,10 +42,6 @@ TrainReader:
      to_bgr: false
      channel_first: True
      # focus: false
-    - !Gt2YoloTarget
-      anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
-      anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
-            [59, 119], [116, 90], [156, 198], [373, 326]]
      downsample_ratios: [8, 16, 32]
  batch_size: 2
  mosaic_prob: 0.3
@@ -49,6 +50,9 @@ TrainReader:
  drop_last: true
  worker_num: 8
  bufsize: 16
+  target_size: 640
+  rect: false
+  pad: 0.5
  use_process: true

 EvalReader:

--- a/ppdet/data/reader.py
+++ b/ppdet/data/reader.py
@@ -21,6 +21,7 @@ import copy
 import functools
 import collections
 import traceback
+import random
 import numpy as np
 import logging

@@ -209,7 +210,8 @@ class Reader(object):
                 memsize='3G',
                 inputs_def=None,
                 devices_num=1,
-                 num_trainers=1):
+                 num_trainers=1,
+                 mosaic=False):
        self._dataset = dataset
        self._roidbs = self._dataset.get_roidb()
        if rect:
@@ -234,7 +236,8 @@ class Reader(object):
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

-            batch_shapes = np.ceil(np.array(shapes) * target_size / stride + pad) * stride
+            batch_shapes = np.ceil(
+                np.array(shapes) * target_size / stride + pad) * stride
            new_roidbs = [self._roidbs[j] for j in irect]
            self._roidbs = new_roidbs
            for i, j in enumerate(bi):
@@ -243,6 +246,8 @@ class Reader(object):
        self._fields = copy.deepcopy(inputs_def[
            'fields']) if inputs_def else None

+        self.mosaic = mosaic
+
        # transform
        self._sample_transforms = Compose(sample_transforms,
                                          {'fields': self._fields})
@@ -387,6 +392,17 @@ class Reader(object):
            if self._load_img:
                sample['image'] = self._load_image(sample['im_file'])

+            if self.mosaic:
+                sample['mosaic'] = []
+                for idx in [
+                        random.randint(0, len(self.indexes) - 1)
+                        for _ in range(3)
+                ]:
+                    rec = copy.deepcopy(self._roidbs[idx])
+                    if self._load_img:
+                        rec['image'] = self._load_image(rec['im_file'])
+                    sample['mosaic'].append(rec)
+
            if self._epoch < self._mixup_epoch:
                num = len(self.indexes)
                mix_idx = np.random.randint(1, num)

--- a/ppdet/data/transform/op_helper.py
+++ b/ppdet/data/transform/op_helper.py
@@ -462,3 +462,56 @@ def gaussian2D(shape, sigma_x=1, sigma_y=1):
                                                            sigma_y)))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h
+
+
+def transform_bbox(bbox,
+                   label,
+                   M,
+                   w,
+                   h,
+                   area_thr=0.25,
+                   wh_thr=2,
+                   ar_thr=20,
+                   perspective=False):
+    """
+    Transform bboxes according to transformation matrix M, then clip and
+    filter them with clip_bbox.
+
+    Args:
+        bbox (np.ndarray): one row per box; columns 0..3 are read as
+            x1, y1, x2, y2.
+        label (np.ndarray): per-box labels, indexed with the same keep-mask
+            as the boxes.
+        M (np.ndarray): matrix applied to the homogeneous corner
+            coordinates (x, y, 1) as ``xy @ M.T``. It must have three
+            columns; when ``perspective`` is True a third output
+            coordinate is required as well.
+        w: upper clip bound for x coordinates.
+        h: upper clip bound for y coordinates.
+        area_thr: forwarded to clip_bbox.
+        wh_thr: forwarded to clip_bbox.
+        ar_thr: forwarded to clip_bbox.
+        perspective (bool): if True, divide x and y by the third
+            homogeneous coordinate after the transform.
+
+    Returns:
+        tuple: (new_bbox, new_label), the boxes and labels kept by
+        clip_bbox.
+    """
+    # expand every box into its four corners in homogeneous coordinates:
+    # (x1, y1), (x2, y2), (x1, y2), (x2, y1)
+    n = len(bbox)
+    xy = np.ones((n * 4, 3), dtype=np.float32)
+    xy[:, :2] = bbox[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
+    xy = np.matmul(xy, M.T)
+    if perspective:
+        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)
+    else:
+        xy = xy[:, :2].reshape(n, 8)
+    # axis-aligned box enclosing the four transformed corners
+    x = xy[:, [0, 2, 4, 6]]
+    y = xy[:, [1, 3, 5, 7]]
+    new_bbox = np.concatenate(
+        (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+    # clip boxes; pass every threshold through so that non-default
+    # wh_thr / ar_thr values given by the caller actually take effect
+    new_bbox, mask = clip_bbox(new_bbox, w, h, area_thr, wh_thr, ar_thr)
+    new_label = label[mask]
+    return new_bbox, new_label
+
+
+def clip_bbox(bbox, w, h, area_thr=0.25, wh_thr=2, ar_thr=20):
+    """
+    Clip boxes to the range [0, w] x [0, h] and drop the ones that
+    degenerate.
+
+    The coordinates of ``bbox`` are clipped in place, so the caller's
+    array is modified.
+
+    Args:
+        bbox (np.ndarray): one row per box; columns 0..3 are read as
+            x1, y1, x2, y2.
+        w: upper clip bound for columns 0 and 2.
+        h: upper clip bound for columns 1 and 3.
+        area_thr: a box is kept only if (clipped area / original area)
+            is greater than this value.
+        wh_thr: a box is kept only if both its clipped width and height
+            are greater than this value.
+        ar_thr: a box is kept only if max(w / h, h / w) of the clipped
+            box is smaller than this value.
+
+    Returns:
+        tuple: (kept boxes, boolean keep-mask over the input rows).
+    """
+    eps = 1e-16
+    # area of every box before clipping
+    area_before = np.prod(bbox[:, 2:4] - bbox[:, 0:2], axis=1)
+    # clip x columns against w and y columns against h, in place
+    for cols, upper in (([0, 2], w), ([1, 3], h)):
+        bbox[:, cols] = np.clip(bbox[:, cols], 0, upper)
+    # width / height of the clipped boxes
+    size = bbox[:, 2:4] - bbox[:, 0:2]
+    box_w, box_h = size[:, 0], size[:, 1]
+    area_after = np.prod(size, axis=1)
+    # the three keep criteria
+    keep_area = area_after / (area_before + eps) > area_thr
+    keep_size = np.all(size > wh_thr, axis=1)
+    keep_ratio = np.maximum(box_h / (box_w + eps),
+                            box_w / (box_h + eps)) < ar_thr
+    mask = keep_area & keep_size & keep_ratio
+    return bbox[mask], mask
\ No newline at end of file
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py