add augment

825cea1d · dengkaipeng · aa9ff438 · 825cea1d · 825cea1d · 825cea1d
6 changed files
--- a/configs/yolov4/yolov4_cspdarknet_coco.yml
+++ b/configs/yolov4/yolov4_cspdarknet_coco.yml
@@ -90,6 +90,7 @@ TrainReader:
    - !DecodeImage
      to_rgb: True
      with_mosaic: True
+      with_mixup: True
    - !MosaicImage
      offset: 0.3
      mosaic_scale: [0.8, 1.0]
@@ -97,6 +98,15 @@ TrainReader:
      sample_flip: 0.5
      use_cv2: true
      interp: 2
+    - !MixupImage
+      alpha: 1.5
+      beta: 1.5
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
    - !NormalizeBox {}
    - !PadBox
      num_max_boxes: 90

--- a/configs/yolov4/yolov4_cspdarknet_voc.yml
+++ b/configs/yolov4/yolov4_cspdarknet_voc.yml
@@ -89,6 +89,7 @@ TrainReader:
    - !DecodeImage
      to_rgb: True
      with_mosaic: True
+      with_mixup: True
    - !MosaicImage
      offset: 0.3
      mosaic_scale: [0.8, 1.0]
@@ -96,6 +97,15 @@ TrainReader:
      sample_flip: 0.5
      use_cv2: true
      interp: 2
+    - !MixupImage
+      alpha: 1.5
+      beta: 1.5
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
    - !NormalizeBox {}
    - !PadBox
      num_max_boxes: 90
@@ -124,6 +134,7 @@ TrainReader:
      num_classes: 20
      iou_thresh: 0.213
  batch_size: 8
+  mixup_epoch: 250
  mosaic_prob: 0.3
  mosaic_epoch: 300
  shuffle: true

--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -87,6 +87,11 @@ class BaseOperator(object):
        return str(self._id)
+def is_mosaiced(context):
+    """Return True only if `context` is a dict with a truthy 'mosaic' entry.
+
+    MosaicImage stores its outcome in context['mosaic']; MixupImage and
+    RandomCrop call this helper to skip samples that were already mosaiced.
+    Anything that is not a dict (e.g. None) counts as not mosaiced.
+    """
+    # dict.get avoids the double lookup; bool() keeps the predicate from
+    # leaking whatever raw value was stored under 'mosaic'.
+    return isinstance(context, dict) and bool(context.get('mosaic'))
 @register_op
 class DecodeImage(BaseOperator):
    def __init__(self, to_rgb=True, with_mosaic=False, with_mixup=False):
@@ -670,6 +675,7 @@ class RandomDistort(BaseOperator):
    def __call__(self, sample, context):
        """random distort the image"""
        ops = [
            self.random_brightness, self.random_contrast,
            self.random_saturation, self.random_hue
@@ -795,6 +801,7 @@ class CropImage(BaseOperator):
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
@@ -1279,9 +1286,10 @@ class MosaicImage(BaseOperator):
    def __call__(self, sample, context=None):
        if 'mosaic0' not in sample:
-            sample = self.crop(sample, 0, 0)
+            # sample = self.crop(sample, 0, 0)
-            if self.sample_flip:
+            # if self.sample_flip:
-                sample = self.sample_flip_fun(sample, self.sample_flip)
+            #     sample = self.sample_flip_fun(sample, self.sample_flip)
+            context['mosaic'] = False
            return sample
        h = sample['h']
        w = sample['w']
@@ -1346,6 +1354,7 @@ class MosaicImage(BaseOperator):
        sample.pop('mosaic1')
        sample.pop('mosaic2')
+        context['mosaic'] = True
        return sample
@@ -1533,6 +1542,9 @@ class MixupImage(BaseOperator):
        return img.astype('uint8')
    def __call__(self, sample, context=None):
+        if is_mosaiced(context):
+            return sample
        if 'mixup' not in sample:
            return sample
        factor = np.random.beta(self.alpha, self.beta)
@@ -2044,6 +2056,9 @@ class RandomCrop(BaseOperator):
        return crop_segms
    def __call__(self, sample, context=None):
+        if is_mosaiced(context):
+            return sample
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

--- a/ppdet/modeling/backbones/cspdarknet.py
+++ b/ppdet/modeling/backbones/cspdarknet.py
@@ -55,7 +55,8 @@ class CSPDarkNet(object):
        return fluid.layers.log(1 + expf)
    def _mish(self, input):
-        return input * fluid.layers.tanh(self._softplus(input))
+        return fluid.layers.mish(input)
+        # return input * fluid.layers.tanh(self._softplus(input))
    def _conv_norm(self,
                   input,

--- a/ppdet/modeling/losses/iou_loss.py
+++ b/ppdet/modeling/losses/iou_loss.py
@@ -64,7 +64,8 @@ class IouLoss(object):
                 downsample_ratio,
                 batch_size,
                 ioup=None,
-                 eps=1.e-10):
+                 eps=1.e-10,
+                 scale_x_y=1.0):
        '''
        Args:
            x  | y | w | h  ([Variables]): the output of yolov3 for encoded x|y|w|h
@@ -75,9 +76,9 @@ class IouLoss(object):
            eps (float): small constant that keeps the denominator from being zero
        '''
        pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio,
-                                    batch_size, False)
+                                    batch_size, False, scale_x_y)
        gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio,
-                                  batch_size, True)
+                                  batch_size, True, 1.0)
        iouk = self._iou(pred, gt, ioup, eps)
        if self.loss_square:
            loss_iou = 1. - iouk * iouk
@@ -145,7 +146,7 @@ class IouLoss(object):
        return diou_term + ciou_term
    def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio,
-                        batch_size, is_gt):
+                        batch_size, is_gt, scale_x_y):
        grid_x = int(self._MAX_WI / downsample_ratio)
        grid_y = int(self._MAX_HI / downsample_ratio)
        an_num = len(anchors) // 2
@@ -179,8 +180,11 @@ class IouLoss(object):
            cy.gradient = True
        else:
            dcx_sig = fluid.layers.sigmoid(dcx)
-            cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
            dcy_sig = fluid.layers.sigmoid(dcy)
+            if abs(scale_x_y - 1.0) > 1e-6:
+                dcx_sig = scale_x_y * dcx_sig - 0.5 * (scale_x_y - 1.)
+                dcy_sig = scale_x_y * dcy_sig - 0.5 * (scale_x_y - 1.)
+            cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
            cy = fluid.layers.elementwise_add(dcy_sig, gj) / grid_y_act
        anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0]

--- a/ppdet/modeling/losses/yolo_loss.py
+++ b/ppdet/modeling/losses/yolo_loss.py
@@ -147,9 +147,13 @@ class YOLOv3Loss(object):
            loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3])
            loss_h = fluid.layers.abs(h - th) * tscale_tobj
            loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3])
+            scale_x_y = self.scale_x_y if not isinstance(
+                self.scale_x_y, Sequence) else self.scale_x_y[i]
            if self._iou_loss is not None:
                loss_iou = self._iou_loss(x, y, w, h, tx, ty, tw, th, anchors,
-                                          downsample, self._batch_size)
+                                          downsample, self._batch_size,
+                                          scale_x_y)
                loss_iou = loss_iou * tscale_tobj
                loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3])
                loss_ious.append(fluid.layers.reduce_mean(loss_iou))