From 825cea1d5f8acaabbc5a6c67bf66064b163ffcab Mon Sep 17 00:00:00 2001
From: dengkaipeng <dengkaipeng@baidu.com>
Date: Tue, 9 Jun 2020 12:19:28 +0000
Subject: [PATCH] add augment: mixup/distort/expand/crop/flip for YOLOv4, scale_x_y in IoU loss

---
 configs/yolov4/yolov4_cspdarknet_coco.yml | 10 ++++++++++
 configs/yolov4/yolov4_cspdarknet_voc.yml  | 11 +++++++++++
 ppdet/data/transform/operators.py         | 21 ++++++++++++++++++---
 ppdet/modeling/backbones/cspdarknet.py    |  3 ++-
 ppdet/modeling/losses/iou_loss.py         | 14 +++++++++-----
 ppdet/modeling/losses/yolo_loss.py        |  6 +++++-
 6 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/configs/yolov4/yolov4_cspdarknet_coco.yml b/configs/yolov4/yolov4_cspdarknet_coco.yml
index efbc33c52..b4f7908fa 100644
--- a/configs/yolov4/yolov4_cspdarknet_coco.yml
+++ b/configs/yolov4/yolov4_cspdarknet_coco.yml
@@ -90,6 +90,7 @@ TrainReader:
     - !DecodeImage
       to_rgb: True
       with_mosaic: True
+      with_mixup: True
     - !MosaicImage
       offset: 0.3
       mosaic_scale: [0.8, 1.0]
@@ -97,6 +98,15 @@ TrainReader:
       sample_flip: 0.5
       use_cv2: true
       interp: 2
+    - !MixupImage
+      alpha: 1.5
+      beta: 1.5
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
     - !NormalizeBox {}
     - !PadBox
       num_max_boxes: 90
diff --git a/configs/yolov4/yolov4_cspdarknet_voc.yml b/configs/yolov4/yolov4_cspdarknet_voc.yml
index af5cd6d39..cc9629a16 100644
--- a/configs/yolov4/yolov4_cspdarknet_voc.yml
+++ b/configs/yolov4/yolov4_cspdarknet_voc.yml
@@ -89,6 +89,7 @@ TrainReader:
     - !DecodeImage
       to_rgb: True
       with_mosaic: True
+      with_mixup: True
     - !MosaicImage
       offset: 0.3
       mosaic_scale: [0.8, 1.0]
@@ -96,6 +97,15 @@ TrainReader:
       sample_flip: 0.5
       use_cv2: true
       interp: 2
+    - !MixupImage
+      alpha: 1.5
+      beta: 1.5
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
     - !NormalizeBox {}
     - !PadBox
       num_max_boxes: 90
@@ -124,6 +134,7 @@ TrainReader:
       num_classes: 20
       iou_thresh: 0.213
   batch_size: 8
+  mixup_epoch: 250
   mosaic_prob: 0.3
   mosaic_epoch: 300
   shuffle: true
diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py
index 1fa25e244..6fbf757be 100644
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -87,6 +87,11 @@ class BaseOperator(object):
         return str(self._id)
 
 
+def is_mosaiced(context):  # truthy only when context is a dict with a truthy 'mosaic' entry
+    return isinstance(context, dict) and \
+            'mosaic' in context and context['mosaic']  # False for non-dict context or missing key
+
+
 @register_op
 class DecodeImage(BaseOperator):
     def __init__(self, to_rgb=True, with_mosaic=False, with_mixup=False):
@@ -670,6 +675,7 @@ class RandomDistort(BaseOperator):
 
     def __call__(self, sample, context):
         """random distort the image"""
+
         ops = [
             self.random_brightness, self.random_contrast,
             self.random_saturation, self.random_hue
@@ -795,6 +801,7 @@ class CropImage(BaseOperator):
         Returns:
             sample: the image, bounding box are replaced.
         """
+
         assert 'image' in sample, "image data not found"
         im = sample['image']
         gt_bbox = sample['gt_bbox']
@@ -1279,9 +1286,10 @@ class MosaicImage(BaseOperator):
 
     def __call__(self, sample, context=None):
         if 'mosaic0' not in sample:
-            sample = self.crop(sample, 0, 0)
-            if self.sample_flip:
-                sample = self.sample_flip_fun(sample, self.sample_flip)
+            # sample = self.crop(sample, 0, 0)
+            # if self.sample_flip:
+            #     sample = self.sample_flip_fun(sample, self.sample_flip)
+            context['mosaic'] = False
             return sample
         h = sample['h']
         w = sample['w']
@@ -1346,6 +1354,7 @@ class MosaicImage(BaseOperator):
         sample.pop('mosaic1')
         sample.pop('mosaic2')
 
+        context['mosaic'] = True
         return sample
 
 
@@ -1533,6 +1542,9 @@ class MixupImage(BaseOperator):
         return img.astype('uint8')
 
     def __call__(self, sample, context=None):
+        if is_mosaiced(context):
+            return sample
+
         if 'mixup' not in sample:
             return sample
         factor = np.random.beta(self.alpha, self.beta)
@@ -2044,6 +2056,9 @@ class RandomCrop(BaseOperator):
         return crop_segms
 
     def __call__(self, sample, context=None):
+        if is_mosaiced(context):
+            return sample
+
         if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
             return sample
 
diff --git a/ppdet/modeling/backbones/cspdarknet.py b/ppdet/modeling/backbones/cspdarknet.py
index c789e2229..1b758bca8 100644
--- a/ppdet/modeling/backbones/cspdarknet.py
+++ b/ppdet/modeling/backbones/cspdarknet.py
@@ -55,7 +55,8 @@ class CSPDarkNet(object):
         return fluid.layers.log(1 + expf)
 
     def _mish(self, input):
-        return input * fluid.layers.tanh(self._softplus(input))
+        return fluid.layers.mish(input)
+        # return input * fluid.layers.tanh(self._softplus(input))
 
     def _conv_norm(self,
                    input,
diff --git a/ppdet/modeling/losses/iou_loss.py b/ppdet/modeling/losses/iou_loss.py
index 61ee495e0..dd0153b27 100644
--- a/ppdet/modeling/losses/iou_loss.py
+++ b/ppdet/modeling/losses/iou_loss.py
@@ -64,7 +64,8 @@ class IouLoss(object):
                  downsample_ratio,
                  batch_size,
                  ioup=None,
-                 eps=1.e-10):
+                 eps=1.e-10,
+                 scale_x_y=1.0):
         '''
         Args:
             x  | y | w | h  ([Variables]): the output of yolov3 for encoded x|y|w|h
@@ -75,9 +76,9 @@ class IouLoss(object):
             eps (float): the decimal to prevent the denominator eqaul zero
         '''
         pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio,
-                                    batch_size, False)
+                                    batch_size, False, scale_x_y)
         gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio,
-                                  batch_size, True)
+                                  batch_size, True, 1.0)
         iouk = self._iou(pred, gt, ioup, eps)
         if self.loss_square:
             loss_iou = 1. - iouk * iouk
@@ -145,7 +146,7 @@ class IouLoss(object):
         return diou_term + ciou_term
 
     def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio,
-                        batch_size, is_gt):
+                        batch_size, is_gt, scale_x_y):
         grid_x = int(self._MAX_WI / downsample_ratio)
         grid_y = int(self._MAX_HI / downsample_ratio)
         an_num = len(anchors) // 2
@@ -179,8 +180,11 @@ class IouLoss(object):
             cy.gradient = True
         else:
             dcx_sig = fluid.layers.sigmoid(dcx)
-            cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
             dcy_sig = fluid.layers.sigmoid(dcy)
+            if abs(scale_x_y - 1.0) > 1e-6:
+                dcx_sig = scale_x_y * dcx_sig - 0.5 * (scale_x_y - 1.)
+                dcy_sig = scale_x_y * dcy_sig - 0.5 * (scale_x_y - 1.)
+            cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
             cy = fluid.layers.elementwise_add(dcy_sig, gj) / grid_y_act
 
         anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0]
diff --git a/ppdet/modeling/losses/yolo_loss.py b/ppdet/modeling/losses/yolo_loss.py
index 66967f30b..545ca84a1 100644
--- a/ppdet/modeling/losses/yolo_loss.py
+++ b/ppdet/modeling/losses/yolo_loss.py
@@ -147,9 +147,13 @@ class YOLOv3Loss(object):
             loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3])
             loss_h = fluid.layers.abs(h - th) * tscale_tobj
             loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3])
+
+            scale_x_y = self.scale_x_y if not isinstance(
+                self.scale_x_y, Sequence) else self.scale_x_y[i]
             if self._iou_loss is not None:
                 loss_iou = self._iou_loss(x, y, w, h, tx, ty, tw, th, anchors,
-                                          downsample, self._batch_size)
+                                          downsample, self._batch_size,
+                                          scale_x_y)
                 loss_iou = loss_iou * tscale_tobj
                 loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3])
                 loss_ious.append(fluid.layers.reduce_mean(loss_iou))
-- 
GitLab