diff --git a/configs/yolov4/yolov4_cspdarknet_coco.yml b/configs/yolov4/yolov4_cspdarknet_coco.yml index efbc33c52407616cb06107436714f43dac1a2105..b4f7908fa0969c936cc7944a41400c931a2eaf29 100644 --- a/configs/yolov4/yolov4_cspdarknet_coco.yml +++ b/configs/yolov4/yolov4_cspdarknet_coco.yml @@ -90,6 +90,7 @@ TrainReader: - !DecodeImage to_rgb: True with_mosaic: True + with_mixup: True - !MosaicImage offset: 0.3 mosaic_scale: [0.8, 1.0] @@ -97,6 +98,15 @@ TrainReader: sample_flip: 0.5 use_cv2: true interp: 2 + - !MixupImage + alpha: 1.5 + beta: 1.5 + - !ColorDistort {} + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop {} + - !RandomFlipImage + is_normalized: false - !NormalizeBox {} - !PadBox num_max_boxes: 90 diff --git a/configs/yolov4/yolov4_cspdarknet_voc.yml b/configs/yolov4/yolov4_cspdarknet_voc.yml index af5cd6d39b01e357f2e6be958e51c21546fcec50..cc9629a169ef29545d6e8b5f2836c71349e8d6e7 100644 --- a/configs/yolov4/yolov4_cspdarknet_voc.yml +++ b/configs/yolov4/yolov4_cspdarknet_voc.yml @@ -89,6 +89,7 @@ TrainReader: - !DecodeImage to_rgb: True with_mosaic: True + with_mixup: True - !MosaicImage offset: 0.3 mosaic_scale: [0.8, 1.0] @@ -96,6 +97,15 @@ TrainReader: sample_flip: 0.5 use_cv2: true interp: 2 + - !MixupImage + alpha: 1.5 + beta: 1.5 + - !ColorDistort {} + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop {} + - !RandomFlipImage + is_normalized: false - !NormalizeBox {} - !PadBox num_max_boxes: 90 @@ -124,6 +134,7 @@ TrainReader: num_classes: 20 iou_thresh: 0.213 batch_size: 8 + mixup_epoch: 250 mosaic_prob: 0.3 mosaic_epoch: 300 shuffle: true diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index 1fa25e244bb7454103a0893eaeb89ca1c69f04ae..6fbf757be260b560f774a54f2c6a96d4b3a73acb 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -87,6 +87,11 @@ class BaseOperator(object): return str(self._id) +def is_mosaiced(context): + return isinstance(context, dict) and \ + 'mosaic' in context and context['mosaic'] + + @register_op class DecodeImage(BaseOperator): def __init__(self, to_rgb=True, with_mosaic=False, with_mixup=False): @@ -670,6 +675,7 @@ class RandomDistort(BaseOperator): def __call__(self, sample, context): """random distort the image""" + ops = [ self.random_brightness, self.random_contrast, self.random_saturation, self.random_hue @@ -795,6 +801,7 @@ class CropImage(BaseOperator): Returns: sample: the image, bounding box are replaced. """ + assert 'image' in sample, "image data not found" im = sample['image'] gt_bbox = sample['gt_bbox'] @@ -1279,9 +1286,10 @@ class MosaicImage(BaseOperator): def __call__(self, sample, context=None): if 'mosaic0' not in sample: - sample = self.crop(sample, 0, 0) - if self.sample_flip: - sample = self.sample_flip_fun(sample, self.sample_flip) + # sample = self.crop(sample, 0, 0) + # if self.sample_flip: + # sample = self.sample_flip_fun(sample, self.sample_flip) + context['mosaic'] = False return sample h = sample['h'] w = sample['w'] @@ -1346,6 +1354,7 @@ class MosaicImage(BaseOperator): sample.pop('mosaic1') sample.pop('mosaic2') + context['mosaic'] = True return sample @@ -1533,6 +1542,9 @@ class MixupImage(BaseOperator): return img.astype('uint8') def __call__(self, sample, context=None): + if is_mosaiced(context): + return sample + if 'mixup' not in sample: return sample factor = np.random.beta(self.alpha, self.beta) @@ -2044,6 +2056,9 @@ class RandomCrop(BaseOperator): return crop_segms def __call__(self, sample, context=None): + if is_mosaiced(context): + return sample + if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0: return sample diff --git a/ppdet/modeling/backbones/cspdarknet.py b/ppdet/modeling/backbones/cspdarknet.py index c789e22299ad06a73c78fcef6d89002afb845780..1b758bca81c79b0b44e3a9161170bfcba2153990 100644 --- a/ppdet/modeling/backbones/cspdarknet.py +++ b/ppdet/modeling/backbones/cspdarknet.py @@ -55,7 +55,8 @@ class CSPDarkNet(object): return fluid.layers.log(1 + expf) def _mish(self, input): - return input * fluid.layers.tanh(self._softplus(input)) + return fluid.layers.mish(input) + # return input * fluid.layers.tanh(self._softplus(input)) def _conv_norm(self, input, diff --git a/ppdet/modeling/losses/iou_loss.py b/ppdet/modeling/losses/iou_loss.py index 61ee495e0261ce71e9e4c6d581f2353ae09d5c43..dd0153b271aa957c8d7e8d53d036847378448779 100644 --- a/ppdet/modeling/losses/iou_loss.py +++ b/ppdet/modeling/losses/iou_loss.py @@ -64,7 +64,8 @@ class IouLoss(object): downsample_ratio, batch_size, ioup=None, - eps=1.e-10): + eps=1.e-10, + scale_x_y=1.0): ''' Args: x | y | w | h ([Variables]): the output of yolov3 for encoded x|y|w|h @@ -75,9 +76,9 @@ class IouLoss(object): eps (float): the decimal to prevent the denominator eqaul zero ''' pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio, - batch_size, False) + batch_size, False, scale_x_y) gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio, - batch_size, True) + batch_size, True, 1.0) iouk = self._iou(pred, gt, ioup, eps) if self.loss_square: loss_iou = 1. - iouk * iouk @@ -145,7 +146,7 @@ class IouLoss(object): return diou_term + ciou_term def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio, - batch_size, is_gt): + batch_size, is_gt, scale_x_y): grid_x = int(self._MAX_WI / downsample_ratio) grid_y = int(self._MAX_HI / downsample_ratio) an_num = len(anchors) // 2 @@ -179,8 +180,11 @@ class IouLoss(object): cy.gradient = True else: dcx_sig = fluid.layers.sigmoid(dcx) - cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act dcy_sig = fluid.layers.sigmoid(dcy) + if abs(scale_x_y - 1.0) > 1e-6: + dcx_sig = scale_x_y * dcx_sig - 0.5 * (scale_x_y - 1.) + dcy_sig = scale_x_y * dcy_sig - 0.5 * (scale_x_y - 1.) + cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act cy = fluid.layers.elementwise_add(dcy_sig, gj) / grid_y_act anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0] diff --git a/ppdet/modeling/losses/yolo_loss.py b/ppdet/modeling/losses/yolo_loss.py index 66967f30bfbb8bde8b25b5cea0fbaaebb063fef2..545ca84a130132fffe566ab351b316bc8ac888e4 100644 --- a/ppdet/modeling/losses/yolo_loss.py +++ b/ppdet/modeling/losses/yolo_loss.py @@ -147,9 +147,13 @@ class YOLOv3Loss(object): loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3]) loss_h = fluid.layers.abs(h - th) * tscale_tobj loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3]) + + scale_x_y = self.scale_x_y if not isinstance( + self.scale_x_y, Sequence) else self.scale_x_y[i] if self._iou_loss is not None: loss_iou = self._iou_loss(x, y, w, h, tx, ty, tw, th, anchors, - downsample, self._batch_size) + downsample, self._batch_size, + scale_x_y) loss_iou = loss_iou * tscale_tobj loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3]) loss_ious.append(fluid.layers.reduce_mean(loss_iou))