Activate scale x y (#734)

* activate scale_x_y
* update cspdarknet pretrained model

Activate scale x y (#734)
* activate scale_x_y
* update cspdarknet pretrained model
633d8c56 · wangguanzhong · GitHub · f4e1436f · 633d8c56 · 633d8c56
5 changed files
--- a/configs/yolov4/yolov4_cspdarknet.yml
+++ b/configs/yolov4/yolov4_cspdarknet.yml
@@ -32,6 +32,7 @@ YOLOv4Head:
    normalized: true
    score_threshold: 0.001
  downsample: [8,16,32]
+  scale_x_y: [1.2, 1.1, 1.05]

 YOLOv3Loss:
  # batch_size here is only used for fine grained loss, not used
@@ -42,7 +43,7 @@ YOLOv3Loss:
  ignore_thresh: 0.7
  label_smooth: true
  downsample: [8,16,32]
-  #scale_x_y: [1.2, 1.1, 1.05]
+  scale_x_y: [1.2, 1.1, 1.05]
  iou_loss: IouLoss
  match_score: true

@@ -83,7 +84,7 @@ EvalReader:
    !COCODataSet
      image_dir: test2017
      anno_path: annotations/image_info_test-dev2017.json
-      dataset_dir: data/coco
+      dataset_dir: dataset/coco
      with_background: false
  sample_transforms:
    - !DecodeImage

--- a/configs/yolov4/yolov4_cspdarknet_coco.yml
+++ b/configs/yolov4/yolov4_cspdarknet_coco.yml
+architecture: YOLOv4
+use_gpu: true
+max_iters: 500200
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 10000
+metric: COCO
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/CSPDarkNet53_pretrained.pdparams
+weights: output/yolov4_cspdarknet_coco/model_final
+num_classes: 80
+use_fine_grained_loss: true
+
+YOLOv4:
+  backbone: CSPDarkNet
+  yolo_head: YOLOv4Head
+
+CSPDarkNet:
+  norm_type: sync_bn
+  norm_decay: 0.
+  depth: 53
+
+YOLOv4Head:
+  anchors: [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],
+            [72, 146], [142, 110], [192, 243], [459, 401]]
+  anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
+  nms:
+    background_label: -1
+    keep_top_k: -1
+    nms_threshold: 0.45
+    nms_top_k: -1
+    normalized: true
+    score_threshold: 0.001
+  downsample: [8,16,32]
+  scale_x_y: [1.2, 1.1, 1.05]
+
+YOLOv3Loss:
+  # batch_size here is only used for fine grained loss, not used
+  # for training batch_size setting, training batch_size setting
+  # is in configs/yolov3_reader.yml TrainReader.batch_size, batch
+  # size here should be set as same value as TrainReader.batch_size
+  batch_size: 8
+  ignore_thresh: 0.7
+  label_smooth: true
+  downsample: [8,16,32]
+  scale_x_y: [1.2, 1.1, 1.05]
+  iou_loss: IouLoss
+  match_score: true
+
+IouLoss:
+  loss_weight: 0.07
+  max_height: 608
+  max_width: 608
+  ciou_term: true
+  loss_square: true
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 400000
+    - 450000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 4000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 10.
+  optimizer:
+    momentum: 0.949
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+_READER_: '../yolov3_reader.yml'
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_id']
+    num_max_boxes: 50
+  dataset:
+    !COCODataSet
+      image_dir: train2017
+      anno_path: annotations/instances_train2017.json
+      dataset_dir: dataset/coco
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
+    - !NormalizeBox {}
+    - !PadBox
+      num_max_boxes: 50
+    - !BboxXYXY2XYWH {}
+  batch_transforms:
+  - !RandomShape
+    sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+    random_inter: True
+  - !NormalizeImage
+    mean: [0.,0.,0.]
+    std: [1.,1.,1.]
+    is_scale: True
+    is_channel_first: false
+  - !Permute
+    to_bgr: false
+    channel_first: True
+  # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
+  # this operator will be deleted automatically if use_fine_grained_loss
+  # is set as false
+  - !Gt2YoloTarget
+    anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
+    anchors: [[12, 16], [19, 36], [40, 28],
+              [36, 75], [76, 55], [72, 146],
+              [142, 110], [192, 243], [459, 401]]
+    downsample_ratios: [8, 16, 32]
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  worker_num: 8
+  bufsize: 16
+  use_process: true
+  drop_empty: false
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_size', 'im_id']
+    num_max_boxes: 90
+  dataset:
+    !COCODataSet
+      image_dir: val2017
+      anno_path: annotations/instances_val2017.json
+      dataset_dir: dataset/coco
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !ResizeImage
+      target_size: 608
+      interp: 1
+    - !NormalizeImage
+      mean: [0., 0., 0.]
+      std: [1., 1., 1.]
+      is_scale: True
+      is_channel_first: false
+    - !PadBox
+      num_max_boxes: 90
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_size: 4
+  drop_empty: false
+  worker_num: 8
+  bufsize: 16
+
+TestReader:
+  dataset:
+    !ImageFolder
+    use_default_label: true
+    with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !ResizeImage
+      target_size: 608
+      interp: 1
+    - !NormalizeImage
+      mean: [0., 0., 0.]
+      std: [1., 1., 1.]
+      is_scale: True
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
--- a/configs/yolov4/yolov4_cspdarknet_voc.yml
+++ b/configs/yolov4/yolov4_cspdarknet_voc.yml
@@ -31,6 +31,7 @@ YOLOv4Head:
    normalized: true
    score_threshold: 0.001
  downsample: [8,16,32]
+  scale_x_y: [1.2, 1.1, 1.05]

 YOLOv3Loss:
  # batch_size here is only used for fine grained loss, not used
@@ -41,7 +42,7 @@ YOLOv3Loss:
  ignore_thresh: 0.7
  label_smooth: true
  downsample: [8,16,32]
-  #scale_x_y: [1.2, 1.1, 1.05]
+  scale_x_y: [1.2, 1.1, 1.05]
  iou_loss: IouLoss
  match_score: true


--- a/ppdet/modeling/anchor_heads/yolo_head.py
+++ b/ppdet/modeling/anchor_heads/yolo_head.py
@@ -85,8 +85,7 @@ class YOLOv3Head(object):
        if isinstance(nms, dict):
            self.nms = MultiClassNMS(**nms)
        self.downsample = downsample
-        # TODO(guanzhong) activate scale_x_y in Paddle 2.0
-        #self.scale_x_y = scale_x_y
+        self.scale_x_y = scale_x_y
        self.clip_bbox = clip_bbox

    def _conv_bn(self,
@@ -317,8 +316,8 @@ class YOLOv3Head(object):
                                             len(self.anchor_masks[i]),
                                             self.num_classes,
                                             self.iou_aware_factor)
-            #scale_x_y = self.scale_x_y if not isinstance(
-            #    self.scale_x_y, Sequence) else self.scale_x_y[i]
+            scale_x_y = self.scale_x_y if not isinstance(
+                self.scale_x_y, Sequence) else self.scale_x_y[i]
            box, score = fluid.layers.yolo_box(
                x=output,
                img_size=im_size,
@@ -327,7 +326,8 @@ class YOLOv3Head(object):
                conf_thresh=self.nms.score_threshold,
                downsample_ratio=self.downsample[i],
                name=self.prefix_name + "yolo_box" + str(i),
-                clip_bbox=self.clip_bbox)
+                clip_bbox=self.clip_bbox,
+                scale_x_y=scale_x_y)
            boxes.append(box)
            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))

@@ -349,7 +349,7 @@ class YOLOv4Head(YOLOv3Head):
        spp_stage (int): apply spp on which stage.
        num_classes (int): number of output classes
        downsample (list): downsample ratio for each yolo_head
-        scale_x_y (list): scale the left top point of bbox at each stage
+        scale_x_y (list): scale the center point of bbox at each stage
    """
    __inject__ = ['nms', 'yolo_loss']
    __shared__ = ['num_classes', 'weight_prefix_name']
@@ -368,7 +368,7 @@ class YOLOv4Head(YOLOv3Head):
                 num_classes=80,
                 weight_prefix_name='',
                 downsample=[8, 16, 32],
-                 scale_x_y=[1.2, 1.1, 1.05],
+                 scale_x_y=1.0,
                 yolo_loss="YOLOv3Loss",
                 iou_aware=False,
                 iou_aware_factor=0.4,

--- a/ppdet/modeling/losses/yolo_loss.py
+++ b/ppdet/modeling/losses/yolo_loss.py
@@ -58,8 +58,7 @@ class YOLOv3Loss(object):
        self._iou_loss = iou_loss
        self._iou_aware_loss = iou_aware_loss
        self.downsample = downsample
-        # TODO(guanzhong) activate scale_x_y in Paddle 2.0
-        #self.scale_x_y = scale_x_y
+        self.scale_x_y = scale_x_y
        self.match_score = match_score

    def __call__(self, outputs, gt_box, gt_label, gt_score, targets, anchors,
@@ -71,8 +70,8 @@ class YOLOv3Loss(object):
        else:
            losses = []
            for i, output in enumerate(outputs):
-                #scale_x_y = self.scale_x_y if not isinstance(
-                #    self.scale_x_y, Sequence) else self.scale_x_y[i]
+                scale_x_y = self.scale_x_y if not isinstance(
+                    self.scale_x_y, Sequence) else self.scale_x_y[i]
                anchor_mask = anchor_masks[i]
                loss = fluid.layers.yolov3_loss(
                    x=output,
@@ -85,6 +84,7 @@ class YOLOv3Loss(object):
                    ignore_thresh=self._ignore_thresh,
                    downsample_ratio=self.downsample[i],
                    use_label_smooth=self._label_smooth,
+                    scale_x_y=scale_x_y,
                    name=prefix_name + "yolo_loss" + str(i))
                losses.append(fluid.layers.reduce_mean(loss))

@@ -162,11 +162,11 @@ class YOLOv3Loss(object):
                    loss_iou_aware, dim=[1, 2, 3])
                loss_iou_awares.append(fluid.layers.reduce_mean(loss_iou_aware))

-            #scale_x_y = self.scale_x_y if not isinstance(
-            #    self.scale_x_y, Sequence) else self.scale_x_y[i]
+            scale_x_y = self.scale_x_y if not isinstance(
+                self.scale_x_y, Sequence) else self.scale_x_y[i]
            loss_obj_pos, loss_obj_neg = self._calc_obj_loss(
                output, obj, tobj, gt_box, self._batch_size, anchors,
-                num_classes, downsample, self._ignore_thresh)
+                num_classes, downsample, self._ignore_thresh, scale_x_y)

            loss_cls = fluid.layers.sigmoid_cross_entropy_with_logits(cls, tcls)
            loss_cls = fluid.layers.elementwise_mul(loss_cls, tobj, axis=0)
@@ -276,7 +276,7 @@ class YOLOv3Loss(object):
        return (tx, ty, tw, th, tscale, tobj, tcls)

    def _calc_obj_loss(self, output, obj, tobj, gt_box, batch_size, anchors,
-                       num_classes, downsample, ignore_thresh):
+                       num_classes, downsample, ignore_thresh, scale_x_y):
        # A prediction bbox overlap any gt_bbox over ignore_thresh, 
        # objectness loss will be ignored, process as follows:

@@ -290,7 +290,8 @@ class YOLOv3Loss(object):
            class_num=num_classes,
            conf_thresh=0.,
            downsample_ratio=downsample,
-            clip_bbox=False)
+            clip_bbox=False,
+            scale_x_y=scale_x_y)

        # 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox
        #    and gt bbox in each sample