PP-YOLO: support input images whose height and width differ (h != w).

f7e67d95 · dengkaipeng · 0e5f87c3 · f7e67d95 · f7e67d95 · f7e67d95
4 changed files
--- a/configs/ppyolo/README.md
+++ b/configs/ppyolo/README.md
@@ -82,6 +82,8 @@ Training PP-YOLO on 8 GPUs with following command(all commands should be run und
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python tools/train.py -c configs/ppyolo/ppyolo.yml --eval
 ```

+**Note:** To preserve the aspect ratio of images during training, evaluation, and inference, see `configs/ppyolo/ppyolo_reader_keep_ratio.yml`.
+
 ### 2. Evaluation

 Evaluating PP-YOLO on COCO val2017 dataset in single GPU with following commands:

--- a/configs/ppyolo/README_cn.md
+++ b/configs/ppyolo/README_cn.md
@@ -83,6 +83,8 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度：
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python tools/train.py -c configs/ppyolo/ppyolo.yml --eval
 ```

+**注意：** 如果想在训练、评估、预测过程中不改变图像的长宽比，可以参考`configs/ppyolo/ppyolo_reader_keep_ratio.yml`
+
 ### 2. 评估

 使用单GPU通过如下命令一键式评估模型在COCO val2017数据集效果

--- a/configs/ppyolo/ppyolo_reader_keep_ratio.yml
+++ b/configs/ppyolo/ppyolo_reader_keep_ratio.yml
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+    num_max_boxes: 50
+  dataset:
+    !COCODataSet
+      image_dir: train2017
+      anno_path: annotations/instances_train2017.json
+      dataset_dir: dataset/coco
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+      with_mixup: True
+    - !MixupImage
+      alpha: 1.5
+      beta: 1.5
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
+    - !NormalizeBox {}
+    - !PadBox
+      num_max_boxes: 50
+    - !BboxXYXY2XYWH {}
+  batch_transforms:
+  - !RandomShape
+    ratios: [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+    random_inter: True
+  - !NormalizeImage
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+    is_scale: True
+    is_channel_first: false
+  - !Permute
+    to_bgr: false
+    channel_first: True
+  - !PadBatch
+    pad_to_stride: 32
+  # Gt2YoloTarget is only used when use_fine_grained_loss is set to true;
+  # this operator is removed automatically if use_fine_grained_loss
+  # is set to false.
+  - !Gt2YoloTarget
+    anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+    anchors: [[10, 13], [16, 30], [33, 23],
+              [30, 61], [62, 45], [59, 119],
+              [116, 90], [156, 198], [373, 326]]
+    downsample_ratios: [32, 16, 8]
+  batch_size: 24
+  shuffle: true
+  mixup_epoch: 25000
+  drop_last: true
+  worker_num: 8
+  bufsize: 4
+  use_process: true
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_size', 'im_id']
+    num_max_boxes: 50
+  dataset:
+    !COCODataSet
+      image_dir: val2017
+      anno_path: annotations/instances_val2017.json
+      dataset_dir: dataset/coco
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: True
+      is_channel_first: false
+    - !PadBox
+      num_max_boxes: 50
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_transforms:
+    - !PadBatch
+      pad_to_stride: 32
+  batch_size: 1
+  drop_empty: false
+  worker_num: 8
+  bufsize: 4
+
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_size', 'im_id']
+  dataset:
+    !ImageFolder
+      anno_path: annotations/instances_val2017.json
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: True
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_transforms:
+    - !PadBatch
+      pad_to_stride: 32
+  batch_size: 1
--- a/ppdet/data/transform/batch_operators.py
+++ b/ppdet/data/transform/batch_operators.py
@@ -80,6 +80,7 @@ class PadBatch(BaseOperator):
                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = im
            data['image'] = padding_im
+            data['h'], data['w'] = data['image'].shape[1:3]
            if self.use_padded_im_info:
                data['im_info'][:2] = max_shape[1:3]
            if 'semantic' in data.keys() and data['semantic'] is not None:
@@ -101,12 +102,20 @@ class RandomShape(BaseOperator):
    False, use cv2.INTER_NEAREST.
    Args:
        sizes (list): list of int, random choose a size from these
+        ratios (list): list of float, randomly choose one ratio to scale images by; cannot be set together with 'sizes'.
        random_inter (bool): whether to randomly interpolation, defalut true.
    """

-    def __init__(self, sizes=[], random_inter=False, resize_box=False):
+    def __init__(self,
+                 sizes=[],
+                 ratios=[],
+                 random_inter=False,
+                 resize_box=False):
        super(RandomShape, self).__init__()
+        assert len(sizes) == 0 or len(ratios) == 0, \
+                "'sizes' and 'ratios' only one can be set"
        self.sizes = sizes
+        self.ratios = ratios
        self.random_inter = random_inter
        self.interps = [
            cv2.INTER_NEAREST,
@@ -118,14 +127,23 @@ class RandomShape(BaseOperator):
        self.resize_box = resize_box

    def __call__(self, samples, context=None):
-        shape = np.random.choice(self.sizes)
        method = np.random.choice(self.interps) if self.random_inter \
            else cv2.INTER_NEAREST
+        if len(self.sizes) > 0:
+            shape = np.random.choice(self.sizes)
+        elif len(self.ratios) > 0:
+            ratio = np.random.choice(self.ratios)
        for i in range(len(samples)):
            im = samples[i]['image']
            h, w = im.shape[:2]
+
+            if len(self.sizes) > 0:
                scale_x = float(shape) / w
                scale_y = float(shape) / h
+            elif len(self.ratios) > 0:
+                scale_x = ratio
+                scale_y = ratio
+
            im = cv2.resize(
                im, None, None, fx=scale_x, fy=scale_y, interpolation=method)
            samples[i]['image'] = im