From f7e67d95927b9c7a760c3794c344c0f08ffdddd4 Mon Sep 17 00:00:00 2001
From: dengkaipeng
Date: Tue, 15 Sep 2020 13:20:54 +0000
Subject: [PATCH] ppyolo support h != w.

---
 configs/ppyolo/README.md                    |   2 +
 configs/ppyolo/README_cn.md                 |   2 +
 configs/ppyolo/ppyolo_reader_keep_ratio.yml | 111 ++++++++++++++++++++
 ppdet/data/transform/batch_operators.py     |  26 ++++-
 4 files changed, 137 insertions(+), 4 deletions(-)
 create mode 100644 configs/ppyolo/ppyolo_reader_keep_ratio.yml

diff --git a/configs/ppyolo/README.md b/configs/ppyolo/README.md
index 11837a1b6..c4601d7c8 100644
--- a/configs/ppyolo/README.md
+++ b/configs/ppyolo/README.md
@@ -82,6 +82,8 @@ Training PP-YOLO on 8 GPUs with following command(all commands should be run und
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python tools/train.py -c configs/ppyolo/ppyolo.yml --eval
 ```

+**Notes:** If you want to preserve the aspect ratio of images during training, evaluation, and inference, please see `configs/ppyolo/ppyolo_reader_keep_ratio.yml`.
+
 ### 2. Evaluation

 Evaluating PP-YOLO on COCO val2017 dataset in single GPU with following commands:
diff --git a/configs/ppyolo/README_cn.md b/configs/ppyolo/README_cn.md
index 2c81cd9d6..0fbb9f152 100644
--- a/configs/ppyolo/README_cn.md
+++ b/configs/ppyolo/README_cn.md
@@ -83,6 +83,8 @@ PP-YOLO improves the accuracy and speed of YOLOv3 in the following aspects:
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python tools/train.py -c configs/ppyolo/ppyolo.yml --eval
 ```

+**Note:** To keep the aspect ratio of images unchanged during training, evaluation, and inference, refer to `configs/ppyolo/ppyolo_reader_keep_ratio.yml`.
+
 ### 2. Evaluation

 Evaluate the model on the COCO val2017 dataset with a single GPU using the following command:
diff --git a/configs/ppyolo/ppyolo_reader_keep_ratio.yml b/configs/ppyolo/ppyolo_reader_keep_ratio.yml
new file mode 100644
index 000000000..6b4d35c6e
--- /dev/null
+++ b/configs/ppyolo/ppyolo_reader_keep_ratio.yml
@@ -0,0 +1,111 @@
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+    num_max_boxes: 50
+  dataset:
+    !COCODataSet
+      image_dir: train2017
+      anno_path: annotations/instances_train2017.json
+      dataset_dir: dataset/coco
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+      with_mixup: True
+    - !MixupImage
+      alpha: 1.5
+      beta: 1.5
+    - !ColorDistort {}
+    - !RandomExpand
+      fill_value: [123.675, 116.28, 103.53]
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
+    - !NormalizeBox {}
+    - !PadBox
+      num_max_boxes: 50
+    - !BboxXYXY2XYWH {}
+  batch_transforms:
+    - !RandomShape
+      ratios: [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+      random_inter: True
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: True
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
+    - !PadBatch
+      pad_to_stride: 32
+    # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
+    # this operator will be deleted automatically if use_fine_grained_loss
+    # is set as false
+    - !Gt2YoloTarget
+      anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+      anchors: [[10, 13], [16, 30], [33, 23],
+                [30, 61], [62, 45], [59, 119],
+                [116, 90], [156, 198], [373, 326]]
+      downsample_ratios: [32, 16, 8]
+  batch_size: 24
+  shuffle: true
+  mixup_epoch: 25000
+  drop_last: true
+  worker_num: 8
+  bufsize: 4
+  use_process: true
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_size', 'im_id']
+    num_max_boxes: 50
+  dataset:
+    !COCODataSet
+      image_dir: val2017
+      anno_path: annotations/instances_val2017.json
+      dataset_dir: dataset/coco
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: True
+      is_channel_first: false
+    - !PadBox
+      num_max_boxes: 50
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_transforms:
+    - !PadBatch
+      pad_to_stride: 32
+  batch_size: 1
+  drop_empty: false
+  worker_num: 8
+  bufsize: 4
+
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_size', 'im_id']
+  dataset:
+    !ImageFolder
+      anno_path: annotations/instances_val2017.json
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: True
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_transforms:
+    - !PadBatch
+      pad_to_stride: 32
+  batch_size: 1
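Unlike the default `ppyolo_reader.yml`, the evaluation and test pipelines above contain no fixed-size resize step: images keep their original height and width and are only zero-padded by `PadBatch` up to the next multiple of 32. The snippet below is a minimal NumPy sketch of that padding rule (a hypothetical `pad_batch_to_stride` helper for illustration, not the ppdet operator itself):

```python
import numpy as np

def pad_batch_to_stride(images, stride=32):
    """Zero-pad CHW images to the batch-wide max size, rounded up to `stride`.

    Hypothetical helper mirroring the behaviour of the `PadBatch(pad_to_stride=32)`
    step configured above; it is not the ppdet implementation itself.
    """
    # Batch-wide maximum (C, H, W), then round H and W up to a multiple of `stride`.
    max_shape = np.array([im.shape for im in images]).max(axis=0)
    if stride > 0:
        max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
        max_shape[2] = int(np.ceil(max_shape[2] / stride) * stride)
    padded = []
    for im in images:
        c, h, w = im.shape
        canvas = np.zeros((c, max_shape[1], max_shape[2]), dtype=np.float32)
        canvas[:, :h, :w] = im  # original pixels stay in the top-left corner
        padded.append(canvas)
    return padded

# A 3 x 416 x 624 image is padded to 3 x 416 x 640; its aspect ratio is untouched.
sample = np.random.rand(3, 416, 624).astype(np.float32)
print(pad_batch_to_stride([sample])[0].shape)  # (3, 416, 640)
```

Padding instead of resizing is what allows h != w inputs to reach the network; the Python changes below make `PadBatch` and `RandomShape` cooperate with that.
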
diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py
index 331752d77..c69cc909a 100644
--- a/ppdet/data/transform/batch_operators.py
+++ b/ppdet/data/transform/batch_operators.py
@@ -80,6 +80,7 @@ class PadBatch(BaseOperator):
                 (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
             padding_im[:, :im_h, :im_w] = im
             data['image'] = padding_im
+            data['h'], data['w'] = data['image'].shape[1:3]
             if self.use_padded_im_info:
                 data['im_info'][:2] = max_shape[1:3]
             if 'semantic' in data.keys() and data['semantic'] is not None:
@@ -101,12 +102,20 @@ class RandomShape(BaseOperator):
         False, use cv2.INTER_NEAREST.
     Args:
         sizes (list): list of int, random choose a size from these
+        ratios (list): list of float, randomly choose a ratio to resize the image.
         random_inter (bool): whether to randomly interpolation, defalut true.
     """

-    def __init__(self, sizes=[], random_inter=False, resize_box=False):
+    def __init__(self,
+                 sizes=[],
+                 ratios=[],
+                 random_inter=False,
+                 resize_box=False):
         super(RandomShape, self).__init__()
+        assert len(sizes) == 0 or len(ratios) == 0, \
+            "only one of 'sizes' and 'ratios' can be set"
         self.sizes = sizes
+        self.ratios = ratios
         self.random_inter = random_inter
         self.interps = [
             cv2.INTER_NEAREST,
@@ -118,14 +127,23 @@ class RandomShape(BaseOperator):
         self.resize_box = resize_box

     def __call__(self, samples, context=None):
-        shape = np.random.choice(self.sizes)
         method = np.random.choice(self.interps) if self.random_inter \
             else cv2.INTER_NEAREST
+        if len(self.sizes) > 0:
+            shape = np.random.choice(self.sizes)
+        elif len(self.ratios) > 0:
+            ratio = np.random.choice(self.ratios)
         for i in range(len(samples)):
             im = samples[i]['image']
             h, w = im.shape[:2]
-            scale_x = float(shape) / w
-            scale_y = float(shape) / h
+
+            if len(self.sizes) > 0:
+                scale_x = float(shape) / w
+                scale_y = float(shape) / h
+            elif len(self.ratios) > 0:
+                scale_x = ratio
+                scale_y = ratio
+
             im = cv2.resize(
                 im, None, None, fx=scale_x, fy=scale_y, interpolation=method)
             samples[i]['image'] = im
--
GitLab
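For reference, the scaling rule this patch adds to `RandomShape` can be summarized in a standalone sketch (a hypothetical `random_shape_resize` helper; it mirrors only the `sizes` vs. `ratios` branch, not the full batch operator):

```python
import cv2
import numpy as np

def random_shape_resize(im, sizes=(), ratios=(), rng=np.random):
    """Standalone sketch of the resize rule this patch adds to RandomShape.

    With `sizes`, height and width are both forced to the chosen size (square
    output); with the new `ratios`, one scale factor is applied to both axes,
    so a rectangular input stays rectangular (h != w is preserved).
    """
    assert len(sizes) == 0 or len(ratios) == 0, \
        "only one of 'sizes' and 'ratios' can be set"
    h, w = im.shape[:2]
    if len(sizes) > 0:
        shape = rng.choice(sizes)
        scale_x, scale_y = float(shape) / w, float(shape) / h
    else:
        scale_x = scale_y = rng.choice(ratios)
    return cv2.resize(
        im, None, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)

# A 480 x 640 image scaled with ratio 0.5 becomes 240 x 320 (same aspect ratio),
# whereas sizes=[608] would squeeze it into a square 608 x 608.
img = np.zeros((480, 640, 3), dtype=np.uint8)
print(random_shape_resize(img, ratios=[0.5]).shape)  # (240, 320, 3)
```

Together with the per-sample `h`/`w` bookkeeping added to `PadBatch`, this is what lets the keep-ratio reader feed non-square batches through PP-YOLO.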