diff --git a/README.md b/README.md
index 0f0375e7e595485026a3f12f329a68f2a6a4de6d..bab001a2ed01734555271d3159ba68fb08c615b6 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ PaddleDetection的目的是为工业界和学术界提供丰富、易用的目
 - [行人检测和车辆检测预训练模型](contrib/README_cn.md) 针对不同场景的检测模型
 - [YOLOv3增强模型](docs/YOLOv3_ENHANCEMENT.md) 改进原始YOLOv3,精度达到41.4%,原论文精度为33.0%,同时预测速度也得到提升
 - [Objects365 2019 Challenge夺冠模型](docs/CACascadeRCNN.md) Objects365 Full Track任务中最好的单模型之一,精度达到31.7%
+- [Open Images V5 and Objects365 dataset models](docs/OIDV5_BASELINE_MODEL.md) Baseline models for the Open Images Dataset V5 and Objects365 datasets
 
 ## 模型压缩
 
@@ -90,8 +91,13 @@ PaddleDetection的目的是为工业界和学术界提供丰富、易用的目
 
 ## 版本更新
 
-### 10/2019
+### 21/11/2019
+- Add the CascadeClsAware RCNN model.
+- Add CBNet, ResNet200 and Non-local models.
+- Add SoftNMS.
+- Add models for the Open Images V5 and Objects365 datasets.
 
+### 10/2019
 - 增加增强版YOLOv3模型,精度高达41.4%。
 - 增加人脸检测模型BlazeFace、Faceboxes。
 - 丰富基于COCO的模型,精度高达51.9%。
diff --git a/README_en.md b/README_en.md
index e055beadcb1ac5425d3afe32c952a5df6fdf779a..e083193dd21536a315843220a854bf09baac64a3 100644
--- a/README_en.md
+++ b/README_en.md
@@ -80,6 +80,7 @@ Advanced Features:
 - [Pretrained models for pedestrian and vehicle detection](contrib/README.md) Models for object detection in specific scenarios.
 - [YOLOv3 enhanced model](docs/YOLOv3_ENHANCEMENT.md) Compared to MAP of 33.0% in paper, enhanced YOLOv3 reaches the MAP of 41.4% and inference speed is improved as well
 - [Objects365 2019 Challenge champion model](docs/CACascadeRCNN.md) One of the best single models in Objects365 Full Track of which MAP reaches 31.7%.
+- [Open Images Dataset V5 and Objects365 Dataset models](docs/OIDV5_BASELINE_MODEL.md) Baseline models trained on Open Images Dataset V5 and Objects365.
 
 ## Model compression
 
@@ -98,6 +99,12 @@ Advanced Features:
 
 ## Updates
 
+#### 21/11/2019
+- Add the CascadeClsAware RCNN model.
+- Add CBNet, ResNet200 and Non-local models.
+- Add SoftNMS.
+- Add models for Open Images Dataset V5 and the Objects365 Dataset.
+
 #### 10/2019
 - Add enhanced YOLOv3 models, box mAP up to 41.4%.
diff --git a/configs/obj365/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml b/configs/obj365/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0de0e9f521d3d44459cb67e45cbe07bd82855431
--- /dev/null
+++ b/configs/obj365/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
@@ -0,0 +1,190 @@
+architecture: CascadeRCNNClsAware
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 800000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar
+weights: output/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms/model_final
+# The obj365 dataset format and its evaluation method are the same as those for COCO.
+metric: COCO
+num_classes: 366  # 365 object categories plus background
+
+CascadeRCNNClsAware:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: CascadeBBoxHead
+  bbox_assigner: CascadeBBoxAssigner
+
+ResNet:
+  norm_type: bn
+  depth: 200
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  variant: d
+  dcn_v2_stages: [3, 4, 5]
+  nonlocal_stages: [4]
+
+FPN:
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_positive_overlap: 0.7
+    rpn_negative_overlap: 0.3
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  min_level: 2
+  max_level: 5
+  box_resolution: 14
+  sampling_ratio: 2
+
+CascadeBBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [10, 20, 30]
+  bg_thresh_lo: [0.0, 0.0, 0.0]
+  bg_thresh_hi: [0.5, 0.6, 0.7]
+  fg_thresh: [0.5, 0.6, 0.7]
+  fg_fraction: 0.25
+  class_aware: True
+
+CascadeBBoxHead:
+  head: CascadeTwoFCHead
+  nms: MultiClassSoftNMS
+
+CascadeTwoFCHead:
+  mlp_dim: 1024
+
+MultiClassSoftNMS:
+  score_threshold: 0.001
+  keep_top_k: 300
+  softnms_sigma: 0.15
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [520000, 740000]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+FasterRCNNTrainFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: dataset/obj365
+    annotation: train.json
+    image_dir: train
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: True
+    with_mixup: False
+  - !RandomFlipImage
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: True
+    mean:
+    - 0.485
+    - 0.456
+    - 0.406
+    std:
+    - 0.229
+    - 0.224
+    - 0.225
+  - !ResizeImage
+    interp: 1
+    target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
+    max_size: 1800
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  drop_last: false
+  num_workers: 2
+
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: dataset/obj365
+    annotation: val.json
+    image_dir: val
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: True
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: True
+    mean:
+    - 0.485
+    - 0.456
+    - 0.406
+    std:
+    - 0.229
+    - 0.224
+    - 0.225
+  - !ResizeImage
+    interp: 1
+    target_size:
+    - 1200
+    max_size: 2000
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    annotation: dataset/obj365/val.json
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  drop_last: false
+  num_workers: 2
diff --git a/configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml b/configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..be07dd55a7f3db931379b87f02550197b6188913
--- /dev/null
+++ b/configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
@@ -0,0 +1,189 @@
+architecture: CascadeRCNNClsAware
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 1500000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar
+weights: output/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms/model_final
+metric: OID
+num_classes: 501  # 500 OIDV5 categories plus background
+
+CascadeRCNNClsAware:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: CascadeBBoxHead
+  bbox_assigner: CascadeBBoxAssigner
+
+ResNet:
+  norm_type: bn
+  depth: 200
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  variant: d
+  dcn_v2_stages: [3, 4, 5]
+  nonlocal_stages: [4]
+
+FPN:
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_positive_overlap: 0.7
+    rpn_negative_overlap: 0.3
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  min_level: 2
+  max_level: 5
+  box_resolution: 14
+  sampling_ratio: 2
+
+CascadeBBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [10, 20, 30]
+  bg_thresh_lo: [0.0, 0.0, 0.0]
+  bg_thresh_hi: [0.5, 0.6, 0.7]
+  fg_thresh: [0.5, 0.6, 0.7]
+  fg_fraction: 0.25
+  class_aware: True
+
+CascadeBBoxHead:
+  head: CascadeTwoFCHead
+  nms: MultiClassSoftNMS
+
+CascadeTwoFCHead:
+  mlp_dim: 1024
+
+MultiClassSoftNMS:
+  score_threshold: 0.001
+  keep_top_k: 300
+  softnms_sigma: 0.15
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [1000000, 1400000]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+FasterRCNNTrainFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: dataset/oidv5
+    annotation: train.json
+    image_dir: train
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: True
+    with_mixup: False
+  - !RandomFlipImage
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: True
+    mean:
+    - 0.485
+    - 0.456
+    - 0.406
+    std:
+    - 0.229
+    - 0.224
+    - 0.225
+  - !ResizeImage
+    interp: 1
+    target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
+    max_size: 1800
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  drop_last: false
+  num_workers: 2
+
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: dataset/oidv5
+    annotation: val.json
+    image_dir: val
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: True
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: True
+    mean:
+    - 0.485
+    - 0.456
+    - 0.406
+    std:
+    - 0.229
+    - 0.224
+    - 0.225
+  - !ResizeImage
+    interp: 1
+    target_size:
+    - 1200
+    max_size: 2000
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    annotation: dataset/oidv5/val.json
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  drop_last: false
+  num_workers: 2
diff --git a/demo/oidv5_gt.png b/demo/oidv5_gt.png
new file mode 100644
index 0000000000000000000000000000000000000000..e28fadae95a4eb113ae11d0018609896c2101c20
Binary files /dev/null and b/demo/oidv5_gt.png differ
diff --git a/demo/oidv5_model_framework.png b/demo/oidv5_model_framework.png
new file mode 100644
index 0000000000000000000000000000000000000000..b319958384e81cdd5405a41c9d468519638ed43d
Binary files /dev/null and b/demo/oidv5_model_framework.png differ
diff --git a/demo/oidv5_pred.jpg b/demo/oidv5_pred.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eb39ef730daaf12fdd47dce22fa32ec9066c29d9
Binary files /dev/null and b/demo/oidv5_pred.jpg differ
diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md
index 02e8b7e79d8ddb051f500eee30afe5c975316bd9..842a25a24d7cfdc17f4f3ba874b96cf337a44d6c 100644
--- a/docs/MODEL_ZOO.md
+++ b/docs/MODEL_ZOO.md
@@ -177,3 +177,8 @@ randomly cropping, randomly expansion, randomly flipping.
 ## Face Detection
 
 Please refer [face detection models](../configs/face_detection) for details.
+
+
+## Object Detection in Open Images Dataset V5
+
+Please refer to the [Open Images Dataset V5 baseline model](OIDV5_BASELINE_MODEL.md) for details.
diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md
index b889cd9a6b0ab5e24d6aa0c07098af914edd453f..6a7ec48260738533296a63a0f11065c77950ba2f 100644
--- a/docs/MODEL_ZOO_cn.md
+++ b/docs/MODEL_ZOO_cn.md
@@ -83,6 +83,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
 | CBResNet200-vd-FPN-Nonlocal | Cascade Faster | c3-c5 | 1 | 2.5x | - | 53.3%(softnms) | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cbr200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
 
+
 #### 注意事项:
 - Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
 - `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
@@ -166,4 +167,9 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
 
 ## 人脸检测
 
-详细请参考[人脸检测模型](../configs/face_detection).
+Please refer to [face detection models](../configs/face_detection).
+
+
+## Object Detection on the Open Images V5 Dataset
+
+Please refer to the [Open Images V5 baseline model](OIDV5_BASELINE_MODEL.md) for details.
diff --git a/docs/OIDV5_BASELINE_MODEL.md b/docs/OIDV5_BASELINE_MODEL.md
new file mode 100644
index 0000000000000000000000000000000000000000..73558a65762dc4bed28016a5e5e4c97560577cb8
--- /dev/null
+++ b/docs/OIDV5_BASELINE_MODEL.md
@@ -0,0 +1,58 @@
+# CascadeCA RCNN
+## Introduction
+CascadeCA RCNN is the best single model from Baidu's Computer Vision team in the Google AI Open Images 2019 Object Detection competition; this single model helped the team finish second among more than 500 participating teams. Open Images Dataset V5 (OIDV5) contains 500 categories, 1.73 million training images, and more than 14 million annotated bounding boxes, making it the largest known public object detection dataset to date. Dataset page: [https://storage.googleapis.com/openimages/web/index.html](https://storage.googleapis.com/openimages/web/index.html). The team's technical report on the competition is available at [https://arxiv.org/pdf/1911.07171.pdf](https://arxiv.org/pdf/1911.07171.pdf).
+
+<div align="center">
+  <img src="../demo/oidv5_gt.png" />
+</div>
+
+## Method
+This model combines several of the best-performing detection techniques available. Specifically, it uses ResNet200-vd as the backbone of the detector (the ImageNet classification pretrained weights can be downloaded [here](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/README_en.md)) and incorporates CascadeCA RCNN, Feature Pyramid Networks, Non-local blocks, and Deformable ConvNets v2. Note that a standard Cascade RCNN predicts only 2 boxes per RoI (foreground and background, with the classification scores deciding which category the foreground box is finally assigned to), whereas this model predicts a separate box for every category (Cascade Class Aware). The overall architecture is shown in the figure below, and a short sketch of the class-aware head follows it.
+
+<div align="center">
+  <img src="../demo/oidv5_model_framework.png" />
+</div>
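+
+To make the class-aware regression concrete, here is a minimal NumPy sketch (an illustration only, not the PaddleDetection implementation) of how the box-regression output widens from a single foreground/background pair to one box per category; the shapes follow the `num_classes: 501` and `class_aware: True` settings in the config above.
+
+```python
+import numpy as np
+
+num_rois, num_classes = 512, 501  # sampled RoIs per image; 500 classes + background
+
+# Standard (class-agnostic) head: 2 sets of box deltas per RoI (bg/fg);
+# classification scores pick the final category of the foreground box.
+agnostic_deltas = np.zeros((num_rois, 2 * 4))
+
+# Class-aware head: one set of box deltas per category for every RoI.
+aware_deltas = np.zeros((num_rois, num_classes * 4))
+
+def select_deltas(deltas, labels, class_aware):
+    """Pick the per-RoI deltas used to decode the final box."""
+    if not class_aware:
+        return deltas.reshape(-1, 2, 4)[:, 1, :]  # the single foreground slot
+    per_class = deltas.reshape(-1, num_classes, 4)
+    return per_class[np.arange(len(labels)), labels]  # each class's own box
+
+labels = np.random.randint(1, num_classes, size=num_rois)
+print(select_deltas(aware_deltas, labels, class_aware=True).shape)  # (512, 4)
+```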
+
+
+Because the category imbalance in OIDV5 is severe, a dynamic sampling strategy was used to select training samples; multi-scale training was used to cope with the very wide range of box areas; in addition, the team replaced Smooth L1 loss with Libra loss for box regression. At inference time, SoftNMS post-processing is applied so that more boxes can be recalled (a minimal sketch of this step is given at the end of this document).
+
+The Objects365 Dataset and OIDV5 share roughly 189 categories, so the two datasets were merged during training to enlarge the OIDV5 training data. The final model and its scores are listed in the table below; see the [OIDV5 technical report](https://arxiv.org/pdf/1911.07171.pdf) for the detailed training and model-ensembling strategies.
+
+The OIDV5 training results are as follows.
+
+
+| Architecture | Public/Private Score | Download |
+| :-----------------: | :--------: | :----------------------------------------------------------: |
+| CascadeCARCNN-FPN-Dcnv2-Nonlocal ResNet200-vd | 0.62690/0.59459 | [model](https://paddlemodels.bj.bcebos.com/object_detection/oidv5_cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+
+
+In addition, to verify the performance of this architecture, the team also trained models with the same structure on COCO2017 and the Objects365 Dataset; the models and their validation-set metrics are listed below.
+
+| Architecture | Dataset | Validation mAP | Download |
+| :-----------------: | :--------: | :--------: | :----------------------------------------------------------: |
+| CascadeCARCNN-FPN-Dcnv2-Nonlocal ResNet200-vd | COCO2017 | 51.7% | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+| CascadeCARCNN-FPN-Dcnv2-Nonlocal ResNet200-vd | Objects365 | 34.5% | [model](https://paddlemodels.bj.bcebos.com/object_detection/obj365_cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+
+COCO and the Objects365 Dataset share the same data format; for these two models, only inference and evaluation are currently supported.
+
+## Usage
+
+The OIDV5 data format differs from COCO's, so currently only single-image inference is supported. For evaluating OIDV5 models, please refer to the [Open Images challenge evaluation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/challenge_evaluation.md).
+
+1. Download the model and extract it.
+
+2. Run the inference script.
+
+```
+python -u tools/infer.py -c configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml -o weights=./oidv5_cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms/ --infer_img=demo/000000570688.jpg
+```
+
+Adjust the `weights` path according to where you placed the extracted model.
+
+The rendered detection results can be viewed in the `output` folder.
+
+## Detection Results
+
+<div align="center">
+  <img src="../demo/oidv5_pred.jpg" />
+</div>
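+
+For reference, below is a minimal NumPy sketch of the Gaussian Soft-NMS applied at inference time. It mirrors the `MultiClassSoftNMS` settings in the configs (`softnms_sigma: 0.15`, `score_threshold: 0.001`), but it is only an illustration under those assumptions, not the PaddleDetection implementation.
+
+```python
+import numpy as np
+
+def soft_nms(boxes, scores, sigma=0.15, score_thresh=0.001):
+    """Gaussian Soft-NMS: decay the scores of overlapping boxes
+    instead of discarding them, so more boxes can be recalled."""
+    boxes, scores, keep = boxes.copy(), scores.copy(), []
+    while scores.size > 0:
+        i = scores.argmax()
+        keep.append((boxes[i], scores[i]))
+        top, boxes = boxes[i], np.delete(boxes, i, axis=0)
+        scores = np.delete(scores, i)
+        if scores.size == 0:
+            break
+        # IoU between the kept box and all remaining boxes
+        x1 = np.maximum(top[0], boxes[:, 0])
+        y1 = np.maximum(top[1], boxes[:, 1])
+        x2 = np.minimum(top[2], boxes[:, 2])
+        y2 = np.minimum(top[3], boxes[:, 3])
+        inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
+        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+        top_area = (top[2] - top[0]) * (top[3] - top[1])
+        iou = inter / (top_area + areas - inter)
+        scores = scores * np.exp(-iou ** 2 / sigma)  # Gaussian score decay
+        mask = scores > score_thresh                 # prune near-zero scores
+        boxes, scores = boxes[mask], scores[mask]
+    return keep
+```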
diff --git a/ppdet/modeling/target_assigners.py b/ppdet/modeling/target_assigners.py index 69df2962b1ed230e59abbd4e9becf2343700060a..72297aff990e116f9b51afbde03c721a643c1e0d 100644 --- a/ppdet/modeling/target_assigners.py +++ b/ppdet/modeling/target_assigners.py @@ -70,6 +70,6 @@ class CascadeBBoxAssigner(object): bbox_reg_weights=curr_bbox_reg_w, use_random=self.use_random, class_nums=self.class_nums if self.class_aware else 2, - is_cls_agnostic=True, + is_cls_agnostic=not self.class_aware, is_cascade_rcnn=True if curr_stage > 0 and not self.class_aware else False) return outs diff --git a/ppdet/utils/oid_eval.py b/ppdet/utils/oid_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..21d4813f51f512c77aa9dfb988ab271f5ea5d315 --- /dev/null +++ b/ppdet/utils/oid_eval.py @@ -0,0 +1,544 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os +import sys +import numpy as np + +from .coco_eval import bbox2out + +import logging +logger = logging.getLogger(__name__) + + +__all__ = ['bbox2out', 'get_category_info'] + +def get_category_info(anno_file=None, + with_background=True, + use_default_label=False): + clsid2catid = { k:k for k in range(1, 501) } + + catid2name = { + 0:"background", + 1:"Infant bed", + 2:"Rose", + 3:"Flag", + 4:"Flashlight", + 5:"Sea turtle", + 6:"Camera", + 7:"Animal", + 8:"Glove", + 9:"Crocodile", + 10:"Cattle", + 11:"House", + 12:"Guacamole", + 13:"Penguin", + 14:"Vehicle registration plate", + 15:"Bench", + 16:"Ladybug", + 17:"Human nose", + 18:"Watermelon", + 19:"Flute", + 20:"Butterfly", + 21:"Washing machine", + 22:"Raccoon", + 23:"Segway", + 24:"Taco", + 25:"Jellyfish", + 26:"Cake", + 27:"Pen", + 28:"Cannon", + 29:"Bread", + 30:"Tree", + 31:"Shellfish", + 32:"Bed", + 33:"Hamster", + 34:"Hat", + 35:"Toaster", + 36:"Sombrero", + 37:"Tiara", + 38:"Bowl", + 39:"Dragonfly", + 40:"Moths and butterflies", + 41:"Antelope", + 42:"Vegetable", + 43:"Torch", + 44:"Building", + 45:"Power plugs and sockets", + 46:"Blender", + 47:"Billiard table", + 48:"Cutting board", + 49:"Bronze sculpture", + 50:"Turtle", + 51:"Broccoli", + 52:"Tiger", + 53:"Mirror", + 54:"Bear", + 55:"Zucchini", + 56:"Dress", + 57:"Volleyball", + 58:"Guitar", + 59:"Reptile", + 60:"Golf cart", + 61:"Tart", + 62:"Fedora", + 63:"Carnivore", + 64:"Car", + 65:"Lighthouse", + 66:"Coffeemaker", + 67:"Food processor", + 68:"Truck", + 69:"Bookcase", + 70:"Surfboard", + 71:"Footwear", + 72:"Bench", + 73:"Necklace", + 74:"Flower", + 75:"Radish", + 76:"Marine mammal", + 77:"Frying pan", + 78:"Tap", + 79:"Peach", + 80:"Knife", + 81:"Handbag", + 82:"Laptop", + 83:"Tent", + 84:"Ambulance", + 85:"Christmas tree", + 86:"Eagle", + 87:"Limousine", + 88:"Kitchen & dining room table", + 89:"Polar bear", + 90:"Tower", + 91:"Football", + 92:"Willow", + 93:"Human head", + 94:"Stop sign", + 
95:"Banana", + 96:"Mixer", + 97:"Binoculars", + 98:"Dessert", + 99:"Bee", + 100:"Chair", + 101:"Wood-burning stove", + 102:"Flowerpot", + 103:"Beaker", + 104:"Oyster", + 105:"Woodpecker", + 106:"Harp", + 107:"Bathtub", + 108:"Wall clock", + 109:"Sports uniform", + 110:"Rhinoceros", + 111:"Beehive", + 112:"Cupboard", + 113:"Chicken", + 114:"Man", + 115:"Blue jay", + 116:"Cucumber", + 117:"Balloon", + 118:"Kite", + 119:"Fireplace", + 120:"Lantern", + 121:"Missile", + 122:"Book", + 123:"Spoon", + 124:"Grapefruit", + 125:"Squirrel", + 126:"Orange", + 127:"Coat", + 128:"Punching bag", + 129:"Zebra", + 130:"Billboard", + 131:"Bicycle", + 132:"Door handle", + 133:"Mechanical fan", + 134:"Ring binder", + 135:"Table", + 136:"Parrot", + 137:"Sock", + 138:"Vase", + 139:"Weapon", + 140:"Shotgun", + 141:"Glasses", + 142:"Seahorse", + 143:"Belt", + 144:"Watercraft", + 145:"Window", + 146:"Giraffe", + 147:"Lion", + 148:"Tire", + 149:"Vehicle", + 150:"Canoe", + 151:"Tie", + 152:"Shelf", + 153:"Picture frame", + 154:"Printer", + 155:"Human leg", + 156:"Boat", + 157:"Slow cooker", + 158:"Croissant", + 159:"Candle", + 160:"Pancake", + 161:"Pillow", + 162:"Coin", + 163:"Stretcher", + 164:"Sandal", + 165:"Woman", + 166:"Stairs", + 167:"Harpsichord", + 168:"Stool", + 169:"Bus", + 170:"Suitcase", + 171:"Human mouth", + 172:"Juice", + 173:"Skull", + 174:"Door", + 175:"Violin", + 176:"Chopsticks", + 177:"Digital clock", + 178:"Sunflower", + 179:"Leopard", + 180:"Bell pepper", + 181:"Harbor seal", + 182:"Snake", + 183:"Sewing machine", + 184:"Goose", + 185:"Helicopter", + 186:"Seat belt", + 187:"Coffee cup", + 188:"Microwave oven", + 189:"Hot dog", + 190:"Countertop", + 191:"Serving tray", + 192:"Dog bed", + 193:"Beer", + 194:"Sunglasses", + 195:"Golf ball", + 196:"Waffle", + 197:"Palm tree", + 198:"Trumpet", + 199:"Ruler", + 200:"Helmet", + 201:"Ladder", + 202:"Office building", + 203:"Tablet computer", + 204:"Toilet paper", + 205:"Pomegranate", + 206:"Skirt", + 207:"Gas stove", + 208:"Cookie", + 209:"Cart", + 210:"Raven", + 211:"Egg", + 212:"Burrito", + 213:"Goat", + 214:"Kitchen knife", + 215:"Skateboard", + 216:"Salt and pepper shakers", + 217:"Lynx", + 218:"Boot", + 219:"Platter", + 220:"Ski", + 221:"Swimwear", + 222:"Swimming pool", + 223:"Drinking straw", + 224:"Wrench", + 225:"Drum", + 226:"Ant", + 227:"Human ear", + 228:"Headphones", + 229:"Fountain", + 230:"Bird", + 231:"Jeans", + 232:"Television", + 233:"Crab", + 234:"Microphone", + 235:"Home appliance", + 236:"Snowplow", + 237:"Beetle", + 238:"Artichoke", + 239:"Jet ski", + 240:"Stationary bicycle", + 241:"Human hair", + 242:"Brown bear", + 243:"Starfish", + 244:"Fork", + 245:"Lobster", + 246:"Corded phone", + 247:"Drink", + 248:"Saucer", + 249:"Carrot", + 250:"Insect", + 251:"Clock", + 252:"Castle", + 253:"Tennis racket", + 254:"Ceiling fan", + 255:"Asparagus", + 256:"Jaguar", + 257:"Musical instrument", + 258:"Train", + 259:"Cat", + 260:"Rifle", + 261:"Dumbbell", + 262:"Mobile phone", + 263:"Taxi", + 264:"Shower", + 265:"Pitcher", + 266:"Lemon", + 267:"Invertebrate", + 268:"Turkey", + 269:"High heels", + 270:"Bust", + 271:"Elephant", + 272:"Scarf", + 273:"Barrel", + 274:"Trombone", + 275:"Pumpkin", + 276:"Box", + 277:"Tomato", + 278:"Frog", + 279:"Bidet", + 280:"Human face", + 281:"Houseplant", + 282:"Van", + 283:"Shark", + 284:"Ice cream", + 285:"Swim cap", + 286:"Falcon", + 287:"Ostrich", + 288:"Handgun", + 289:"Whiteboard", + 290:"Lizard", + 291:"Pasta", + 292:"Snowmobile", + 293:"Light bulb", + 294:"Window blind", + 295:"Muffin", + 296:"Pretzel", 
+ 297:"Computer monitor", + 298:"Horn", + 299:"Furniture", + 300:"Sandwich", + 301:"Fox", + 302:"Convenience store", + 303:"Fish", + 304:"Fruit", + 305:"Earrings", + 306:"Curtain", + 307:"Grape", + 308:"Sofa bed", + 309:"Horse", + 310:"Luggage and bags", + 311:"Desk", + 312:"Crutch", + 313:"Bicycle helmet", + 314:"Tick", + 315:"Airplane", + 316:"Canary", + 317:"Spatula", + 318:"Watch", + 319:"Lily", + 320:"Kitchen appliance", + 321:"Filing cabinet", + 322:"Aircraft", + 323:"Cake stand", + 324:"Candy", + 325:"Sink", + 326:"Mouse", + 327:"Wine", + 328:"Wheelchair", + 329:"Goldfish", + 330:"Refrigerator", + 331:"French fries", + 332:"Drawer", + 333:"Treadmill", + 334:"Picnic basket", + 335:"Dice", + 336:"Cabbage", + 337:"Football helmet", + 338:"Pig", + 339:"Person", + 340:"Shorts", + 341:"Gondola", + 342:"Honeycomb", + 343:"Doughnut", + 344:"Chest of drawers", + 345:"Land vehicle", + 346:"Bat", + 347:"Monkey", + 348:"Dagger", + 349:"Tableware", + 350:"Human foot", + 351:"Mug", + 352:"Alarm clock", + 353:"Pressure cooker", + 354:"Human hand", + 355:"Tortoise", + 356:"Baseball glove", + 357:"Sword", + 358:"Pear", + 359:"Miniskirt", + 360:"Traffic sign", + 361:"Girl", + 362:"Roller skates", + 363:"Dinosaur", + 364:"Porch", + 365:"Human beard", + 366:"Submarine sandwich", + 367:"Screwdriver", + 368:"Strawberry", + 369:"Wine glass", + 370:"Seafood", + 371:"Racket", + 372:"Wheel", + 373:"Sea lion", + 374:"Toy", + 375:"Tea", + 376:"Tennis ball", + 377:"Waste container", + 378:"Mule", + 379:"Cricket ball", + 380:"Pineapple", + 381:"Coconut", + 382:"Doll", + 383:"Coffee table", + 384:"Snowman", + 385:"Lavender", + 386:"Shrimp", + 387:"Maple", + 388:"Cowboy hat", + 389:"Goggles", + 390:"Rugby ball", + 391:"Caterpillar", + 392:"Poster", + 393:"Rocket", + 394:"Organ", + 395:"Saxophone", + 396:"Traffic light", + 397:"Cocktail", + 398:"Plastic bag", + 399:"Squash", + 400:"Mushroom", + 401:"Hamburger", + 402:"Light switch", + 403:"Parachute", + 404:"Teddy bear", + 405:"Winter melon", + 406:"Deer", + 407:"Musical keyboard", + 408:"Plumbing fixture", + 409:"Scoreboard", + 410:"Baseball bat", + 411:"Envelope", + 412:"Adhesive tape", + 413:"Briefcase", + 414:"Paddle", + 415:"Bow and arrow", + 416:"Telephone", + 417:"Sheep", + 418:"Jacket", + 419:"Boy", + 420:"Pizza", + 421:"Otter", + 422:"Office supplies", + 423:"Couch", + 424:"Cello", + 425:"Bull", + 426:"Camel", + 427:"Ball", + 428:"Duck", + 429:"Whale", + 430:"Shirt", + 431:"Tank", + 432:"Motorcycle", + 433:"Accordion", + 434:"Owl", + 435:"Porcupine", + 436:"Sun hat", + 437:"Nail", + 438:"Scissors", + 439:"Swan", + 440:"Lamp", + 441:"Crown", + 442:"Piano", + 443:"Sculpture", + 444:"Cheetah", + 445:"Oboe", + 446:"Tin can", + 447:"Mango", + 448:"Tripod", + 449:"Oven", + 450:"Mouse", + 451:"Barge", + 452:"Coffee", + 453:"Snowboard", + 454:"Common fig", + 455:"Salad", + 456:"Marine invertebrates", + 457:"Umbrella", + 458:"Kangaroo", + 459:"Human arm", + 460:"Measuring cup", + 461:"Snail", + 462:"Loveseat", + 463:"Suit", + 464:"Teapot", + 465:"Bottle", + 466:"Alpaca", + 467:"Kettle", + 468:"Trousers", + 469:"Popcorn", + 470:"Centipede", + 471:"Spider", + 472:"Sparrow", + 473:"Plate", + 474:"Bagel", + 475:"Personal care", + 476:"Apple", + 477:"Brassiere", + 478:"Bathroom cabinet", + 479:"studio couch", + 480:"Computer keyboard", + 481:"Table tennis racket", + 482:"Sushi", + 483:"Cabinetry", + 484:"Street light", + 485:"Towel", + 486:"Nightstand", + 487:"Rabbit", + 488:"Dolphin", + 489:"Dog", + 490:"Jug", + 491:"Wok", + 492:"Fire hydrant", + 493:"Human eye", + 
494:"Skyscraper", + 495:"Backpack", + 496:"Potato", + 497:"Paper towel", + 498:"Lifejacket", + 499:"Bicycle wheel", + 500:"Toilet", + } + + if not with_background: + clsid2catid = {k - 1: v for k, v in clsid2catid.items()} + return clsid2catid, catid2name + \ No newline at end of file diff --git a/tools/eval.py b/tools/eval.py index a9d7ed719f946697657f27601480086dd6a334d8..779d30b1ba12fe7e48dc409a5d412a98dc0a64dd 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -106,13 +106,16 @@ def main(): exe.run(startup_prog) if 'weights' in cfg: checkpoint.load_params(exe, eval_prog, cfg.weights) - + + assert cfg.metric != 'OID', "eval process of OID dataset \ + is not supported." if cfg.metric == "WIDERFACE": raise ValueError("metric type {} does not support in tools/eval.py, " "please use tools/face_eval.py".format(cfg.metric)) assert cfg.metric in ['COCO', 'VOC'], \ "unknown metric type {}".format(cfg.metric) extra_keys = [] + if cfg.metric == 'COCO': extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg.metric == 'VOC': diff --git a/tools/infer.py b/tools/infer.py index 5eaa4f7fe07b871f2598c706fdda5a2f15e9998b..5e303da4ca057f3d34de70c3d69cd073ad5cfe12 100644 --- a/tools/infer.py +++ b/tools/infer.py @@ -139,10 +139,10 @@ def main(): checkpoint.load_params(exe, infer_prog, cfg.weights) # parse infer fetches - assert cfg.metric in ['COCO', 'VOC', 'WIDERFACE'], \ + assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \ "unknown metric type {}".format(cfg.metric) extra_keys = [] - if cfg['metric'] == 'COCO': + if cfg['metric'] in ['COCO', 'OID']: extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE': extra_keys = ['im_id', 'im_shape'] @@ -151,6 +151,8 @@ def main(): # parse dataset category if cfg.metric == 'COCO': from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info + if cfg.metric == 'OID': + from ppdet.utils.oid_eval import bbox2out, get_category_info if cfg.metric == "VOC": from ppdet.utils.voc_eval import bbox2out, get_category_info if cfg.metric == "WIDERFACE":