diff --git a/README.md b/README.md
index 0f0375e7e595485026a3f12f329a68f2a6a4de6d..bab001a2ed01734555271d3159ba68fb08c615b6 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ PaddleDetection的目的是为工业界和学术界提供丰富、易用的目
- [行人检测和车辆检测预训练模型](contrib/README_cn.md) 针对不同场景的检测模型
- [YOLOv3增强模型](docs/YOLOv3_ENHANCEMENT.md) 改进原始YOLOv3,精度达到41.4%,原论文精度为33.0%,同时预测速度也得到提升
- [Objects365 2019 Challenge夺冠模型](docs/CACascadeRCNN.md) Objects365 Full Track任务中最好的单模型之一,精度达到31.7%
+- [Open Images V5和Objects365数据集模型](docs/OIDV5_BASELINE_MODEL.md)
## 模型压缩
@@ -90,8 +91,13 @@ PaddleDetection的目的是为工业界和学术界提供丰富、易用的目
## 版本更新
-### 10/2019
+### 21/11/2019
+- 增加CascadeClsAware RCNN模型。
+- 增加CBNet,ResNet200和Non-local模型。
+- 增加SoftNMS。
+- 增加Open Image V5数据集和Objects365数据集模型。
+### 10/2019
- 增加增强版YOLOv3模型,精度高达41.4%。
- 增加人脸检测模型BlazeFace、Faceboxes。
- 丰富基于COCO的模型,精度高达51.9%。
diff --git a/README_en.md b/README_en.md
index e055beadcb1ac5425d3afe32c952a5df6fdf779a..e083193dd21536a315843220a854bf09baac64a3 100644
--- a/README_en.md
+++ b/README_en.md
@@ -80,6 +80,7 @@ Advanced Features:
- [Pretrained models for pedestrian and vehicle detection](contrib/README.md) Models for object detection in specific scenarios.
- [YOLOv3 enhanced model](docs/YOLOv3_ENHANCEMENT.md) Compared to MAP of 33.0% in paper, enhanced YOLOv3 reaches the MAP of 41.4% and inference speed is improved as well
- [Objects365 2019 Challenge champion model](docs/CACascadeRCNN.md) One of the best single models in Objects365 Full Track of which MAP reaches 31.7%.
+- [Open Images Dataset V5 and Objects365 Dataset models](docs/OIDV5_BASELINE_MODEL.md)
## Model compression
@@ -98,6 +99,12 @@ Advanced Features:
## Updates
+#### 21/11/2019
+- Add CascadeClsAware RCNN model.
+- Add CBNet, ResNet200 and Non-local models.
+- Add SoftNMS.
+- Add models of Open Images Dataset V5 and Objects365 Dataset.
+
#### 10/2019
- Add enhanced YOLOv3 models, box mAP up to 41.4%.
diff --git a/configs/obj365/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml b/configs/obj365/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0de0e9f521d3d44459cb67e45cbe07bd82855431
--- /dev/null
+++ b/configs/obj365/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
@@ -0,0 +1,190 @@
+architecture: CascadeRCNNClsAware
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 800000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar
+weights: output/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms/model_final
+# obj365 dataset format and its eval method are same as those for coco
+metric: COCO
+num_classes: 366
+
+CascadeRCNNClsAware:
+ backbone: ResNet
+ fpn: FPN
+ rpn_head: FPNRPNHead
+ roi_extractor: FPNRoIAlign
+ bbox_head: CascadeBBoxHead
+ bbox_assigner: CascadeBBoxAssigner
+
+ResNet:
+ norm_type: bn
+ depth: 200
+ feature_maps: [2, 3, 4, 5]
+ freeze_at: 2
+ variant: d
+ dcn_v2_stages: [3, 4, 5]
+ nonlocal_stages: [4]
+
+FPN:
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+
+FPNRPNHead:
+ anchor_generator:
+ anchor_sizes: [32, 64, 128, 256, 512]
+ aspect_ratios: [0.5, 1.0, 2.0]
+ stride: [16.0, 16.0]
+ variance: [1.0, 1.0, 1.0, 1.0]
+ anchor_start_size: 32
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ rpn_target_assign:
+ rpn_batch_size_per_im: 256
+ rpn_fg_fraction: 0.5
+ rpn_positive_overlap: 0.7
+ rpn_negative_overlap: 0.3
+ rpn_straddle_thresh: 0.0
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+FPNRoIAlign:
+ canconical_level: 4
+ canonical_size: 224
+ min_level: 2
+ max_level: 5
+ box_resolution: 14
+ sampling_ratio: 2
+
+CascadeBBoxAssigner:
+ batch_size_per_im: 512
+ bbox_reg_weights: [10, 20, 30]
+ bg_thresh_lo: [0.0, 0.0, 0.0]
+ bg_thresh_hi: [0.5, 0.6, 0.7]
+ fg_thresh: [0.5, 0.6, 0.7]
+ fg_fraction: 0.25
+ class_aware: True
+
+CascadeBBoxHead:
+ head: CascadeTwoFCHead
+ nms: MultiClassSoftNMS
+
+CascadeTwoFCHead:
+ mlp_dim: 1024
+
+MultiClassSoftNMS:
+ score_threshold: 0.001
+ keep_top_k: 300
+ softnms_sigma: 0.15
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [520000, 740000]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+FasterRCNNTrainFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/obj365
+ annotation: train.json
+ image_dir: train
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ with_mixup: False
+ - !RandomFlipImage
+ prob: 0.5
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: True
+ mean:
+ - 0.485
+ - 0.456
+ - 0.406
+ std:
+ - 0.229
+ - 0.224
+ - 0.225
+ - !ResizeImage
+ interp: 1
+ target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
+ max_size: 1800
+ use_cv2: true
+ - !Permute
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
+
+FasterRCNNEvalFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/obj365
+ annotation: val.json
+ image_dir: val
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ with_mixup: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: True
+ mean:
+ - 0.485
+ - 0.456
+ - 0.406
+ std:
+ - 0.229
+ - 0.224
+ - 0.225
+ - !ResizeImage
+ interp: 1
+ target_size:
+ - 1200
+ max_size: 2000
+ use_cv2: true
+ - !Permute
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+
+FasterRCNNTestFeed:
+ batch_size: 1
+ dataset:
+ annotation: dataset/obj365/val.json
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
diff --git a/configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml b/configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
new file mode 100644
index 0000000000000000000000000000000000000000..be07dd55a7f3db931379b87f02550197b6188913
--- /dev/null
+++ b/configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml
@@ -0,0 +1,189 @@
+architecture: CascadeRCNNClsAware
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 1500000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar
+weights: output/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms/model_final
+metric: OID
+num_classes: 501
+
+CascadeRCNNClsAware:
+ backbone: ResNet
+ fpn: FPN
+ rpn_head: FPNRPNHead
+ roi_extractor: FPNRoIAlign
+ bbox_head: CascadeBBoxHead
+ bbox_assigner: CascadeBBoxAssigner
+
+ResNet:
+ norm_type: bn
+ depth: 200
+ feature_maps: [2, 3, 4, 5]
+ freeze_at: 2
+ variant: d
+ dcn_v2_stages: [3, 4, 5]
+ nonlocal_stages: [4]
+
+FPN:
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+
+FPNRPNHead:
+ anchor_generator:
+ anchor_sizes: [32, 64, 128, 256, 512]
+ aspect_ratios: [0.5, 1.0, 2.0]
+ stride: [16.0, 16.0]
+ variance: [1.0, 1.0, 1.0, 1.0]
+ anchor_start_size: 32
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ rpn_target_assign:
+ rpn_batch_size_per_im: 256
+ rpn_fg_fraction: 0.5
+ rpn_positive_overlap: 0.7
+ rpn_negative_overlap: 0.3
+ rpn_straddle_thresh: 0.0
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+FPNRoIAlign:
+ canconical_level: 4
+ canonical_size: 224
+ min_level: 2
+ max_level: 5
+ box_resolution: 14
+ sampling_ratio: 2
+
+CascadeBBoxAssigner:
+ batch_size_per_im: 512
+ bbox_reg_weights: [10, 20, 30]
+ bg_thresh_lo: [0.0, 0.0, 0.0]
+ bg_thresh_hi: [0.5, 0.6, 0.7]
+ fg_thresh: [0.5, 0.6, 0.7]
+ fg_fraction: 0.25
+ class_aware: True
+
+CascadeBBoxHead:
+ head: CascadeTwoFCHead
+ nms: MultiClassSoftNMS
+
+CascadeTwoFCHead:
+ mlp_dim: 1024
+
+MultiClassSoftNMS:
+ score_threshold: 0.001
+ keep_top_k: 300
+ softnms_sigma: 0.15
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [1000000, 1400000]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+FasterRCNNTrainFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/oid
+ annotation: train.json
+ image_dir: train
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ with_mixup: False
+ - !RandomFlipImage
+ prob: 0.5
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: True
+ mean:
+ - 0.485
+ - 0.456
+ - 0.406
+ std:
+ - 0.229
+ - 0.224
+ - 0.225
+ - !ResizeImage
+ interp: 1
+ target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
+ max_size: 1800
+ use_cv2: true
+ - !Permute
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
+
+FasterRCNNEvalFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/oidv5
+ annotation: val.json
+ image_dir: val
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ with_mixup: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: True
+ mean:
+ - 0.485
+ - 0.456
+ - 0.406
+ std:
+ - 0.229
+ - 0.224
+ - 0.225
+ - !ResizeImage
+ interp: 1
+ target_size:
+ - 1200
+ max_size: 2000
+ use_cv2: true
+ - !Permute
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+
+FasterRCNNTestFeed:
+ batch_size: 1
+ dataset:
+ annotation: dataset/oidv5/val.json
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
diff --git a/demo/oidv5_gt.png b/demo/oidv5_gt.png
new file mode 100644
index 0000000000000000000000000000000000000000..e28fadae95a4eb113ae11d0018609896c2101c20
Binary files /dev/null and b/demo/oidv5_gt.png differ
diff --git a/demo/oidv5_model_framework.png b/demo/oidv5_model_framework.png
new file mode 100644
index 0000000000000000000000000000000000000000..b319958384e81cdd5405a41c9d468519638ed43d
Binary files /dev/null and b/demo/oidv5_model_framework.png differ
diff --git a/demo/oidv5_pred.jpg b/demo/oidv5_pred.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eb39ef730daaf12fdd47dce22fa32ec9066c29d9
Binary files /dev/null and b/demo/oidv5_pred.jpg differ
diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md
index 02e8b7e79d8ddb051f500eee30afe5c975316bd9..842a25a24d7cfdc17f4f3ba874b96cf337a44d6c 100644
--- a/docs/MODEL_ZOO.md
+++ b/docs/MODEL_ZOO.md
@@ -177,3 +177,8 @@ randomly cropping, randomly expansion, randomly flipping.
## Face Detection
Please refer [face detection models](../configs/face_detection) for details.
+
+
+## Object Detection in Open Images Dataset V5
+
+Please refer to [Open Images Dataset V5 Baseline model](OIDV5_BASELINE_MODEL.md) for details.
diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md
index b889cd9a6b0ab5e24d6aa0c07098af914edd453f..6a7ec48260738533296a63a0f11065c77950ba2f 100644
--- a/docs/MODEL_ZOO_cn.md
+++ b/docs/MODEL_ZOO_cn.md
@@ -83,6 +83,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
| CBResNet200-vd-FPN-Nonlocal | Cascade Faster | c3-c5 | 1 | 2.5x | - | 53.3%(softnms) | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cbr200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+
#### 注意事项:
- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
@@ -166,4 +167,9 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
## 人脸检测
-详细请参考[人脸检测模型](../configs/face_detection).
+详细请参考[人脸检测模型](../configs/face_detection)。
+
+
+## 基于Open Images V5数据集的物体检测
+
+详细请参考[Open Images V5数据集基线模型](OIDV5_BASELINE_MODEL.md)。
diff --git a/docs/OIDV5_BASELINE_MODEL.md b/docs/OIDV5_BASELINE_MODEL.md
new file mode 100644
index 0000000000000000000000000000000000000000..73558a65762dc4bed28016a5e5e4c97560577cb8
--- /dev/null
+++ b/docs/OIDV5_BASELINE_MODEL.md
@@ -0,0 +1,58 @@
+# CascadeCA RCNN
+## 简介
+CascadeCA RCNN是百度视觉技术部在Google AI Open Images 2019-Object Detection比赛中的最佳单模型,该单模型助力团队在500多支参赛队伍中取得第二名。Open Images Dataset V5(OIDV5)包含500个类别、173W训练图像和超过1400W个标注边框,是目前已知规模最大的目标检测公开数据集,数据集地址:[https://storage.googleapis.com/openimages/web/index.html](https://storage.googleapis.com/openimages/web/index.html)。团队在比赛中的技术方案报告地址:[https://arxiv.org/pdf/1911.07171.pdf](https://arxiv.org/pdf/1911.07171.pdf)
+
+
+
+
+
+## 方法描述
+该模型结合了当前较优的检测方法。具体地,它将ResNet200-vd作为检测模型的骨干网络,其imagenet分类预训练模型可以在[这里](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/README_en.md)下载;结合了CascadeCA RCNN、Feature Pyramid Networks、Non-local、Deformable V2等方法。在这里需要注意的是,标准的CascadeRCNN是只预测2个框(前景和背景,使用得分信息去判断最终前景所属的类别),而该模型对每个类别都单独预测了一个框(Cascade Class Aware)。最终模型框图如下图所示。
+
+
+
+
+
+
+由于OIDV5的类别不均衡现象比较严重,在训练时采用了动态采样的策略去选择样本并进行训练;多尺度训练被用于解决边框面积范围太大的情况;此外,团队使用Libra loss替代Smooth L1 loss,来计算预测框的loss;在预测时,使用SoftNMS方法进行后处理,保证更多的框可以被召回。
+
+Objects365 Dataset和OIDV5有大约189个类别是重复的,因此将两个数据集合并进行训练,用于扩充OIDV5的训练数据,最终该模型与其性能指标如下表所示。更具体的模型训练和融合策略可以见:[OIDV5技术报告](https://arxiv.org/pdf/1911.07171.pdf)。
+
+OIDV5模型训练结果如下。
+
+
+| 模型结构 | Public/Private Score | 下载链接 |
+| :-----------------: | :--------: | :----------------------------------------------------------: |
+| CascadeCARCNN-FPN-Dcnv2-Nonlocal ResNet200-vd | 0.62690/0.59459 | [模型](https://paddlemodels.bj.bcebos.com/object_detection/oidv5_cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+
+
+此外,为验证模型的性能,团队基于该模型结构,也训练了针对COCO2017和Objects365 Dataset的模型,模型和验证集指标如下表。
+
+| 模型结构 | 数据集 | 验证集mAP | 下载链接 |
+| :-----------------: | :--------: | :--------: | :----------------------------------------------------------: |
+| CascadeCARCNN-FPN-Dcnv2-Nonlocal ResNet200-vd | COCO2017 | 51.7% | [模型](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+| CascadeCARCNN-FPN-Dcnv2-Nonlocal ResNet200-vd | Objects365 | 34.5% | [模型](https://paddlemodels.bj.bcebos.com/object_detection/obj365_cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.tar) |
+
+COCO和Objects365 Dataset数据格式相同,目前只支持预测和评估。
+
+## 使用方法
+
+OIDV5数据集格式与COCO不同,目前仅支持单张图片的预测。OIDV5的模型评估方法可以参考[这里](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/challenge_evaluation.md)。
+
+1. 下载模型并解压。
+
+2. 运行预测程序。
+
+```
+python -u tools/infer.py -c configs/oidv5/cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms.yml -o weights=./oidv5_cascade_rcnn_cls_aware_r200_vd_fpn_dcnv2_nonlocal_softnms/ --infer_img=demo/000000570688.jpg
+```
+
+其中模型所在文件夹需要根据自己放置的位置进行修改。
+
+检测结果图像可以在`output`文件夹中查看。
+
+## 模型检测效果
+
+
+
+
diff --git a/ppdet/modeling/target_assigners.py b/ppdet/modeling/target_assigners.py
index 69df2962b1ed230e59abbd4e9becf2343700060a..72297aff990e116f9b51afbde03c721a643c1e0d 100644
--- a/ppdet/modeling/target_assigners.py
+++ b/ppdet/modeling/target_assigners.py
@@ -70,6 +70,6 @@ class CascadeBBoxAssigner(object):
bbox_reg_weights=curr_bbox_reg_w,
use_random=self.use_random,
class_nums=self.class_nums if self.class_aware else 2,
- is_cls_agnostic=True,
+ is_cls_agnostic=not self.class_aware,
is_cascade_rcnn=True if curr_stage > 0 and not self.class_aware else False)
return outs
diff --git a/ppdet/utils/oid_eval.py b/ppdet/utils/oid_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..21d4813f51f512c77aa9dfb988ab271f5ea5d315
--- /dev/null
+++ b/ppdet/utils/oid_eval.py
@@ -0,0 +1,544 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+
+from .coco_eval import bbox2out
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+__all__ = ['bbox2out', 'get_category_info']
+
+def get_category_info(anno_file=None,
+ with_background=True,
+ use_default_label=False):
+ clsid2catid = { k:k for k in range(1, 501) }
+
+ catid2name = {
+ 0:"background",
+ 1:"Infant bed",
+ 2:"Rose",
+ 3:"Flag",
+ 4:"Flashlight",
+ 5:"Sea turtle",
+ 6:"Camera",
+ 7:"Animal",
+ 8:"Glove",
+ 9:"Crocodile",
+ 10:"Cattle",
+ 11:"House",
+ 12:"Guacamole",
+ 13:"Penguin",
+ 14:"Vehicle registration plate",
+ 15:"Bench",
+ 16:"Ladybug",
+ 17:"Human nose",
+ 18:"Watermelon",
+ 19:"Flute",
+ 20:"Butterfly",
+ 21:"Washing machine",
+ 22:"Raccoon",
+ 23:"Segway",
+ 24:"Taco",
+ 25:"Jellyfish",
+ 26:"Cake",
+ 27:"Pen",
+ 28:"Cannon",
+ 29:"Bread",
+ 30:"Tree",
+ 31:"Shellfish",
+ 32:"Bed",
+ 33:"Hamster",
+ 34:"Hat",
+ 35:"Toaster",
+ 36:"Sombrero",
+ 37:"Tiara",
+ 38:"Bowl",
+ 39:"Dragonfly",
+ 40:"Moths and butterflies",
+ 41:"Antelope",
+ 42:"Vegetable",
+ 43:"Torch",
+ 44:"Building",
+ 45:"Power plugs and sockets",
+ 46:"Blender",
+ 47:"Billiard table",
+ 48:"Cutting board",
+ 49:"Bronze sculpture",
+ 50:"Turtle",
+ 51:"Broccoli",
+ 52:"Tiger",
+ 53:"Mirror",
+ 54:"Bear",
+ 55:"Zucchini",
+ 56:"Dress",
+ 57:"Volleyball",
+ 58:"Guitar",
+ 59:"Reptile",
+ 60:"Golf cart",
+ 61:"Tart",
+ 62:"Fedora",
+ 63:"Carnivore",
+ 64:"Car",
+ 65:"Lighthouse",
+ 66:"Coffeemaker",
+ 67:"Food processor",
+ 68:"Truck",
+ 69:"Bookcase",
+ 70:"Surfboard",
+ 71:"Footwear",
+ 72:"Bench",
+ 73:"Necklace",
+ 74:"Flower",
+ 75:"Radish",
+ 76:"Marine mammal",
+ 77:"Frying pan",
+ 78:"Tap",
+ 79:"Peach",
+ 80:"Knife",
+ 81:"Handbag",
+ 82:"Laptop",
+ 83:"Tent",
+ 84:"Ambulance",
+ 85:"Christmas tree",
+ 86:"Eagle",
+ 87:"Limousine",
+ 88:"Kitchen & dining room table",
+ 89:"Polar bear",
+ 90:"Tower",
+ 91:"Football",
+ 92:"Willow",
+ 93:"Human head",
+ 94:"Stop sign",
+ 95:"Banana",
+ 96:"Mixer",
+ 97:"Binoculars",
+ 98:"Dessert",
+ 99:"Bee",
+ 100:"Chair",
+ 101:"Wood-burning stove",
+ 102:"Flowerpot",
+ 103:"Beaker",
+ 104:"Oyster",
+ 105:"Woodpecker",
+ 106:"Harp",
+ 107:"Bathtub",
+ 108:"Wall clock",
+ 109:"Sports uniform",
+ 110:"Rhinoceros",
+ 111:"Beehive",
+ 112:"Cupboard",
+ 113:"Chicken",
+ 114:"Man",
+ 115:"Blue jay",
+ 116:"Cucumber",
+ 117:"Balloon",
+ 118:"Kite",
+ 119:"Fireplace",
+ 120:"Lantern",
+ 121:"Missile",
+ 122:"Book",
+ 123:"Spoon",
+ 124:"Grapefruit",
+ 125:"Squirrel",
+ 126:"Orange",
+ 127:"Coat",
+ 128:"Punching bag",
+ 129:"Zebra",
+ 130:"Billboard",
+ 131:"Bicycle",
+ 132:"Door handle",
+ 133:"Mechanical fan",
+ 134:"Ring binder",
+ 135:"Table",
+ 136:"Parrot",
+ 137:"Sock",
+ 138:"Vase",
+ 139:"Weapon",
+ 140:"Shotgun",
+ 141:"Glasses",
+ 142:"Seahorse",
+ 143:"Belt",
+ 144:"Watercraft",
+ 145:"Window",
+ 146:"Giraffe",
+ 147:"Lion",
+ 148:"Tire",
+ 149:"Vehicle",
+ 150:"Canoe",
+ 151:"Tie",
+ 152:"Shelf",
+ 153:"Picture frame",
+ 154:"Printer",
+ 155:"Human leg",
+ 156:"Boat",
+ 157:"Slow cooker",
+ 158:"Croissant",
+ 159:"Candle",
+ 160:"Pancake",
+ 161:"Pillow",
+ 162:"Coin",
+ 163:"Stretcher",
+ 164:"Sandal",
+ 165:"Woman",
+ 166:"Stairs",
+ 167:"Harpsichord",
+ 168:"Stool",
+ 169:"Bus",
+ 170:"Suitcase",
+ 171:"Human mouth",
+ 172:"Juice",
+ 173:"Skull",
+ 174:"Door",
+ 175:"Violin",
+ 176:"Chopsticks",
+ 177:"Digital clock",
+ 178:"Sunflower",
+ 179:"Leopard",
+ 180:"Bell pepper",
+ 181:"Harbor seal",
+ 182:"Snake",
+ 183:"Sewing machine",
+ 184:"Goose",
+ 185:"Helicopter",
+ 186:"Seat belt",
+ 187:"Coffee cup",
+ 188:"Microwave oven",
+ 189:"Hot dog",
+ 190:"Countertop",
+ 191:"Serving tray",
+ 192:"Dog bed",
+ 193:"Beer",
+ 194:"Sunglasses",
+ 195:"Golf ball",
+ 196:"Waffle",
+ 197:"Palm tree",
+ 198:"Trumpet",
+ 199:"Ruler",
+ 200:"Helmet",
+ 201:"Ladder",
+ 202:"Office building",
+ 203:"Tablet computer",
+ 204:"Toilet paper",
+ 205:"Pomegranate",
+ 206:"Skirt",
+ 207:"Gas stove",
+ 208:"Cookie",
+ 209:"Cart",
+ 210:"Raven",
+ 211:"Egg",
+ 212:"Burrito",
+ 213:"Goat",
+ 214:"Kitchen knife",
+ 215:"Skateboard",
+ 216:"Salt and pepper shakers",
+ 217:"Lynx",
+ 218:"Boot",
+ 219:"Platter",
+ 220:"Ski",
+ 221:"Swimwear",
+ 222:"Swimming pool",
+ 223:"Drinking straw",
+ 224:"Wrench",
+ 225:"Drum",
+ 226:"Ant",
+ 227:"Human ear",
+ 228:"Headphones",
+ 229:"Fountain",
+ 230:"Bird",
+ 231:"Jeans",
+ 232:"Television",
+ 233:"Crab",
+ 234:"Microphone",
+ 235:"Home appliance",
+ 236:"Snowplow",
+ 237:"Beetle",
+ 238:"Artichoke",
+ 239:"Jet ski",
+ 240:"Stationary bicycle",
+ 241:"Human hair",
+ 242:"Brown bear",
+ 243:"Starfish",
+ 244:"Fork",
+ 245:"Lobster",
+ 246:"Corded phone",
+ 247:"Drink",
+ 248:"Saucer",
+ 249:"Carrot",
+ 250:"Insect",
+ 251:"Clock",
+ 252:"Castle",
+ 253:"Tennis racket",
+ 254:"Ceiling fan",
+ 255:"Asparagus",
+ 256:"Jaguar",
+ 257:"Musical instrument",
+ 258:"Train",
+ 259:"Cat",
+ 260:"Rifle",
+ 261:"Dumbbell",
+ 262:"Mobile phone",
+ 263:"Taxi",
+ 264:"Shower",
+ 265:"Pitcher",
+ 266:"Lemon",
+ 267:"Invertebrate",
+ 268:"Turkey",
+ 269:"High heels",
+ 270:"Bust",
+ 271:"Elephant",
+ 272:"Scarf",
+ 273:"Barrel",
+ 274:"Trombone",
+ 275:"Pumpkin",
+ 276:"Box",
+ 277:"Tomato",
+ 278:"Frog",
+ 279:"Bidet",
+ 280:"Human face",
+ 281:"Houseplant",
+ 282:"Van",
+ 283:"Shark",
+ 284:"Ice cream",
+ 285:"Swim cap",
+ 286:"Falcon",
+ 287:"Ostrich",
+ 288:"Handgun",
+ 289:"Whiteboard",
+ 290:"Lizard",
+ 291:"Pasta",
+ 292:"Snowmobile",
+ 293:"Light bulb",
+ 294:"Window blind",
+ 295:"Muffin",
+ 296:"Pretzel",
+ 297:"Computer monitor",
+ 298:"Horn",
+ 299:"Furniture",
+ 300:"Sandwich",
+ 301:"Fox",
+ 302:"Convenience store",
+ 303:"Fish",
+ 304:"Fruit",
+ 305:"Earrings",
+ 306:"Curtain",
+ 307:"Grape",
+ 308:"Sofa bed",
+ 309:"Horse",
+ 310:"Luggage and bags",
+ 311:"Desk",
+ 312:"Crutch",
+ 313:"Bicycle helmet",
+ 314:"Tick",
+ 315:"Airplane",
+ 316:"Canary",
+ 317:"Spatula",
+ 318:"Watch",
+ 319:"Lily",
+ 320:"Kitchen appliance",
+ 321:"Filing cabinet",
+ 322:"Aircraft",
+ 323:"Cake stand",
+ 324:"Candy",
+ 325:"Sink",
+ 326:"Mouse",
+ 327:"Wine",
+ 328:"Wheelchair",
+ 329:"Goldfish",
+ 330:"Refrigerator",
+ 331:"French fries",
+ 332:"Drawer",
+ 333:"Treadmill",
+ 334:"Picnic basket",
+ 335:"Dice",
+ 336:"Cabbage",
+ 337:"Football helmet",
+ 338:"Pig",
+ 339:"Person",
+ 340:"Shorts",
+ 341:"Gondola",
+ 342:"Honeycomb",
+ 343:"Doughnut",
+ 344:"Chest of drawers",
+ 345:"Land vehicle",
+ 346:"Bat",
+ 347:"Monkey",
+ 348:"Dagger",
+ 349:"Tableware",
+ 350:"Human foot",
+ 351:"Mug",
+ 352:"Alarm clock",
+ 353:"Pressure cooker",
+ 354:"Human hand",
+ 355:"Tortoise",
+ 356:"Baseball glove",
+ 357:"Sword",
+ 358:"Pear",
+ 359:"Miniskirt",
+ 360:"Traffic sign",
+ 361:"Girl",
+ 362:"Roller skates",
+ 363:"Dinosaur",
+ 364:"Porch",
+ 365:"Human beard",
+ 366:"Submarine sandwich",
+ 367:"Screwdriver",
+ 368:"Strawberry",
+ 369:"Wine glass",
+ 370:"Seafood",
+ 371:"Racket",
+ 372:"Wheel",
+ 373:"Sea lion",
+ 374:"Toy",
+ 375:"Tea",
+ 376:"Tennis ball",
+ 377:"Waste container",
+ 378:"Mule",
+ 379:"Cricket ball",
+ 380:"Pineapple",
+ 381:"Coconut",
+ 382:"Doll",
+ 383:"Coffee table",
+ 384:"Snowman",
+ 385:"Lavender",
+ 386:"Shrimp",
+ 387:"Maple",
+ 388:"Cowboy hat",
+ 389:"Goggles",
+ 390:"Rugby ball",
+ 391:"Caterpillar",
+ 392:"Poster",
+ 393:"Rocket",
+ 394:"Organ",
+ 395:"Saxophone",
+ 396:"Traffic light",
+ 397:"Cocktail",
+ 398:"Plastic bag",
+ 399:"Squash",
+ 400:"Mushroom",
+ 401:"Hamburger",
+ 402:"Light switch",
+ 403:"Parachute",
+ 404:"Teddy bear",
+ 405:"Winter melon",
+ 406:"Deer",
+ 407:"Musical keyboard",
+ 408:"Plumbing fixture",
+ 409:"Scoreboard",
+ 410:"Baseball bat",
+ 411:"Envelope",
+ 412:"Adhesive tape",
+ 413:"Briefcase",
+ 414:"Paddle",
+ 415:"Bow and arrow",
+ 416:"Telephone",
+ 417:"Sheep",
+ 418:"Jacket",
+ 419:"Boy",
+ 420:"Pizza",
+ 421:"Otter",
+ 422:"Office supplies",
+ 423:"Couch",
+ 424:"Cello",
+ 425:"Bull",
+ 426:"Camel",
+ 427:"Ball",
+ 428:"Duck",
+ 429:"Whale",
+ 430:"Shirt",
+ 431:"Tank",
+ 432:"Motorcycle",
+ 433:"Accordion",
+ 434:"Owl",
+ 435:"Porcupine",
+ 436:"Sun hat",
+ 437:"Nail",
+ 438:"Scissors",
+ 439:"Swan",
+ 440:"Lamp",
+ 441:"Crown",
+ 442:"Piano",
+ 443:"Sculpture",
+ 444:"Cheetah",
+ 445:"Oboe",
+ 446:"Tin can",
+ 447:"Mango",
+ 448:"Tripod",
+ 449:"Oven",
+ 450:"Mouse",
+ 451:"Barge",
+ 452:"Coffee",
+ 453:"Snowboard",
+ 454:"Common fig",
+ 455:"Salad",
+ 456:"Marine invertebrates",
+ 457:"Umbrella",
+ 458:"Kangaroo",
+ 459:"Human arm",
+ 460:"Measuring cup",
+ 461:"Snail",
+ 462:"Loveseat",
+ 463:"Suit",
+ 464:"Teapot",
+ 465:"Bottle",
+ 466:"Alpaca",
+ 467:"Kettle",
+ 468:"Trousers",
+ 469:"Popcorn",
+ 470:"Centipede",
+ 471:"Spider",
+ 472:"Sparrow",
+ 473:"Plate",
+ 474:"Bagel",
+ 475:"Personal care",
+ 476:"Apple",
+ 477:"Brassiere",
+ 478:"Bathroom cabinet",
+ 479:"studio couch",
+ 480:"Computer keyboard",
+ 481:"Table tennis racket",
+ 482:"Sushi",
+ 483:"Cabinetry",
+ 484:"Street light",
+ 485:"Towel",
+ 486:"Nightstand",
+ 487:"Rabbit",
+ 488:"Dolphin",
+ 489:"Dog",
+ 490:"Jug",
+ 491:"Wok",
+ 492:"Fire hydrant",
+ 493:"Human eye",
+ 494:"Skyscraper",
+ 495:"Backpack",
+ 496:"Potato",
+ 497:"Paper towel",
+ 498:"Lifejacket",
+ 499:"Bicycle wheel",
+ 500:"Toilet",
+ }
+
+ if not with_background:
+ clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
+ return clsid2catid, catid2name
+
\ No newline at end of file
diff --git a/tools/eval.py b/tools/eval.py
index a9d7ed719f946697657f27601480086dd6a334d8..779d30b1ba12fe7e48dc409a5d412a98dc0a64dd 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -106,13 +106,16 @@ def main():
exe.run(startup_prog)
if 'weights' in cfg:
checkpoint.load_params(exe, eval_prog, cfg.weights)
-
+
+ assert cfg.metric != 'OID', "eval process of OID dataset \
+ is not supported."
if cfg.metric == "WIDERFACE":
raise ValueError("metric type {} does not support in tools/eval.py, "
"please use tools/face_eval.py".format(cfg.metric))
assert cfg.metric in ['COCO', 'VOC'], \
"unknown metric type {}".format(cfg.metric)
extra_keys = []
+
if cfg.metric == 'COCO':
extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg.metric == 'VOC':
diff --git a/tools/infer.py b/tools/infer.py
index 5eaa4f7fe07b871f2598c706fdda5a2f15e9998b..5e303da4ca057f3d34de70c3d69cd073ad5cfe12 100644
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -139,10 +139,10 @@ def main():
checkpoint.load_params(exe, infer_prog, cfg.weights)
# parse infer fetches
- assert cfg.metric in ['COCO', 'VOC', 'WIDERFACE'], \
+ assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
"unknown metric type {}".format(cfg.metric)
extra_keys = []
- if cfg['metric'] == 'COCO':
+ if cfg['metric'] in ['COCO', 'OID']:
extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
extra_keys = ['im_id', 'im_shape']
@@ -151,6 +151,8 @@ def main():
# parse dataset category
if cfg.metric == 'COCO':
from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
+ if cfg.metric == 'OID':
+ from ppdet.utils.oid_eval import bbox2out, get_category_info
if cfg.metric == "VOC":
from ppdet.utils.voc_eval import bbox2out, get_category_info
if cfg.metric == "WIDERFACE":