From 356bb7e286b85190560c492fbd4d669f4420df75 Mon Sep 17 00:00:00 2001
From: Kaipeng Deng <dengkaipeng@baidu.com>
Date: Thu, 27 Jun 2019 14:57:28 +0800
Subject: [PATCH] add VOC visualize (#2547)

* add VOC visualize

* fix ssd_mobilenet_v1_voc.yml

* use default label

* clean TestFeed dataset config

* fix voc default label

* fix format

* fix as review

* revert voc default

* use default label for all

* enable batch size != 1
---
 .../configs/cascade_rcnn_r50_fpn_1x.yml       |  1 -
 .../configs/faster_rcnn_r101_1x.yml           |  2 -
 .../configs/faster_rcnn_r101_fpn_1x.yml       |  2 -
 .../configs/faster_rcnn_r101_fpn_2x.yml       |  2 -
 .../configs/faster_rcnn_r101_vd_fpn_1x.yml    |  2 -
 .../configs/faster_rcnn_r101_vd_fpn_2x.yml    |  2 -
 .../configs/faster_rcnn_r50_1x.yml            |  2 -
 .../configs/faster_rcnn_r50_2x.yml            |  2 -
 .../configs/faster_rcnn_r50_fpn_1x.yml        |  1 -
 .../configs/faster_rcnn_r50_fpn_2x.yml        |  1 -
 .../configs/faster_rcnn_r50_vd_1x.yml         |  2 -
 .../configs/faster_rcnn_r50_vd_fpn_2x.yml     |  2 -
 .../configs/faster_rcnn_se154_vd_1x.yml       |  2 -
 .../configs/faster_rcnn_se154_vd_fpn_1x.yml   |  2 -
 .../configs/faster_rcnn_se154_vd_fpn_s1x.yml  |  2 -
 .../configs/faster_rcnn_x101_64x4d_fpn_1x.yml |  2 -
 .../configs/faster_rcnn_x101_64x4d_fpn_2x.yml |  2 -
 .../configs/mask_rcnn_r101_fpn_1x.yml         |  2 -
 .../configs/mask_rcnn_r101_fpn_2x.yml         |  2 -
 .../configs/mask_rcnn_r50_1x.yml              |  2 -
 .../configs/mask_rcnn_r50_2x.yml              |  2 -
 .../configs/mask_rcnn_r50_fpn_1x.yml          |  2 -
 .../configs/mask_rcnn_r50_fpn_2x.yml          |  2 -
 .../configs/mask_rcnn_r50_vd_fpn_2x.yml       |  2 -
 .../configs/mask_rcnn_se154_vd_fpn_s1x.yml    |  2 -
 .../configs/retinanet_r50_fpn_1x.yml          |  2 -
 .../configs/ssd_mobilenet_v1_voc.yml          |  8 +-
 .../configs/yolov3_darknet.yml                |  2 -
 .../configs/yolov3_mobilenet_v1.yml           |  2 -
 .../object_detection/configs/yolov3_r34.yml   |  2 -
 .../object_detection/ppdet/data/data_feed.py  |  2 +-
 .../ppdet/data/source/voc_loader.py           | 40 ++++----
 .../data/tools/generate_data_for_training.py  |  2 +-
 .../ppdet/data/transform/arrange_sample.py    |  3 +-
 .../ppdet/modeling/model_input.py             |  2 +-
 .../object_detection/ppdet/utils/coco_eval.py | 29 +++++-
 .../ppdet/utils/visualizer.py                 | 32 +++++--
 .../object_detection/ppdet/utils/voc_eval.py  | 93 +++++++++++++++++++
 PaddleCV/object_detection/tools/infer.py      | 43 +++++----
 39 files changed, 194 insertions(+), 115 deletions(-)
 create mode 100644 PaddleCV/object_detection/ppdet/utils/voc_eval.py

diff --git a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
index a950e612..ade4ecc6 100644
--- a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
@@ -135,7 +135,6 @@ FasterRCNNTestFeed:
     pad_to_stride: 32
   dataset:
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   drop_last: false
   num_workers: 2
   shuffle: false
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
index b779dc02..1ba2f2f1 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
@@ -112,6 +112,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
index 81c496d8..8ff2eadb 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
@@ -135,8 +135,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
index 4352ce05..ff0d8ca3 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
@@ -135,8 +135,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
index 3e709f50..9d207897 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
@@ -136,8 +136,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
index 7321afd9..9bcc54c9 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
@@ -136,8 +136,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
index c18191ef..b01443a4 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
@@ -112,6 +112,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
index 03b4336a..e5953616 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
@@ -112,6 +112,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
index 594bd0d6..7aed12f8 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
@@ -135,7 +135,6 @@ FasterRCNNTestFeed:
     pad_to_stride: 32
   dataset:
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   drop_last: false
   num_workers: 2
   shuffle: false
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
index 895442e0..bb9afac7 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
@@ -135,7 +135,6 @@ FasterRCNNTestFeed:
     pad_to_stride: 32
   dataset:
     annotation: coco/annotations/instances_val2017.json
-    image_dir: coco/val2017
   drop_last: false
   num_workers: 2
   shuffle: false
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
index bad32239..03d075d9 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
@@ -114,6 +114,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
index 2a942f9b..312352c8 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
@@ -136,8 +136,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml
index 564ac8c7..183b4bb7 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml
@@ -121,8 +121,6 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml
index bfd00e43..065c0ea7 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml
@@ -138,8 +138,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml
index 1864ad7a..36e849c9 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml
@@ -138,8 +138,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
index 00ce3387..bdbe03c4 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
@@ -137,8 +137,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
index 6940c38c..d9012e0e 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
@@ -137,8 +137,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
index 4da7f6c1..fd1e4bc6 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
index 78876a51..6d9bcbba 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
index 0358a3bf..581e5424 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
@@ -125,6 +125,4 @@ MaskRCNNEvalFeed:
 MaskRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
index fb08e174..d0c8ced1 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
@@ -126,6 +126,4 @@ MaskRCNNEvalFeed:
 MaskRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
index 753fcd62..e9204d0a 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
index 623bdf11..24984891 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
index e49c42c6..90b817a0 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
@@ -147,8 +147,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml
index 297b272e..0f73de16 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml
@@ -149,8 +149,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
index 188c06df..aafb4491 100644
--- a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
@@ -148,9 +148,7 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 128
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   drop_last: false
   image_shape: [3, 1333, 800]
   num_workers: 2
diff --git a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
index eb033192..4053a990 100644
--- a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
+++ b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
@@ -65,6 +65,7 @@ SSDTrainFeed:
     dataset_dir: data/voc
     annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
     image_dir: VOCdevkit/VOC_all/JPEGImages
+    use_default_label: true
 
 SSDEvalFeed:
   batch_size: 64
@@ -73,14 +74,11 @@ SSDEvalFeed:
     dataset_dir: data/voc
     annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
     image_dir: VOCdevkit/VOC_all/JPEGImages
-    use_default_label: false
+    use_default_label: true
   drop_last: false
 
 SSDTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
-    use_default_label: false
+    use_default_label: true
   drop_last: false
diff --git a/PaddleCV/object_detection/configs/yolov3_darknet.yml b/PaddleCV/object_detection/configs/yolov3_darknet.yml
index f9f9bfa5..886d4e23 100644
--- a/PaddleCV/object_detection/configs/yolov3_darknet.yml
+++ b/PaddleCV/object_detection/configs/yolov3_darknet.yml
@@ -77,6 +77,4 @@ YoloEvalFeed:
 YoloTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
index f6658d36..d27449be 100644
--- a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
+++ b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
@@ -78,6 +78,4 @@ YoloEvalFeed:
 YoloTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/yolov3_r34.yml b/PaddleCV/object_detection/configs/yolov3_r34.yml
index 2c53d141..e782992a 100644
--- a/PaddleCV/object_detection/configs/yolov3_r34.yml
+++ b/PaddleCV/object_detection/configs/yolov3_r34.yml
@@ -80,6 +80,4 @@ YoloEvalFeed:
 YoloTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/ppdet/data/data_feed.py b/PaddleCV/object_detection/ppdet/data/data_feed.py
index f57f56f5..a6dc371c 100644
--- a/PaddleCV/object_detection/ppdet/data/data_feed.py
+++ b/PaddleCV/object_detection/ppdet/data/data_feed.py
@@ -781,7 +781,7 @@ class SSDTestFeed(DataFeed):
 
     def __init__(self,
                  dataset=SimpleDataSet(VOC_TEST_ANNOTATION).__dict__,
-                 fields=['image'],
+                 fields=['image', 'im_id'],
                  image_shape=[3, 300, 300],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
diff --git a/PaddleCV/object_detection/ppdet/data/source/voc_loader.py b/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
index 387d5645..f40884c5 100644
--- a/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
+++ b/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
@@ -243,25 +243,25 @@ def load(anno_path, sample_num=-1, use_default_label=True):
 
 def pascalvoc_label():
     labels_map = {
-        'aeroplane': 1,
-        'bicycle': 2,
-        'bird': 3,
-        'boat': 4,
-        'bottle': 5,
-        'bus': 6,
-        'car': 7,
-        'cat': 8,
-        'chair': 9,
-        'cow': 10,
-        'diningtable': 11,
-        'dog': 12,
-        'horse': 13,
-        'motorbike': 14,
-        'person': 15,
-        'pottedplant': 16,
-        'sheep': 17,
-        'sofa': 18,
-        'train': 19,
-        'tvmonitor': 20
+	'aeroplane': 1,
+	'bicycle': 2,
+	'bird': 3,
+	'boat': 4,
+	'bottle': 5,
+	'bus': 6,
+	'car': 7,
+	'cat': 8,
+	'chair': 9,
+	'cow': 10,
+	'diningtable': 11,
+	'dog': 12,
+	'horse': 13,
+	'motorbike': 14,
+	'person': 15,
+	'pottedplant': 16,
+	'sheep': 17,
+	'sofa': 18,
+	'train': 19,
+	'tvmonitor': 20
     }
     return labels_map
diff --git a/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py b/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
index 75d9b95c..30b196f6 100644
--- a/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
+++ b/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
@@ -109,7 +109,7 @@ def dump_voc_as_pickle(args):
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     save_dir = args.save_dir
-    anno_path = args.annotation
+    anno_path = os.path.expanduser(args.annotation)
     roidb, cat2id = loader.load(
         anno_path, samples, with_cat2id=True, use_default_label=None)
     samples = len(roidb)
diff --git a/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py b/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
index da92c93f..556e2c2c 100644
--- a/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
@@ -183,7 +183,8 @@ class ArrangeTestSSD(BaseOperator):
             sample: a tuple containing the following items: (image)
         """
         im = sample['image']
-        outs = (im)
+        im_id = sample['im_id']
+        outs = (im, im_id)
         return outs
 
 
diff --git a/PaddleCV/object_detection/ppdet/modeling/model_input.py b/PaddleCV/object_detection/ppdet/modeling/model_input.py
index 73750e8f..de1aef6e 100644
--- a/PaddleCV/object_detection/ppdet/modeling/model_input.py
+++ b/PaddleCV/object_detection/ppdet/modeling/model_input.py
@@ -31,7 +31,7 @@ feed_var_def = [
     {'name': 'is_crowd',      'shape': [1],  'dtype': 'int32',   'lod_level': 1},
     {'name': 'gt_mask',       'shape': [2],  'dtype': 'float32', 'lod_level': 3},
     {'name': 'is_difficult',  'shape': [1],  'dtype': 'int32',   'lod_level': 1},
-    {'name': 'gt_score',      'shape': None, 'dtype': 'float32', 'lod_level': 0},
+    {'name': 'gt_score',      'shape': [1],  'dtype': 'float32', 'lod_level': 0},
     {'name': 'im_shape',      'shape': [3],  'dtype': 'float32',   'lod_level': 0},
 ]
 # yapf: enable
diff --git a/PaddleCV/object_detection/ppdet/utils/coco_eval.py b/PaddleCV/object_detection/ppdet/utils/coco_eval.py
index 941f86ee..3a48d2f7 100644
--- a/PaddleCV/object_detection/ppdet/utils/coco_eval.py
+++ b/PaddleCV/object_detection/ppdet/utils/coco_eval.py
@@ -34,6 +34,14 @@ __all__ = [
 ]
 
 
+def clip_bbox(bbox):
+    xmin = max(min(bbox[0], 1.), 0.)
+    ymin = max(min(bbox[1], 1.), 0.)
+    xmax = max(min(bbox[2], 1.), 0.)
+    ymax = max(min(bbox[3], 1.), 0.)
+    return xmin, ymin, xmax, ymax
+
+
 def bbox_eval(results, anno_file, outfile, with_background=True):
     assert 'bbox' in results[0]
     assert outfile.endswith('.json')
@@ -80,7 +88,7 @@ def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5):
     coco_ev.summarize()
 
 
-def bbox2out(results, clsid2catid):
+def bbox2out(results, clsid2catid, is_bbox_normalized=False):
     xywh_res = []
     for t in results:
         bboxes = t['bbox'][0]
@@ -97,8 +105,16 @@ def bbox2out(results, clsid2catid):
                 dt = bboxes[k]
                 clsid, score, xmin, ymin, xmax, ymax = dt.tolist()
                 catid = clsid2catid[clsid]
-                w = xmax - xmin + 1
-                h = ymax - ymin + 1
+
+                if is_bbox_normalized:
+                    xmin, ymin, xmax, ymax = \
+                            clip_bbox([xmin, ymin, xmax, ymax])
+                    w = xmax - xmin
+                    h = ymax - ymin
+                else:
+                    w = xmax - xmin + 1
+                    h = ymax - ymin + 1
+
                 bbox = [xmin, ymin, w, h]
                 coco_res = {
                     'image_id': im_id,
@@ -211,8 +227,11 @@ def expand_boxes(boxes, scale):
     return boxes_exp
 
 
-def get_category_info(anno_file=None, with_background=True):
-    if anno_file is None or not os.path.exists(anno_file):
+def get_category_info(anno_file=None,
+                      with_background=True,
+                      use_default_label=False):
+    if use_default_label or anno_file is None \
+            or not os.path.exists(anno_file):
         logger.info("Not found annotation file {}, load "
                     "coco17 categories.".format(anno_file))
         return coco17_category_info(with_background)
diff --git a/PaddleCV/object_detection/ppdet/utils/visualizer.py b/PaddleCV/object_detection/ppdet/utils/visualizer.py
index 1cbce02a..fa795cdd 100644
--- a/PaddleCV/object_detection/ppdet/utils/visualizer.py
+++ b/PaddleCV/object_detection/ppdet/utils/visualizer.py
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 
+import logging
 import numpy as np
 import pycocotools.mask as mask_util
 from PIL import Image, ImageDraw
@@ -25,23 +26,28 @@ from .colormap import colormap
 
 __all__ = ['visualize_results']
 
+logger = logging.getLogger(__name__)
+
 
 def visualize_results(image,
+                      im_id,
                       catid2name,
                       threshold=0.5,
                       bbox_results=None,
-                      mask_results=None):
+                      mask_results=None,
+                      is_bbox_normalized=False):
     """
     Visualize bbox and mask results
     """
     if mask_results:
-        image = draw_mask(image, mask_results, threshold)
+        image = draw_mask(image, im_id, mask_results, threshold)
     if bbox_results:
-        image = draw_bbox(image, catid2name, bbox_results, threshold)
+        image = draw_bbox(image, im_id, catid2name, bbox_results,
+                          threshold, is_bbox_normalized)
     return image
 
 
-def draw_mask(image, segms, threshold, alpha=0.7):
+def draw_mask(image, im_id, segms, threshold, alpha=0.7):
     """
     Draw mask on image
     """
@@ -50,6 +56,8 @@ def draw_mask(image, segms, threshold, alpha=0.7):
     w_ratio = .4
     img_array = np.array(image).astype('float32')
     for dt in np.array(segms):
+        if im_id != dt['image_id']:
+            continue
         segm, score = dt['segmentation'], dt['score']
         if score < threshold:
             continue
@@ -65,18 +73,28 @@ def draw_mask(image, segms, threshold, alpha=0.7):
     return Image.fromarray(img_array.astype('uint8'))
 
 
-def draw_bbox(image, catid2name, bboxes, threshold):
+def draw_bbox(image, im_id, catid2name, bboxes, threshold, 
+              is_bbox_normalized=False):
     """
     Draw bbox on image
     """
     draw = ImageDraw.Draw(image)
-    im_width, im_height = image.size
 
     for dt in np.array(bboxes):
+        if im_id != dt['image_id']:
+            continue
         catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
         if score < threshold:
             continue
         xmin, ymin, w, h = bbox
+
+        if is_bbox_normalized:
+            im_width, im_height = image.size
+            xmin *= im_width
+            ymin *= im_height
+            w *= im_width
+            h *= im_height
+
         xmax = xmin + w
         ymax = ymin + h
         draw.line(
@@ -86,5 +104,7 @@ def draw_bbox(image, catid2name, bboxes, threshold):
             fill='red')
         if image.mode == 'RGB':
             draw.text((xmin, ymin), catid2name[catid], (255, 255, 0))
+        logger.debug("\t {:15s} at {:25} score: {:.5f}".format(catid2name[catid],
+                    str(list(map(int, list([xmin, ymin, xmax, ymax])))), score))
 
     return image
diff --git a/PaddleCV/object_detection/ppdet/utils/voc_eval.py b/PaddleCV/object_detection/ppdet/utils/voc_eval.py
new file mode 100644
index 00000000..f637c0b7
--- /dev/null
+++ b/PaddleCV/object_detection/ppdet/utils/voc_eval.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+
+from ..data.source.voc_loader import pascalvoc_label
+from .coco_eval import bbox2out
+
+import logging
+logger = logging.getLogger(__name__)
+
+__all__ = [
+    'bbox2out', 'get_category_info'
+]
+
+
+def get_category_info(anno_file=None,
+                      with_background=True,
+                      use_default_label=False):
+    if use_default_label or anno_file is None \
+            or not os.path.exists(anno_file):
+        logger.info("Not found annotation file {}, load "
+                    "voc2012 categories.".format(anno_file))
+        return vocall_category_info(with_background)
+    else:
+        logger.info("Load categories from {}".format(anno_file))
+        return get_category_info_from_anno(anno_file, with_background)
+
+
+def get_category_info_from_anno(anno_file, with_background=True):
+    """
+    Get class id to category id map and category id
+    to category name map from annotation file.
+
+    Args:
+        anno_file (str): annotation file path
+        with_background (bool, default True):
+            whether load background as class 0.
+    """
+    cats = []
+    with open(anno_file) as f:
+        for line in f.readlines():
+            cats.append(line.strip())
+
+    if cats[0] != 'background' and with_background:
+        cats.insert(0, 'background')
+    if cats[0] == 'background' and not with_background:
+        cats = cats[1:]
+
+    clsid2catid = {i: i for i in range(len(cats))}
+    catid2name = {i: name for i, name in enumerate(cats)}
+
+    return clsid2catid, catid2name
+
+
+def vocall_category_info(with_background=True):
+    """
+    Get class id to category id map and category id
+    to category name map of mixup voc dataset
+
+    Args:
+        with_background (bool, default True):
+            whether load background as class 0.
+    """
+    label_map = pascalvoc_label()
+    label_map = sorted(label_map.items(), key=lambda x: x[1])
+    cats = [l[0] for l in label_map]
+
+    if with_background:
+        cats.insert(0, 'background')
+
+    clsid2catid = {i: i for i in range(len(cats))}
+    catid2name = {i: name for i, name in enumerate(cats)}
+
+    return clsid2catid, catid2name
diff --git a/PaddleCV/object_detection/tools/infer.py b/PaddleCV/object_detection/tools/infer.py
index 63d69be9..615f9f26 100644
--- a/PaddleCV/object_detection/tools/infer.py
+++ b/PaddleCV/object_detection/tools/infer.py
@@ -119,18 +119,21 @@ def main():
     extra_keys = []
     if cfg['metric'] == 'COCO':
         extra_keys = ['im_info', 'im_id', 'im_shape']
+    if cfg['metric'] == 'VOC':
+        extra_keys = ['im_id']
     keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)
 
     # 6. Parse dataset category
     if cfg.metric == 'COCO':
         from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
     if cfg.metric == "VOC":
-        # TODO(dengkaipeng): add VOC metric process
-        pass
+        from ppdet.utils.voc_eval import bbox2out, get_category_info
 
     anno_file = getattr(test_feed.dataset, 'annotation', None)
     with_background = getattr(test_feed, 'with_background', True)
-    clsid2catid, catid2name = get_category_info(anno_file, with_background)
+    use_default_label = getattr(test_feed, 'use_default_label', False)
+    clsid2catid, catid2name = get_category_info(anno_file, with_background,
+                                                use_default_label)
 
     imid2path = reader.imid2path
     for iter_id, data in enumerate(reader()):
@@ -144,27 +147,27 @@ def main():
         }
         logger.info('Infer iter {}'.format(iter_id))
 
-        im_id = int(res['im_id'][0])
-        image_path = imid2path[im_id]
-        if cfg.metric == 'COCO':
-            bbox_results = None
-            mask_results = None
-            if 'bbox' in res:
-                bbox_results = bbox2out([res], clsid2catid)
-            if 'mask' in res:
-                mask_results = mask2out([res], clsid2catid,
-                                        cfg.MaskHead.resolution)
-            image = Image.open(image_path)
-            image = visualize_results(image, catid2name, 0.5,
-                                      bbox_results, mask_results)
+        bbox_results = None
+        mask_results = None
+        is_bbox_normalized = True if cfg.metric == 'VOC' else False
+        if 'bbox' in res:
+            bbox_results = bbox2out([res], clsid2catid, 
+                                    is_bbox_normalized)
+        if 'mask' in res:
+            mask_results = mask2out([res], clsid2catid,
+                                    cfg.MaskHead['resolution'])
+
+        # visualize result
+        im_ids = res['im_id'][0]
+        for im_id in im_ids:
+            image_path = imid2path[int(im_id)]
+            image = Image.open(image_path).convert('RGB')
+            visualize_results(image, int(im_id), catid2name, 0.5, bbox_results,
+                              mask_results, is_bbox_normalized)
             save_name = get_save_image_name(FLAGS.output_dir, image_path)
             logger.info("Detection bbox results save in {}".format(save_name))
             image.save(save_name)
 
-        if cfg.metric == "VOC":
-            # TODO(dengkaipeng): add VOC metric process
-            pass
-
 
 if __name__ == '__main__':
     parser = ArgsParser()
-- 
GitLab