diff --git a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
index a950e612fc10e9691130c9fda669f2db0b6e2258..ade4ecc6ff266ef7f9b725b519a6a1445e79862c 100644
--- a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
@@ -135,7 +135,6 @@ FasterRCNNTestFeed:
     pad_to_stride: 32
   dataset:
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   drop_last: false
   num_workers: 2
   shuffle: false
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
index b779dc021592398520ed30cd6b48e914503182e8..1ba2f2f12d52dee2b281f82eb492b8e82026fd80 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
@@ -112,6 +112,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
index 81c496d89f44a13ef5a44e864299eb49c1293105..8ff2eadb7a178e9323d311108b13a6b66c9a3a86 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
@@ -135,8 +135,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
index 4352ce058f65883ac80124ec89b4094fd95611ca..ff0d8ca3f66640225665c78a2b673ca0f9f29baa 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
@@ -135,8 +135,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
index 3e709f50a102913323c163d4ba3c976ffad505d1..9d207897a3d0526b9e15513d9617cc7d964c2bfa 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
@@ -136,8 +136,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
index 7321afd9223e10296713c549a8212d99a79b2a2a..9bcc54c90eabe48d333cf31a7ade0601a1dec285 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
@@ -136,8 +136,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
index c18191ef19a47b1ce591cbd966358df3279ef3f7..b01443a4e05e876eda5fe15cbb4b1030b0701edd 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
@@ -112,6 +112,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
index 03b4336af42b99fb68ad000add92e552559558b5..e5953616b5547a5a7580570d2cac47e64b7120a3 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
@@ -112,6 +112,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
index 594bd0d63a4628d00bfa0ab811e0b2ab809f32ae..7aed12f8896074ad44ef238fcb4e786ddb9b64c5 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
@@ -135,7 +135,6 @@ FasterRCNNTestFeed:
     pad_to_stride: 32
   dataset:
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   drop_last: false
   num_workers: 2
   shuffle: false
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
index 895442e0501a0ef421b87ff70b9c46dd179e82bb..bb9afac76d1d7045c9411a582e4f5b8cc1ce4af8 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
@@ -135,7 +135,6 @@ FasterRCNNTestFeed:
     pad_to_stride: 32
   dataset:
     annotation: coco/annotations/instances_val2017.json
-    image_dir: coco/val2017
   drop_last: false
   num_workers: 2
   shuffle: false
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
index bad322394a2b0aed0d4d09992b9519469d7b3c46..03d075d90f93d56025b519302894d0fc27eb500a 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
@@ -114,6 +114,4 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
index 2a942f9b6185856ed646e3b285fa4e3e004eacf5..312352c8d8a17059336a3f80dfa0bb57547736ac 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
@@ -136,8 +136,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml
index 564ac8c7111e19edce1d5f33dc48ccca17b4aeae..183b4bb707b4e268b9b09ad1c7d148a3a0830d98 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml
@@ -121,8 +121,6 @@ FasterRCNNEvalFeed:
 FasterRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml
index bfd00e434a8e88c656d6198ad326e48d785aacbe..065c0ea741237c59a53f3dcf107ae15a8fb0a06c 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml
@@ -138,8 +138,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml
index 1864ad7a7656c5d57b3c3752e46c7390862e669f..36e849c9a5f075d13d7ac4076725ae0581a7ec20 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml
@@ -138,8 +138,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
index 00ce33876b328a9061eebe882ba4ee0fb6f7f2bc..bdbe03c426e971fb6980addc2428b9835945742a 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
@@ -137,8 +137,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
index 6940c38cb0be88f0fb75002b7a0185a26b1078ed..d9012e0e627a653ce04c728d7b6b5975eb31a684 100644
--- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
@@ -137,8 +137,6 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   shuffle: False
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
index 4da7f6c1b076ad37bd2e12b718abce3d5657f047..fd1e4bc6c8a16b7c78cf7cd9d838e3f88c01e6d3 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
index 78876a51d5699e7db6f29143a7be2f9c6488adc6..6d9bcbbaef682b8d553a1482c73aa3e77c4f01cb 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
index 0358a3bfa72dee553efc9ea0840164a5aa561186..581e54247476ebd43a46af05d1c5c9b293ab94d1 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
@@ -125,6 +125,4 @@ MaskRCNNEvalFeed:
 MaskRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
index fb08e1740e6c0661203dede973e33a9dbb4e6efa..d0c8ced12f70bc998d350dddeffee7436c1289b7 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
@@ -126,6 +126,4 @@ MaskRCNNEvalFeed:
 MaskRCNNTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
index 753fcd62339a9f2a6e5d39d4904a467e8878d4ac..e9204d0a5b8261fca3b54214222d07609b1d7327 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
index 623bdf11812aa31972708ae1e466024558372349..249848912a55bf48b033e96bcf3ee8544a4a00f8 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
@@ -144,8 +144,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
index e49c42c641faa475c4e4ebe1f7168d50572037c8..90b817a0c3a8d556cfc48b0d78907d0b63777aee 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
@@ -147,8 +147,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml
index 297b272e4f2b67e56aec46be808b6356aaa3a8c4..0f73de1699962bae54b0d29318b9148158db7770 100644
--- a/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml
@@ -149,8 +149,6 @@ MaskRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 32
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   num_workers: 2
   use_padded_im_info: True
diff --git a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
index 188c06dfcc2401670167d5d826542cb86eb35b44..aafb4491ca40c64d7cba931b984344a05a6f27c7 100644
--- a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
@@ -148,9 +148,7 @@ FasterRCNNTestFeed:
   - !PadBatch
     pad_to_stride: 128
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
   drop_last: false
   image_shape: [3, 1333, 800]
   num_workers: 2
diff --git a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
index eb033192a0790f2d0f73986b31e017df151c675d..4053a9908b2aa1506436bdfb25d91e51d2d701e6 100644
--- a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
+++ b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
@@ -65,6 +65,7 @@ SSDTrainFeed:
     dataset_dir: data/voc
     annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
     image_dir: VOCdevkit/VOC_all/JPEGImages
+    use_default_label: true
 
 SSDEvalFeed:
   batch_size: 64
@@ -73,14 +74,11 @@ SSDEvalFeed:
     dataset_dir: data/voc
     annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
     image_dir: VOCdevkit/VOC_all/JPEGImages
-    use_default_label: false
+    use_default_label: true
   drop_last: false
 
 SSDTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
-    use_default_label: false
+    use_default_label: true
   drop_last: false
diff --git a/PaddleCV/object_detection/configs/yolov3_darknet.yml b/PaddleCV/object_detection/configs/yolov3_darknet.yml
index f9f9bfa5f63d679ff444f57117f6397a9afeb298..886d4e23b4dbd4cfaaebc9cccee8bdd1dfd2ff3d 100644
--- a/PaddleCV/object_detection/configs/yolov3_darknet.yml
+++ b/PaddleCV/object_detection/configs/yolov3_darknet.yml
@@ -77,6 +77,4 @@ YoloEvalFeed:
 YoloTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
index f6658d36d4d7131a537048b5cc1d55d63aa55d5b..d27449beedb7c8d7665112e0ea4aec533192b608 100644
--- a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
+++ b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
@@ -78,6 +78,4 @@ YoloEvalFeed:
 YoloTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/configs/yolov3_r34.yml b/PaddleCV/object_detection/configs/yolov3_r34.yml
index 2c53d14105e5ea73b4aa8e707e4da172024c56fa..e782992ad9a252e12f265601e095d11febc63021 100644
--- a/PaddleCV/object_detection/configs/yolov3_r34.yml
+++ b/PaddleCV/object_detection/configs/yolov3_r34.yml
@@ -80,6 +80,4 @@ YoloEvalFeed:
 YoloTestFeed:
   batch_size: 1
   dataset:
-    dataset_dir: data/coco
     annotation: annotations/instances_val2017.json
-    image_dir: val2017
diff --git a/PaddleCV/object_detection/ppdet/data/data_feed.py b/PaddleCV/object_detection/ppdet/data/data_feed.py
index f57f56f5e0ec5bd9b1be9b5c4fc0c7af10615bda..a6dc371c254a2e05a9a295034179e42766d346bb 100644
--- a/PaddleCV/object_detection/ppdet/data/data_feed.py
+++ b/PaddleCV/object_detection/ppdet/data/data_feed.py
@@ -781,7 +781,7 @@ class SSDTestFeed(DataFeed):
 
     def __init__(self,
                  dataset=SimpleDataSet(VOC_TEST_ANNOTATION).__dict__,
-                 fields=['image'],
+                 fields=['image', 'im_id'],
                  image_shape=[3, 300, 300],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
diff --git a/PaddleCV/object_detection/ppdet/data/source/voc_loader.py b/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
index 387d56453f3787f46275a5a3ac8668eb8f9c8c7c..f40884c5836b3485c41a9bb91f70ceea727044f6 100644
--- a/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
+++ b/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
@@ -243,25 +243,25 @@ def load(anno_path, sample_num=-1, use_default_label=True):
 
 def pascalvoc_label():
     labels_map = {
-        'aeroplane': 1,
-        'bicycle': 2,
-        'bird': 3,
-        'boat': 4,
-        'bottle': 5,
-        'bus': 6,
-        'car': 7,
-        'cat': 8,
-        'chair': 9,
-        'cow': 10,
-        'diningtable': 11,
-        'dog': 12,
-        'horse': 13,
-        'motorbike': 14,
-        'person': 15,
-        'pottedplant': 16,
-        'sheep': 17,
-        'sofa': 18,
-        'train': 19,
-        'tvmonitor': 20
+        'aeroplane': 1,
+        'bicycle': 2,
+        'bird': 3,
+        'boat': 4,
+        'bottle': 5,
+        'bus': 6,
+        'car': 7,
+        'cat': 8,
+        'chair': 9,
+        'cow': 10,
+        'diningtable': 11,
+        'dog': 12,
+        'horse': 13,
+        'motorbike': 14,
+        'person': 15,
+        'pottedplant': 16,
+        'sheep': 17,
+        'sofa': 18,
+        'train': 19,
+        'tvmonitor': 20
     }
     return labels_map
diff --git a/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py b/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
index 75d9b95c637a67e743a2bb415c65b0c8e320e4a2..30b196f61418f28edbdb80c95a1d9adbdf00c11b 100644
--- a/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
+++ b/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
@@ -109,7 +109,7 @@ def dump_voc_as_pickle(args):
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     save_dir = args.save_dir
-    anno_path = args.annotation
+    anno_path = os.path.expanduser(args.annotation)
     roidb, cat2id = loader.load(
         anno_path, samples, with_cat2id=True, use_default_label=None)
     samples = len(roidb)
diff --git a/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py b/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
index da92c93f8c990e8b6951c83341407b04b6cd2972..556e2c2c568316d584ef58e8bd42e79448b6f637 100644
--- a/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
@@ -183,7 +183,8 @@ class ArrangeTestSSD(BaseOperator):
             sample: a tuple containing the following items: (image)
         """
         im = sample['image']
-        outs = (im)
+        im_id = sample['im_id']
+        outs = (im, im_id)
         return outs
 
 
diff --git a/PaddleCV/object_detection/ppdet/modeling/model_input.py b/PaddleCV/object_detection/ppdet/modeling/model_input.py
index 73750e8f6172c56194bdf60cdff9cd6252b75c43..de1aef6e5ed77898ec585aad5ee69756620554da 100644
--- a/PaddleCV/object_detection/ppdet/modeling/model_input.py
+++ b/PaddleCV/object_detection/ppdet/modeling/model_input.py
@@ -31,7 +31,7 @@ feed_var_def = [
     {'name': 'is_crowd',      'shape': [1],  'dtype': 'int32',   'lod_level': 1},
     {'name': 'gt_mask',       'shape': [2],  'dtype': 'float32', 'lod_level': 3},
     {'name': 'is_difficult',  'shape': [1],  'dtype': 'int32',   'lod_level': 1},
-    {'name': 'gt_score',      'shape': None, 'dtype': 'float32', 'lod_level': 0},
+    {'name': 'gt_score',      'shape': [1],  'dtype': 'float32', 'lod_level': 0},
     {'name': 'im_shape',      'shape': [3],  'dtype': 'float32',   'lod_level': 0},
 ]
 # yapf: enable
diff --git a/PaddleCV/object_detection/ppdet/utils/coco_eval.py b/PaddleCV/object_detection/ppdet/utils/coco_eval.py
index 941f86eeac97bf50d652a260e73137e8db486920..3a48d2f727d8b7c5efc0d0593b7accf316fc9b5d 100644
--- a/PaddleCV/object_detection/ppdet/utils/coco_eval.py
+++ b/PaddleCV/object_detection/ppdet/utils/coco_eval.py
@@ -34,6 +34,14 @@ __all__ = [
 ]
 
 
+def clip_bbox(bbox):
+    """
+    Clamp bbox[0..3] (xmin, ymin, xmax, ymax) to the range [0, 1]
+    and return them as a tuple.
+    """
+    return tuple(max(min(bbox[i], 1.), 0.) for i in range(4))
+
+
 def bbox_eval(results, anno_file, outfile, with_background=True):
     assert 'bbox' in results[0]
     assert outfile.endswith('.json')
@@ -80,7 +88,7 @@ def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5):
     coco_ev.summarize()
 
 
-def bbox2out(results, clsid2catid):
+def bbox2out(results, clsid2catid, is_bbox_normalized=False):
     xywh_res = []
     for t in results:
         bboxes = t['bbox'][0]
@@ -97,8 +105,16 @@ def bbox2out(results, clsid2catid):
                 dt = bboxes[k]
                 clsid, score, xmin, ymin, xmax, ymax = dt.tolist()
                 catid = clsid2catid[clsid]
-                w = xmax - xmin + 1
-                h = ymax - ymin + 1
+
+                if is_bbox_normalized:
+                    xmin, ymin, xmax, ymax = \
+                            clip_bbox([xmin, ymin, xmax, ymax])
+                    w = xmax - xmin
+                    h = ymax - ymin
+                else:
+                    w = xmax - xmin + 1
+                    h = ymax - ymin + 1
+
                 bbox = [xmin, ymin, w, h]
                 coco_res = {
                     'image_id': im_id,
@@ -211,8 +227,11 @@ def expand_boxes(boxes, scale):
     return boxes_exp
 
 
-def get_category_info(anno_file=None, with_background=True):
-    if anno_file is None or not os.path.exists(anno_file):
+def get_category_info(anno_file=None,
+                      with_background=True,
+                      use_default_label=False):
+    if use_default_label or anno_file is None \
+            or not os.path.exists(anno_file):
         logger.info("Not found annotation file {}, load "
                     "coco17 categories.".format(anno_file))
         return coco17_category_info(with_background)
diff --git a/PaddleCV/object_detection/ppdet/utils/visualizer.py b/PaddleCV/object_detection/ppdet/utils/visualizer.py
index 1cbce02a160266f17b2324ec84ff37fed8834ba6..fa795cdd12515a55a35b3a03b66b12e4553cb70e 100644
--- a/PaddleCV/object_detection/ppdet/utils/visualizer.py
+++ b/PaddleCV/object_detection/ppdet/utils/visualizer.py
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 
+import logging
 import numpy as np
 import pycocotools.mask as mask_util
 from PIL import Image, ImageDraw
@@ -25,23 +26,28 @@ from .colormap import colormap
 
 __all__ = ['visualize_results']
 
+logger = logging.getLogger(__name__)
+
 
 def visualize_results(image,
+                      im_id,
                       catid2name,
                       threshold=0.5,
                       bbox_results=None,
-                      mask_results=None):
+                      mask_results=None,
+                      is_bbox_normalized=False):
     """
     Visualize bbox and mask results
     """
     if mask_results:
-        image = draw_mask(image, mask_results, threshold)
+        image = draw_mask(image, im_id, mask_results, threshold)
     if bbox_results:
-        image = draw_bbox(image, catid2name, bbox_results, threshold)
+        image = draw_bbox(image, im_id, catid2name, bbox_results,
+                          threshold, is_bbox_normalized)
     return image
 
 
-def draw_mask(image, segms, threshold, alpha=0.7):
+def draw_mask(image, im_id, segms, threshold, alpha=0.7):
     """
     Draw mask on image
     """
@@ -50,6 +56,8 @@ def draw_mask(image, segms, threshold, alpha=0.7):
     w_ratio = .4
     img_array = np.array(image).astype('float32')
     for dt in np.array(segms):
+        if im_id != dt['image_id']:
+            continue
         segm, score = dt['segmentation'], dt['score']
         if score < threshold:
             continue
@@ -65,18 +73,28 @@ def draw_mask(image, segms, threshold, alpha=0.7):
     return Image.fromarray(img_array.astype('uint8'))
 
 
-def draw_bbox(image, catid2name, bboxes, threshold):
+def draw_bbox(image, im_id, catid2name, bboxes, threshold,
+              is_bbox_normalized=False):
     """
     Draw bbox on image
     """
     draw = ImageDraw.Draw(image)
-    im_width, im_height = image.size
 
     for dt in np.array(bboxes):
+        if im_id != dt['image_id']:
+            continue
         catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
         if score < threshold:
             continue
         xmin, ymin, w, h = bbox
+
+        if is_bbox_normalized:
+            im_width, im_height = image.size
+            xmin *= im_width
+            ymin *= im_height
+            w *= im_width
+            h *= im_height
+
         xmax = xmin + w
         ymax = ymin + h
         draw.line(
@@ -86,5 +104,7 @@ def draw_bbox(image, catid2name, bboxes, threshold):
             fill='red')
         if image.mode == 'RGB':
             draw.text((xmin, ymin), catid2name[catid], (255, 255, 0))
+        logger.debug("\t {:15s} at {:25} score: {:.5f}".format(catid2name[catid],
+                    str(list(map(int, list([xmin, ymin, xmax, ymax])))), score))
 
     return image
diff --git a/PaddleCV/object_detection/ppdet/utils/voc_eval.py b/PaddleCV/object_detection/ppdet/utils/voc_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..f637c0b749eaf88ae96af6b0b37c4cb376888687
--- /dev/null
+++ b/PaddleCV/object_detection/ppdet/utils/voc_eval.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+
+from ..data.source.voc_loader import pascalvoc_label
+from .coco_eval import bbox2out
+
+import logging
+logger = logging.getLogger(__name__)
+
+__all__ = [
+    'bbox2out', 'get_category_info'
+]
+
+
+def get_category_info(anno_file=None,
+                      with_background=True,
+                      use_default_label=False):
+    """Return (clsid2catid, catid2name) from anno_file or VOC defaults."""
+    if use_default_label or anno_file is None \
+            or not os.path.exists(anno_file):
+        logger.info("Default labels requested or annotation file {} not "
+                    "found, load voc2012 categories.".format(anno_file))
+        return vocall_category_info(with_background)
+    logger.info("Load categories from {}".format(anno_file))
+    return get_category_info_from_anno(anno_file, with_background)
+
+
+def get_category_info_from_anno(anno_file, with_background=True):
+    """
+    Get class id to category id map and category id to category
+    name map from an annotation file with one category per line.
+
+    Args:
+        anno_file (str): annotation file path
+        with_background (bool, default True):
+            whether load background as class 0.
+    """
+    with open(anno_file) as f:
+        # Skip blank lines so they do not become empty category names.
+        cats = [line.strip() for line in f if line.strip()]
+
+    # cats may be empty here, so never index cats[0] unguarded.
+    has_background = bool(cats) and cats[0] == 'background'
+    if with_background and not has_background:
+        cats.insert(0, 'background')
+    elif has_background and not with_background:
+        cats = cats[1:]
+
+    clsid2catid = {i: i for i in range(len(cats))}
+    catid2name = {i: name for i, name in enumerate(cats)}
+    return clsid2catid, catid2name
+
+
+def vocall_category_info(with_background=True):
+    """
+    Build the class-id -> category-id and category-id -> name maps
+    for the default Pascal VOC label set.
+
+    Args:
+        with_background (bool, default True):
+            whether to reserve class 0 for 'background'.
+
+    Returns:
+        tuple: (clsid2catid, catid2name) dicts keyed by int ids.
+    """
+    # Order category names by their numeric label id.
+    voc_labels = pascalvoc_label()
+    names = sorted(voc_labels, key=voc_labels.get)
+    if with_background:
+        names = ['background'] + names
+
+    catid2name = dict(enumerate(names))
+    return {i: i for i in catid2name}, catid2name
diff --git a/PaddleCV/object_detection/tools/infer.py b/PaddleCV/object_detection/tools/infer.py
index 63d69be9dd1d748a4c38067290672cb0d5d4a51e..615f9f2685bea47e49b4c829526902c3de81e858 100644
--- a/PaddleCV/object_detection/tools/infer.py
+++ b/PaddleCV/object_detection/tools/infer.py
@@ -119,18 +119,21 @@ def main():
     extra_keys = []
     if cfg['metric'] == 'COCO':
         extra_keys = ['im_info', 'im_id', 'im_shape']
+    if cfg['metric'] == 'VOC':
+        extra_keys = ['im_id']
     keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)
 
     # 6. Parse dataset category
     if cfg.metric == 'COCO':
         from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
     if cfg.metric == "VOC":
-        # TODO(dengkaipeng): add VOC metric process
-        pass
+        from ppdet.utils.voc_eval import bbox2out, get_category_info
 
     anno_file = getattr(test_feed.dataset, 'annotation', None)
     with_background = getattr(test_feed, 'with_background', True)
-    clsid2catid, catid2name = get_category_info(anno_file, with_background)
+    use_default_label = getattr(test_feed, 'use_default_label', False)
+    clsid2catid, catid2name = get_category_info(anno_file, with_background,
+                                                use_default_label)
 
     imid2path = reader.imid2path
     for iter_id, data in enumerate(reader()):
@@ -144,27 +147,27 @@ def main():
         }
         logger.info('Infer iter {}'.format(iter_id))
 
-        im_id = int(res['im_id'][0])
-        image_path = imid2path[im_id]
-        if cfg.metric == 'COCO':
-            bbox_results = None
-            mask_results = None
-            if 'bbox' in res:
-                bbox_results = bbox2out([res], clsid2catid)
-            if 'mask' in res:
-                mask_results = mask2out([res], clsid2catid,
-                                        cfg.MaskHead.resolution)
-            image = Image.open(image_path)
-            image = visualize_results(image, catid2name, 0.5,
-                                      bbox_results, mask_results)
+        bbox_results = None
+        mask_results = None
+        # Boxes are treated as normalized coordinates under the VOC metric.
+        is_bbox_normalized = cfg.metric == 'VOC'
+        if 'bbox' in res:
+            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
+        if 'mask' in res:
+            mask_results = mask2out([res], clsid2catid,
+                                    cfg.MaskHead['resolution'])
+
+        # Visualize; draw_mask returns a new Image, so keep the return value.
+        for im_id in res['im_id'][0]:
+            image_path = imid2path[int(im_id)]
+            image = Image.open(image_path).convert('RGB')
+            image = visualize_results(image, int(im_id), catid2name, 0.5,
+                                      bbox_results, mask_results,
+                                      is_bbox_normalized)
             save_name = get_save_image_name(FLAGS.output_dir, image_path)
             logger.info("Detection bbox results save in {}".format(save_name))
             image.save(save_name)
 
-        if cfg.metric == "VOC":
-            # TODO(dengkaipeng): add VOC metric process
-            pass
-
 
 if __name__ == '__main__':
     parser = ArgsParser()