diff --git a/ppdet/data/data_feed.py b/ppdet/data/data_feed.py
index 97f8e1fd98f46bd3baf900c9560fbdf3b1482b8c..44256722b6fa6fc82dbb2f3bb08f159110beb374 100644
--- a/ppdet/data/data_feed.py
+++ b/ppdet/data/data_feed.py
@@ -936,6 +936,7 @@ class YoloEvalFeed(DataFeed):
             with_background=with_background,
             num_workers=num_workers,
             use_process=use_process)
+        self.num_max_boxes = num_max_boxes
         self.mode = 'VAL'
         self.bufsize = 128

diff --git a/ppdet/data/transform/arrange_sample.py b/ppdet/data/transform/arrange_sample.py
index 708bf3fe1ba5fcbdfbd590b2199d6142e5c3457e..991eed51580abbb929247d4cca0482621f5b4d81 100644
--- a/ppdet/data/transform/arrange_sample.py
+++ b/ppdet/data/transform/arrange_sample.py
@@ -245,8 +245,8 @@ class ArrangeEvalYOLO(BaseOperator):
             context: a dict which contains additional info.
         Returns:
             sample: a tuple containing the following items:
-                (image, gt_bbox, gt_class, gt_score,
-                 is_crowd, im_info, gt_masks)
+                (image, im_shape, im_id, gt_bbox, gt_class,
+                 difficult)
         """
         im = sample['image']
         if len(sample['gt_bbox']) != len(sample['gt_class']):
@@ -255,9 +255,14 @@ class ArrangeEvalYOLO(BaseOperator):
         h = sample['h']
         w = sample['w']
         im_shape = np.array((h, w))
-        gt_bbox = sample['gt_bbox']
-        gt_class = sample['gt_class']
-        difficult = sample['difficult']
+        gt_bbox = np.zeros((50, 4), dtype=im.dtype)
+        gt_class = np.zeros((50, ), dtype=np.int32)
+        difficult = np.zeros((50, ), dtype=np.int32)
+        gt_num = min(50, len(sample['gt_bbox']))
+        if gt_num > 0:
+            gt_bbox[:gt_num, :] = sample['gt_bbox'][:gt_num, :]
+            gt_class[:gt_num] = sample['gt_class'][:gt_num, 0]
+            difficult[:gt_num] = sample['difficult'][:gt_num, 0]
         outs = (im, im_shape, im_id, gt_bbox, gt_class, difficult)
         return outs

diff --git a/ppdet/modeling/model_input.py b/ppdet/modeling/model_input.py
index 896da81beea31ecad00e9c9a409921556dc452e1..1f16efe3f887eaf98e2318f5bc18a111fa5edefc 100644
--- a/ppdet/modeling/model_input.py
+++ b/ppdet/modeling/model_input.py
@@ -54,9 +54,11 @@ def create_feed(feed, use_pyreader=True):
         feed_var_map['gt_label']['shape'] = [feed.num_max_boxes]
         feed_var_map['gt_score']['shape'] = [feed.num_max_boxes]
         feed_var_map['gt_box']['shape'] = [feed.num_max_boxes, 4]
+        feed_var_map['is_difficult']['shape'] = [feed.num_max_boxes]
         feed_var_map['gt_label']['lod_level'] = 0
         feed_var_map['gt_score']['lod_level'] = 0
         feed_var_map['gt_box']['lod_level'] = 0
+        feed_var_map['is_difficult']['lod_level'] = 0

     feed_vars = OrderedDict([(key, fluid.layers.data(
         name=feed_var_map[key]['name'],
diff --git a/ppdet/utils/map_utils.py b/ppdet/utils/map_utils.py
index 9ba856df3df5774dffd1495aea092e2c7ae33d6f..e6e18a25c890cf6c0eeffed3c5dd24549b931238 100644
--- a/ppdet/utils/map_utils.py
+++ b/ppdet/utils/map_utils.py
@@ -105,7 +105,7 @@ class DetectionMAP(object):
         # record class gt count
         for gtl, diff in zip(gt_label, difficult):
             if self.evaluate_difficult or int(diff) == 0:
-                self.class_gt_counts[int(gtl[0])] += 1
+                self.class_gt_counts[int(np.array(gtl))] += 1

         # record class score positive
         visited = [False] * len(gt_label)
@@ -124,7 +124,7 @@ class DetectionMAP(object):

             if max_overlap > self.overlap_thresh:
                 if self.evaluate_difficult or \
-                        int(difficult[max_idx]) == 0:
+                        int(np.array(difficult[max_idx])) == 0:
                     if not visited[max_idx]:
                         self.class_score_poss[
                             int(label)].append([score, 1.0])
diff --git a/ppdet/utils/voc_eval.py b/ppdet/utils/voc_eval.py
index f67fc24d4812c106b5bc30655d0f37582d4e2cfe..b5c7a87aad80a51f2c7860d5fe3df7b9d3d41a3e 100644
--- a/ppdet/utils/voc_eval.py
+++ b/ppdet/utils/voc_eval.py
@@ -71,27 +71,39 @@ def bbox_eval(results,
             continue

         gt_boxes = t['gt_box'][0]
-        gt_box_lengths = t['gt_box'][1][0]
         gt_labels = t['gt_label'][0]
-        assert len(gt_boxes) == len(gt_labels)
         difficults = t['is_difficult'][0] if not evaluate_difficult \
             else None
-        if not evaluate_difficult:
-            assert len(gt_labels) == len(difficults)
-
-        bbox_idx = 0
-        gt_box_idx = 0
-        for i in range(len(bbox_lengths)):
-            bbox_num = bbox_lengths[i]
-            gt_box_num = gt_box_lengths[i]
-            bbox = bboxes[bbox_idx: bbox_idx + bbox_num]
-            gt_box = gt_boxes[gt_box_idx: gt_box_idx + gt_box_num]
-            gt_label = gt_labels[gt_box_idx: gt_box_idx + gt_box_num]
-            difficult = None if difficults is None else \
-                difficults[gt_box_idx: gt_box_idx + gt_box_num]
-            detection_map.update(bbox, gt_box, gt_label, difficult)
-            bbox_idx += bbox_num
-            gt_box_idx += gt_box_num
+
+        if len(t['gt_box'][1]) == 0:
+            # gt_box, gt_label, difficult read as zero padded Tensor
+            bbox_idx = 0
+            for i in range(len(gt_boxes)):
+                gt_box = gt_boxes[i]
+                gt_label = gt_labels[i]
+                difficult = difficults[i]
+                bbox_num = bbox_lengths[i]
+                bbox = bboxes[bbox_idx: bbox_idx + bbox_num]
+                gt_box, gt_label, difficult = prune_zero_padding(
+                    gt_box, gt_label, difficult)
+                detection_map.update(bbox, gt_box, gt_label, difficult)
+                bbox_idx += bbox_num
+        else:
+            # gt_box, gt_label, difficult read as LoDTensor
+            gt_box_lengths = t['gt_box'][1][0]
+            bbox_idx = 0
+            gt_box_idx = 0
+            for i in range(len(bbox_lengths)):
+                bbox_num = bbox_lengths[i]
+                gt_box_num = gt_box_lengths[i]
+                bbox = bboxes[bbox_idx: bbox_idx + bbox_num]
+                gt_box = gt_boxes[gt_box_idx: gt_box_idx + gt_box_num]
+                gt_label = gt_labels[gt_box_idx: gt_box_idx + gt_box_num]
+                difficult = None if difficults is None else \
+                    difficults[gt_box_idx: gt_box_idx + gt_box_num]
+                detection_map.update(bbox, gt_box, gt_label, difficult)
+                bbox_idx += bbox_num
+                gt_box_idx += gt_box_num

     logger.info("Accumulating evaluatation results...")
     detection_map.accumulate()
@@ -99,6 +111,17 @@ def bbox_eval(results,
                                                   map_type, 100. * detection_map.get_map()))


+def prune_zero_padding(gt_box, gt_label, difficult=None):
+    valid_cnt = 0
+    for i in range(len(gt_box)):
+        if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
+           gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
+            break
+        valid_cnt += 1
+    return (gt_box[:valid_cnt], gt_label[:valid_cnt],
+            difficult[:valid_cnt] if difficult is not None else None)
+
+
 def get_category_info(anno_file=None,
                       with_background=True,
                       use_default_label=False):
diff --git a/tools/train.py b/tools/train.py
index 9ec2f8045d625211da2b4702ffd244455a480913..4944cdd6b34da8b716cd3c093344324b3799cd12 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -123,8 +123,11 @@ def main():
         eval_pyreader.decorate_sample_list_generator(eval_reader, place)

         # parse eval fetches
-        extra_keys = ['im_info', 'im_id',
-                      'im_shape'] if cfg.metric == 'COCO' else []
+        extra_keys = []
+        if cfg.metric == 'COCO':
+            extra_keys = ['im_info', 'im_id', 'im_shape']
+        if cfg.metric == 'VOC':
+            extra_keys = ['gt_box', 'gt_label', 'is_difficult']
         eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                          extra_keys)

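Note (not part of the patch): the core idea above is that ArrangeEvalYOLO now zero-pads each image's ground truth to a fixed 50 slots so it can be fed as a dense Tensor, and prune_zero_padding trims the all-zero rows back off on the evaluation side before they reach detection_map.update. The sketch below is a minimal, self-contained NumPy illustration of that round trip; pad_ground_truth and MAX_BOXES are made-up names for the example, and prune_zero_padding here mirrors the helper added to ppdet/utils/voc_eval.py, with the all-zero check compacted into (row == 0).all().

    import numpy as np

    MAX_BOXES = 50  # fixed padding size, matching ArrangeEvalYOLO above

    def pad_ground_truth(boxes, labels, difficult):
        # Zero-pad one image's annotations to MAX_BOXES rows (illustrative only).
        padded_box = np.zeros((MAX_BOXES, 4), dtype=np.float32)
        padded_label = np.zeros((MAX_BOXES, ), dtype=np.int32)
        padded_diff = np.zeros((MAX_BOXES, ), dtype=np.int32)
        num = min(MAX_BOXES, len(boxes))
        if num > 0:
            padded_box[:num, :] = boxes[:num, :]
            padded_label[:num] = labels[:num]
            padded_diff[:num] = difficult[:num]
        return padded_box, padded_label, padded_diff

    def prune_zero_padding(gt_box, gt_label, difficult=None):
        # Drop trailing rows whose four coordinates are all zero.
        valid_cnt = 0
        for i in range(len(gt_box)):
            if (gt_box[i] == 0).all():
                break
            valid_cnt += 1
        return (gt_box[:valid_cnt], gt_label[:valid_cnt],
                difficult[:valid_cnt] if difficult is not None else None)

    # Round trip: the two real boxes survive, the 48 padding rows are discarded.
    boxes = np.array([[10., 20., 30., 40.], [5., 5., 15., 15.]], dtype=np.float32)
    labels = np.array([3, 7], dtype=np.int32)
    diffs = np.array([0, 1], dtype=np.int32)
    gt_box, gt_label, difficult = prune_zero_padding(*pad_ground_truth(boxes, labels, diffs))
    assert gt_box.shape == (2, 4) and len(gt_label) == 2 and len(difficult) == 2

One consequence of this convention: a real box whose four coordinates are exactly (0, 0, 0, 0) would be indistinguishable from padding and pruned as well, so the scheme relies on such boxes never appearing in the annotations.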
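Note (not part of the patch): the int(np.array(...)) changes in ppdet/utils/map_utils.py appear to be needed because the two feed paths hand DetectionMAP differently shaped values: a LoDTensor feed yields each label/difficult entry as a length-1 row, while the zero-padded path yields bare NumPy scalars, and the old int(gtl[0]) form only accepts the former. A quick NumPy sketch of why the wrapper works for both (values arbitrary; recent NumPy versions warn about converting a size-1 array this way but still accept it):

    import numpy as np

    lod_style = np.array([7], dtype=np.int32)  # length-1 row sliced from a LoDTensor batch
    padded_style = np.int32(7)                 # scalar element taken from a padded array

    # int(lod_style[0]) works, but padded_style[0] raises IndexError on a scalar.
    # Routing both through np.array() first makes int() valid in either case.
    assert int(np.array(lod_style)) == 7
    assert int(np.array(padded_style)) == 7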