Unverified commit 8192c758, authored by qingqing01, committed by GitHub

Polish reader to simplify preprocessing logic. (#112)

* Polish Reader to simplify preprocessing logic.
# sample_transforms -> make batch -> batch_transforms in Reader.
* Clean some code
* Improve yolov3_r50vd_dcn_obj365_pretrained_coco from 41.4 to 41.8.
* Update all configs.
Parent 9cd0d1ee
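The change replaces each model's `*Feed` blocks with `TrainReader`/`EvalReader`/`TestReader` sections that follow the new sample_transforms -> make batch -> batch_transforms flow. A minimal sketch of the new layout, assembled only from keys that appear in this diff (the dataset paths are illustrative):

    TrainReader:
      inputs_def:
        fields: ['image', 'gt_bbox', 'gt_class']    # tensors handed to the network
      dataset:
        !COCODataSet                                # dataset type is now an explicit YAML tag
        dataset_dir: dataset/coco
        image_dir: train2017
        anno_path: annotations/instances_train2017.json
      sample_transforms:                            # run per sample, before batching
      - !DecodeImage
        to_rgb: true
      batch_transforms:                             # run on the assembled batch
      - !PadBatch
        pad_to_stride: 32
      batch_size: 2
      shuffle: true
      worker_num: 2                                 # renamed from num_workers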
@@ -11,17 +11,17 @@
     - id: detect-private-key
       files: (?!.*paddle)^.*$
     - id: end-of-file-fixer
-      files: \.md$
+      files: \.(md|yml)$
     - id: trailing-whitespace
-      files: \.md$
+      files: \.(md|yml)$
 - repo: https://github.com/Lucas-C/pre-commit-hooks
   sha: v1.0.1
   hooks:
   - id: forbid-crlf
-    files: \.md$
+    files: \.(md|yml)$
   - id: remove-crlf
-    files: \.md$
+    files: \.(md|yml)$
   - id: forbid-tabs
-    files: \.md$
+    files: \.(md|yml)$
   - id: remove-tabs
-    files: \.md$
+    files: \.(md|yml)$
 architecture: CascadeMaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 use_gpu: true
 max_iters: 180000
 snapshot_iter: 10000
@@ -86,7 +83,7 @@ MaskAssigner:
   resolution: 28
 CascadeBBoxHead:
   head: CascadeTwoFCHead
   nms:
     keep_top_k: 100
     nms_threshold: 0.5
@@ -113,33 +110,4 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: 'mask_fpn_reader.yml'
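Model configs now include a shared reader file via `_READER_` and override only the keys that differ; the pattern used throughout this commit looks like:

    _READER_: 'mask_fpn_reader.yml'   # pull in the shared reader definitions
    TrainReader:
      batch_size: 2                   # override a single key from the included file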
 architecture: CascadeRCNNClsAware
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -107,80 +104,6 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  sample_transforms:
-  - !DecodeImage
-    to_rgb: True
-    with_mixup: False
-  - !NormalizeImage
-    is_channel_first: false
-    is_scale: True
-    mean:
-    - 0.485
-    - 0.456
-    - 0.406
-    std:
-    - 0.229
-    - 0.224
-    - 0.225
-  - !ResizeImage
-    interp: 1
-    target_size:
-    - 800
-    max_size: 1333
-    use_cv2: true
-  - !Permute
-    to_bgr: false
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  sample_transforms:
-  - !DecodeImage
-    to_rgb: True
-    with_mixup: False
-  - !NormalizeImage
-    is_channel_first: false
-    is_scale: True
-    mean:
-    - 0.485
-    - 0.456
-    - 0.406
-    std:
-    - 0.229
-    - 0.224
-    - 0.225
-  - !ResizeImage
-    interp: 1
-    target_size:
-    - 800
-    max_size: 1333
-    use_cv2: true
-  - !Permute
-    to_bgr: false
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
\ No newline at end of file
+_READER_: 'faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
 architecture: CascadeRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -104,34 +101,6 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+_READER_: 'faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
 architecture: CascadeRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -111,23 +108,22 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+_READER_: 'faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
-FasterRCNNEvalFeed:
+EvalReader:
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+    multi_scale: true
+    num_scales: 18
+    use_flip: true
   dataset:
+    !COCODataSet
     dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
+    anno_path: annotations/instances_val2017.json
     image_dir: val2017
   sample_transforms:
   - !DecodeImage
@@ -160,18 +156,6 @@ FasterRCNNEvalFeed:
   - !Permute
     channel_first: true
     to_bgr: false
   batch_transforms:
-  - !PadMSTest
-    pad_to_stride: 32
-    num_scale: 18
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+  - !PadMultiScaleTest
+    pad_to_stride: 32
+  worker_num: 2
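The `num_scales: 18` above follows the formula noted later in this diff; the arithmetic, assuming eight extra test scales (the count that makes the numbers close), is:

    # num_scale = (len(target_size) + 1) * (1 + use_flip)
    #           = (8 + 1) * (1 + 1)
    #           = 18    # 9 scales, each unflipped and flipped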
 architecture: CascadeMaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 max_iters: 300000
 snapshot_iter: 10
 use_gpu: true
 log_iter: 20
 log_smooth_window: 20
 save_dir: output
 pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
 weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
 metric: COCO
 num_classes: 81
@@ -96,7 +93,7 @@ MaskAssigner:
   resolution: 28
 CascadeBBoxHead:
   head: CascadeXConvNormHead
   nms:
     keep_top_k: 100
     nms_threshold: 0.5
@@ -123,17 +120,19 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
+TrainReader:
   # batch size per device
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
   dataset:
+    !COCODataSet
     dataset_dir: dataset/coco
     image_dir: train2017
-    annotation: annotations/instances_train2017.json
+    anno_path: annotations/instances_train2017.json
   sample_transforms:
   - !DecodeImage
-    to_rgb: False
-    with_mixup: False
+    to_rgb: false
   - !RandomFlipImage
     is_mask_flip: true
     is_normalized: false
@@ -142,13 +141,13 @@ MaskRCNNTrainFeed:
     is_channel_first: false
     is_scale: False
     mean:
     - 102.9801
     - 115.9465
     - 122.7717
     std:
     - 1.0
     - 1.0
     - 1.0
   - !ResizeImage
     interp: 1
     target_size:
@@ -161,7 +160,7 @@ MaskRCNNTrainFeed:
     - 608
     - 640
     - 672
     - 704
     - 736
     - 768
     - 800
@@ -192,29 +191,32 @@ MaskRCNNTrainFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
-  num_workers: 8
+  worker_num: 8
+  shuffle: true
-MaskRCNNEvalFeed:
+EvalReader:
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
   dataset:
+    !COCODataSet
     dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
+    anno_path: annotations/instances_val2017.json
     image_dir: val2017
   sample_transforms:
   - !DecodeImage
     to_rgb: False
-    with_mixup: False
   - !NormalizeImage
     is_channel_first: false
     is_scale: False
     mean:
     - 102.9801
     - 115.9465
     - 122.7717
     std:
     - 1.0
     - 1.0
     - 1.0
   - !ResizeImage
     interp: 1
     target_size:
@@ -227,16 +229,20 @@ MaskRCNNEvalFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
-  num_workers: 2
+  worker_num: 2
+  drop_empty: false
-MaskRCNNTestFeed:
+TestReader:
   batch_size: 1
+  inputs_def:
+    image_shape: [3, 800, 1333]
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
   dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
+    !ImageFolder
+    anno_path: annotations/instances_val2017.json
   sample_transforms:
   - !DecodeImage
     to_rgb: False
-    with_mixup: False
   - !NormalizeImage
     is_channel_first: false
     is_scale: False
@@ -254,4 +260,4 @@ MaskRCNNTestFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
-  num_workers: 2
+  worker_num: 2
 architecture: CascadeMaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 max_iters: 300000
 snapshot_iter: 10000
 use_gpu: true
 log_iter: 20
 log_smooth_window: 20
 save_dir: output
 pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
 weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
 metric: COCO
 num_classes: 81
@@ -96,7 +93,7 @@ MaskAssigner:
   resolution: 28
 CascadeBBoxHead:
   head: CascadeXConvNormHead
   nms:
     keep_top_k: 100
     nms_threshold: 0.5
@@ -130,14 +127,17 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
+TrainReader:
   # batch size per device
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
   dataset:
+    !COCODataSet
     dataset_dir: dataset/coco
     image_dir: train2017
-    annotation: annotations/instances_train2017.json
+    anno_path: annotations/instances_train2017.json
   sample_transforms:
   - !DecodeImage
     to_rgb: False
     with_mixup: False
@@ -149,13 +149,13 @@ MaskRCNNTrainFeed:
     is_channel_first: false
     is_scale: False
     mean:
     - 102.9801
     - 115.9465
     - 122.7717
     std:
     - 1.0
     - 1.0
     - 1.0
   - !ResizeImage
     interp: 1
     target_size:
@@ -168,7 +168,7 @@ MaskRCNNTrainFeed:
     - 608
     - 640
     - 672
     - 704
     - 736
     - 768
     - 800
@@ -199,28 +199,36 @@ MaskRCNNTrainFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
-  num_workers: 8
+  worker_num: 8
+  shuffle: true
-MaskRCNNEvalFeed:
+EvalReader:
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+    multi_scale: true
+    # num_scale = (len(target_size) + 1) * (1 + use_flip)
+    num_scales: 18
+    use_flip: true
   dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
+    !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
   sample_transforms:
   - !DecodeImage
     to_rgb: False
   - !NormalizeImage
     is_channel_first: false
     is_scale: False
     mean:
     - 102.9801
     - 115.9465
     - 122.7717
     std:
     - 1.0
     - 1.0
     - 1.0
   - !MultiscaleTestResize
     origin_target_size: 800
     origin_max_size: 1333
@@ -239,16 +247,17 @@ MaskRCNNEvalFeed:
     channel_first: true
     to_bgr: false
   batch_transforms:
-  - !PadMSTest
+  - !PadMultiScaleTest
     pad_to_stride: 32
-    # num_scale = (len(target_size) + 1) * (1 + use_flip)
-    num_scale: 18
-  num_workers: 2
+  worker_num: 2
-MaskRCNNTestFeed:
+TestReader:
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
   dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
+    !ImageFolder
+    anno_path: annotations/instances_val2017.json
   sample_transforms:
   - !DecodeImage
     to_rgb: False
@@ -269,4 +278,3 @@ MaskRCNNTestFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
-  num_workers: 2
 architecture: CascadeRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 460000
 snapshot_iter: 10000
 use_gpu: true
@@ -109,16 +106,18 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
+TrainReader:
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
   dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
+    !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
   sample_transforms:
   - !DecodeImage
-    to_rgb: True
-    with_mixup: False
+    to_rgb: true
   - !RandomFlipImage
     prob: 0.5
   - !NormalizeImage
@@ -131,7 +130,7 @@ FasterRCNNTrainFeed:
     std:
     - 0.229
     - 0.224
    - 0.225
   - !ResizeImage
     interp: 1
     target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
@@ -142,16 +141,19 @@ FasterRCNNTrainFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+  worker_num: 2
+  shuffle: true
-FasterRCNNEvalFeed:
+EvalReader:
   batch_size: 1
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
   dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
+    !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
   sample_transforms:
   - !DecodeImage
     to_rgb: True
     with_mixup: False
@@ -165,7 +167,7 @@ FasterRCNNEvalFeed:
     std:
     - 0.229
     - 0.224
     - 0.225
   - !ResizeImage
     interp: 1
     target_size:
@@ -177,13 +179,34 @@ FasterRCNNEvalFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
+  worker_num: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+  dataset:
+    !ImageFolder
+    anno_path: annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+    use_padded_im_info: true
+  batch_size: 1
+  worker_num: 2
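The new `TestReader` swaps `!COCODataSet` for `!ImageFolder`, so inference reads loose image files rather than an annotated dataset; the annotation path is presumably kept only to supply the label list. The relevant keys, all taken from this diff (the role noted for `anno_path` is an assumption):

    TestReader:
      inputs_def:
        image_shape: [3, 800, 1333]   # fixed input shape, useful for export
        fields: ['image', 'im_info', 'im_id', 'im_shape']
      dataset:
        !ImageFolder
        anno_path: annotations/instances_val2017.json   # category names only (assumption)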
 architecture: CascadeRCNNClsAware
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 460000
 snapshot_iter: 10000
 use_gpu: true
@@ -109,16 +106,17 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 1
+TrainReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
   dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
+    !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
   sample_transforms:
   - !DecodeImage
-    to_rgb: True
-    with_mixup: False
+    to_rgb: true
   - !RandomFlipImage
     prob: 0.5
   - !NormalizeImage
@@ -131,7 +129,7 @@ FasterRCNNTrainFeed:
     std:
     - 0.229
     - 0.224
     - 0.225
   - !ResizeImage
     interp: 1
     target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
@@ -142,16 +140,20 @@ FasterRCNNTrainFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
+  batch_size: 1
+  shuffle: true
   drop_last: false
-  num_workers: 2
+  worker_num: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
   dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
+    !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
   sample_transforms:
   - !DecodeImage
     to_rgb: True
     with_mixup: False
@@ -165,7 +167,7 @@ FasterRCNNEvalFeed:
     std:
     - 0.229
     - 0.224
     - 0.225
   - !ResizeImage
     interp: 1
     target_size:
@@ -177,13 +179,37 @@ FasterRCNNEvalFeed:
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
+  batch_size: 1
+  worker_num: 2
+  drop_empty: false
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+TestReader:
+  inputs_def:
+    image_shape: [3, 800, 1333]
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+  dataset:
+    !ImageFolder
+    anno_path: annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+    use_padded_im_info: true
+  batch_size: 1
+  worker_num: 2
 architecture: CascadeRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -106,34 +103,6 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
 architecture: CascadeRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -106,34 +103,6 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
 architecture: CascadeRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -26,7 +23,7 @@ ResNeXt:
   depth: 101
   feature_maps: [2, 3, 4, 5]
   freeze_at: 2
   group_width: 4
   groups: 64
   variant: d
   dcn_v2_stages: [3, 4, 5]
@@ -108,34 +105,6 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -106,34 +103,7 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  # batch size per device
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    image_dir: train2017
-    annotation: annotations/instances_train2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  # batch size per device
+  batch_size: 2
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 use_gpu: true
 snapshot_iter: 10000
@@ -105,34 +102,8 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  drop_last: false
-  num_workers: 2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  # batch size per device
+  batch_size: 2
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 180000
 snapshot_iter: 10000
 use_gpu: true
@@ -106,34 +103,7 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  # batch size per device
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    image_dir: train2017
-    annotation: annotations/instances_train2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  # batch size per device
+  batch_size: 2
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 180000
 snapshot_iter: 10000
 use_gpu: true
@@ -108,36 +105,5 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  # batch size per device
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    image_dir: train2017
-    annotation: annotations/instances_train2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-  shuffle: true
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-  shuffle: false
+_READER_: '../faster_fpn_reader.yml'
 architecture: MaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 max_iters: 180000
 snapshot_iter: 10000
 use_gpu: true
 log_smooth_window: 20
 log_iter: 20
 save_dir: output
 pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
 weights: output/mask_rcnn_dcn_r101_vd_fpn_1x/model_final
 metric: COCO
 num_classes: 81
@@ -114,33 +111,4 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: '../mask_fpn_reader.yml'
 architecture: MaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 use_gpu: true
 max_iters: 180000
 snapshot_iter: 10000
@@ -113,33 +110,5 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: '../mask_fpn_reader.yml'
 architecture: MaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 use_gpu: true
 max_iters: 360000
 snapshot_iter: 10000
@@ -114,34 +111,4 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
-  # batch size per device
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    image_dir: train2017
-    annotation: annotations/instances_train2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: '../mask_fpn_reader.yml'
 architecture: MaskRCNN
-train_feed: MaskRCNNTrainFeed
-eval_feed: MaskRCNNEvalFeed
-test_feed: MaskRCNNTestFeed
 max_iters: 180000
 snapshot_iter: 10000
 use_gpu: true
 log_smooth_window: 20
 log_iter: 20
 save_dir: output
 pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar
 weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final
 metric: COCO
 num_classes: 81
@@ -116,33 +113,4 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-MaskRCNNTrainFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-MaskRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: '../mask_fpn_reader.yml'
 architecture: YOLOv3
-train_feed: YoloTrainFeed
-eval_feed: YoloEvalFeed
-test_feed: YoloTestFeed
 use_gpu: true
 max_iters: 500000
 log_smooth_window: 20
 save_dir: output
 snapshot_iter: 20000
 metric: COCO
 pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
 weights: output/yolov3_r50vd_dcn/model_final
 num_classes: 80
@@ -62,67 +59,4 @@ OptimizerBuilder:
     factor: 0.0005
     type: L2
-YoloTrainFeed:
-  batch_size: 8
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  sample_transforms:
-  - !DecodeImage
-    to_rgb: True
-    with_mixup: True
-  - !MixupImage
-    alpha: 1.5
-    beta: 1.5
-  - !NormalizeBox {}
-  - !RandomDistort {}
-  - !ExpandImage
-    max_ratio: 4
-    prob: 0.5
-    mean:
-    - 123.675
-    - 116.28
-    - 103.53
-  - !CropImage
-    batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
-  - !RandomInterpImage
-    target_size: 608
-  - !RandomFlipImage
-    is_normalized: True
-  - !NormalizeImage
-    mean:
-    - 0.485
-    - 0.456
-    - 0.406
-    std:
-    - 0.229
-    - 0.224
-    - 0.225
-    is_scale: True
-    is_channel_first: False
-  - !Permute
-    to_bgr: False
-  num_workers: 8
-  bufsize: 128
-  use_process: true
-YoloEvalFeed:
-  batch_size: 8
-  image_shape: [3, 608, 608]
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-YoloTestFeed:
-  batch_size: 1
-  image_shape: [3, 608, 608]
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
+_READER_: '../yolov3_reader.yml'
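For reference, each row of the removed `!CropImage` `batch_sampler` packs one sampling rule into fixed positions. The column labels below follow the usual SSD-style cropper convention, so treat them as an assumption rather than something this diff states:

    # [max_sample, max_trial, min_scale, max_scale,
    #  min_aspect_ratio, max_aspect_ratio, min_overlap, max_overlap]
    - !CropImage
      batch_sampler: [[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0]]
      # at most 1 crop kept from 50 trials, crop scale 0.3-1.0,
      # aspect ratio 0.5-2.0, IoU with a ground-truth box >= 0.1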
 architecture: YOLOv3
-train_feed: YoloTrainFeed
-eval_feed: YoloEvalFeed
-test_feed: YoloTestFeed
 use_gpu: true
 max_iters: 55000
 log_smooth_window: 20
 save_dir: output
 snapshot_iter: 10000
 metric: COCO
 pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar
 weights: output/yolov3_r50vd_dcn_obj365_pretrained_coco/model_final
 num_classes: 80
@@ -62,98 +59,97 @@ OptimizerBuilder:
     factor: 0.0005
     type: L2
-YoloTrainFeed:
-  batch_size: 8
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+    num_max_boxes: 50
   dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
+    !COCODataSet
     image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    with_background: false
   sample_transforms:
   - !DecodeImage
     to_rgb: True
-    with_mixup: False
-  - !NormalizeBox {}
-  - !CropImage
-    batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
-                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
-  - !RandomInterpImage
-    target_size: 608
+  - !RandomCrop {}
   - !RandomFlipImage
-    is_normalized: True
+    is_normalized: false
+  - !NormalizeBox {}
+  - !PadBox
+    num_max_boxes: 50
+  - !BboxXYXY2XYWH {}
+  batch_transforms:
+  - !RandomShape
+    sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+    random_inter: True
   - !NormalizeImage
-    mean:
-    - 0.485
-    - 0.456
-    - 0.406
-    std:
-    - 0.229
-    - 0.224
-    - 0.225
-    is_scale: False
-    is_channel_first: False
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+    is_scale: True
+    is_channel_first: false
   - !Permute
-    to_bgr: False
-  num_workers: 8
-  bufsize: 128
+    to_bgr: false
+    channel_first: True
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  worker_num: 8
+  bufsize: 32
   use_process: true
-YoloEvalFeed:
-  batch_size: 8
-  image_shape: [3, 608, 608]
+EvalReader:
+  inputs_def:
+    image_shape: [3, 608, 608]
+    fields: ['image', 'im_size', 'im_id']
+    num_max_boxes: 50
   dataset:
+    !COCODataSet
     dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
+    anno_path: annotations/instances_val2017.json
     image_dir: val2017
+    with_background: false
   sample_transforms:
   - !DecodeImage
     to_rgb: True
-    with_mixup: False
+    with_mixup: false
   - !ResizeImage
     interp: 2
     target_size: 608
   - !NormalizeImage
-    mean:
-    - 0.485
-    - 0.456
-    - 0.406
-    std:
-    - 0.229
-    - 0.224
-    - 0.225
-    is_scale: False
-    is_channel_first: False
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+    is_scale: True
+    is_channel_first: false
   - !Permute
-    to_bgr: False
+    to_bgr: false
+    channel_first: True
+  batch_size: 8
+  drop_empty: false
+  worker_num: 8
+  bufsize: 32
-YoloTestFeed:
-  batch_size: 1
-  image_shape: [3, 608, 608]
+TestReader:
+  inputs_def:
+    image_shape: [3, 608, 608]
+    fields: ['image', 'im_size', 'im_id']
   dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
+    !ImageFolder
+    anno_path: annotations/instances_val2017.json
+    with_background: false
   sample_transforms:
   - !DecodeImage
     to_rgb: True
-    with_mixup: False
+    with_mixup: false
   - !ResizeImage
     interp: 2
     target_size: 608
   - !NormalizeImage
-    mean:
-    - 0.485
-    - 0.456
-    - 0.406
-    std:
-    - 0.229
-    - 0.224
-    - 0.225
-    is_scale: False
-    is_channel_first: False
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+    is_scale: True
+    is_channel_first: false
   - !Permute
-    to_bgr: False
+    to_bgr: false
+    channel_first: True
+  batch_size: 1
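Note how this YOLOv3 pipeline moves multi-scale resizing from the per-sample `!RandomInterpImage` to the batch-level `!RandomShape`, so every image in a batch shares one randomly chosen size (which is what allows stacking them into a single tensor). The transform as configured above:

    batch_transforms:
    - !RandomShape                  # picks one size per batch from the list
      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
      random_inter: True            # also randomizes the interpolation method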
 architecture: BlazeFace
 max_iters: 320000
-train_feed: SSDTrainFeed
-eval_feed: SSDEvalFeed
-test_feed: SSDTestFeed
 pretrain_weights:
 use_gpu: true
 snapshot_iter: 10000
 log_smooth_window: 20
@@ -43,18 +40,18 @@ OptimizerBuilder:
     factor: 0.0005
     type: L2
-SSDTrainFeed:
-  batch_size: 8
-  use_process: True
+TrainReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+    fields: ['image', 'gt_bbox', 'gt_class']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_train_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_train_bbx_gt.txt
     image_dir: WIDER_train/images
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
   - !RandomDistort
     brightness_lower: 0.875
@@ -82,49 +79,41 @@ SSDTrainFeed:
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 8
+  use_process: true
+  shuffle: true
-SSDEvalFeed:
-  batch_size: 1
-  use_process: false
-  fields: ['image', 'im_id', 'gt_box']
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_id']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_val_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_val_bbx_gt.txt
    image_dir: WIDER_val/images
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
-  - !ResizeImage
-    interp: 1
-    target_size: 640
-    use_cv2: false
   - !Permute {}
   - !NormalizeImage
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 1
-SSDTestFeed:
-  batch_size: 1
-  use_process: false
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_id', 'im_shape']
   dataset:
+    !ImageFolder
     use_default_label: true
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
-  - !ResizeImage
-    interp: 1
-    target_size: 640
-    use_cv2: false
   - !Permute {}
   - !NormalizeImage
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 1
 architecture: BlazeFace
 max_iters: 320000
-train_feed: SSDTrainFeed
-eval_feed: SSDEvalFeed
-test_feed: SSDTestFeed
 pretrain_weights:
 use_gpu: true
 snapshot_iter: 10000
 log_smooth_window: 20
@@ -45,18 +42,18 @@ OptimizerBuilder:
     factor: 0.0005
     type: L2
-SSDTrainFeed:
-  batch_size: 8
-  use_process: True
+TrainReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+    fields: ['image', 'gt_bbox', 'gt_class']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_train_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_train_bbx_gt.txt
     image_dir: WIDER_train/images
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
   - !RandomDistort
     brightness_lower: 0.875
@@ -84,21 +81,21 @@ SSDTrainFeed:
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 8
+  use_process: true
+  shuffle: true
-SSDEvalFeed:
-  batch_size: 1
-  use_process: false
-  fields: ['image', 'im_id', 'gt_box']
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_id']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_val_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_val_bbx_gt.txt
     image_dir: WIDER_val/images
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
   - !ResizeImage
     interp: 1
@@ -109,18 +106,18 @@ SSDEvalFeed:
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 1
-SSDTestFeed:
-  batch_size: 1
-  use_process: false
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]
+    fields: ['image', 'im_id', 'im_shape']
   dataset:
+    !ImageFolder
     use_default_label: true
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !ResizeImage
     interp: 1
     target_size: 640
@@ -130,3 +127,4 @@ SSDTestFeed:
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 1
 architecture: FaceBoxes
-train_feed: SSDTrainFeed
-eval_feed: SSDEvalFeed
-test_feed: SSDTestFeed
 pretrain_weights:
 use_gpu: true
 max_iters: 320000
@@ -43,18 +40,21 @@ OptimizerBuilder:
     factor: 0.0005
     type: L2
-SSDTrainFeed:
+TrainReader:
   batch_size: 8
   use_process: True
+  shuffle: true
+  inputs_def:
+    image_shape: [3, 640, 640]
+    fields: ['image', 'gt_bbox', 'gt_class']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_train_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_train_bbx_gt.txt
     image_dir: WIDER_train/images
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
   - !RandomDistort
     brightness_lower: 0.875
@@ -83,48 +83,37 @@ SSDTrainFeed:
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
-SSDEvalFeed:
+EvalReader:
   batch_size: 1
   use_process: false
-  fields: ['image', 'im_id', 'gt_box']
+  inputs_def:
+    fields: ['image', 'im_id']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_val_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_val_bbx_gt.txt
     image_dir: WIDER_val/images
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
-  - !NormalizeBox {}
-  - !ResizeImage
-    interp: 1
-    target_size: 640
-    use_cv2: false
   - !Permute {}
   - !NormalizeImage
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
-SSDTestFeed:
-  batch_size: 1
-  use_process: false
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_id', 'im_shape']
   dataset:
+    !ImageFolder
     use_default_label: true
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
-  - !ResizeImage
-    interp: 1
-    target_size: 640
-    use_cv2: false
   - !Permute {}
   - !NormalizeImage
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 1
 architecture: FaceBoxes
-train_feed: SSDTrainFeed
-eval_feed: SSDEvalFeed
-test_feed: SSDTestFeed
 pretrain_weights:
 use_gpu: true
 max_iters: 320000
@@ -43,18 +40,21 @@ OptimizerBuilder:
     factor: 0.0005
     type: L2
-SSDTrainFeed:
+TrainReader:
   batch_size: 8
   use_process: True
+  shuffle: true
+  inputs_def:
+    image_shape: [3, 640, 640]
+    fields: ['image', 'gt_bbox', 'gt_class']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_train_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_train_bbx_gt.txt
     image_dir: WIDER_train/images
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
   - !RandomDistort
     brightness_lower: 0.875
@@ -83,48 +83,38 @@ SSDTrainFeed:
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
-SSDEvalFeed:
+EvalReader:
   batch_size: 1
   use_process: false
-  fields: ['image', 'im_id', 'gt_box']
+  inputs_def:
+    fields: ['image', 'im_id']
   dataset:
+    !WIDERFaceDataSet
     dataset_dir: dataset/wider_face
-    annotation: wider_face_split/wider_face_val_bbx_gt.txt
+    anno_path: wider_face_split/wider_face_val_bbx_gt.txt
     image_dir: WIDER_val/images
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
   - !NormalizeBox {}
-  - !ResizeImage
-    interp: 1
-    target_size: 640
-    use_cv2: false
   - !Permute {}
   - !NormalizeImage
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
-SSDTestFeed:
-  batch_size: 1
-  use_process: false
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_id', 'im_shape']
  dataset:
+    !ImageFolder
     use_default_label: true
-  drop_last: false
-  image_shape: [3, 640, 640]
   sample_transforms:
   - !DecodeImage
     to_rgb: true
-    with_mixup: false
-  - !ResizeImage
-    interp: 1
-    target_size: 640
-    use_cv2: false
   - !Permute {}
   - !NormalizeImage
     is_scale: false
     mean: [104, 117, 123]
     std: [127.502231, 127.502231, 127.502231]
+  batch_size: 1
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !RandomFlipImage
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: false
batch_size: 1
shuffle: true
worker_num: 2
use_process: false
EvalReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
drop_empty: false
worker_num: 2
TestReader:
inputs_def:
# set image_shape if needed
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
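As the `# for voc` comment in `EvalReader.inputs_def` hints, a config that includes this reader (presumably `faster_fpn_reader.yml`) can switch evaluation to VOC-style fields with a small override; a sketch using only lines from this file plus the `_READER_` mechanism shown earlier:

    _READER_: 'faster_fpn_reader.yml'
    EvalReader:
      inputs_def:
        fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']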
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -106,34 +103,6 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  # batch size per device
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    image_dir: train2017
-    annotation: annotations/instances_train2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: 'faster_fpn_reader.yml'
+TrainReader:
+  batch_size: 2
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 max_iters: 90000
 snapshot_iter: 10000
 use_gpu: true
@@ -106,34 +103,7 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  # batch size per device
-  batch_size: 2
-  dataset:
-    dataset_dir: dataset/coco
-    image_dir: train2017
-    annotation: annotations/instances_train2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
-  batch_transforms:
-  - !PadBatch
-    pad_to_stride: 32
-  num_workers: 2
+_READER_: 'faster_fpn_reader.yml'
+TrainReader:
+  # batch size per device
+  batch_size: 2
 architecture: FasterRCNN
-train_feed: FasterRCNNTrainFeed
-eval_feed: FasterRCNNEvalFeed
-test_feed: FasterRCNNTestFeed
 use_gpu: true
 max_iters: 180000
 log_smooth_window: 20
@@ -91,25 +88,4 @@ OptimizerBuilder:
     factor: 0.0001
     type: L2
-FasterRCNNTrainFeed:
-  # batch size per device
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_train2017.json
-    image_dir: train2017
-  drop_last: false
-  num_workers: 2
-FasterRCNNEvalFeed:
-  batch_size: 1
-  dataset:
-    dataset_dir: dataset/coco
-    annotation: annotations/instances_val2017.json
-    image_dir: val2017
-  num_workers: 2
-FasterRCNNTestFeed:
-  batch_size: 1
-  dataset:
-    annotation: dataset/coco/annotations/instances_val2017.json
+_READER_: 'faster_reader.yml'
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -103,34 +100,4 @@ OptimizerBuilder: ...@@ -103,34 +100,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -103,34 +100,4 @@ OptimizerBuilder: ...@@ -103,34 +100,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -104,34 +101,4 @@ OptimizerBuilder: ...@@ -104,34 +101,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -104,34 +101,4 @@ OptimizerBuilder: ...@@ -104,34 +101,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 180000 max_iters: 180000
log_smooth_window: 20 log_smooth_window: 20
...@@ -91,25 +88,4 @@ OptimizerBuilder: ...@@ -91,25 +88,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 360000 max_iters: 360000
log_smooth_window: 20 log_smooth_window: 20
...@@ -91,25 +88,4 @@ OptimizerBuilder: ...@@ -91,25 +88,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000 max_iters: 90000
use_gpu: true use_gpu: true
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -104,34 +101,6 @@ OptimizerBuilder: ...@@ -104,34 +101,6 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
TrainReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
use_gpu: true use_gpu: true
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -104,34 +101,6 @@ OptimizerBuilder: ...@@ -104,34 +101,6 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
TrainReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
FasterRCNNTestFeed:
batch_size: 1
dataset:
    annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 180000 max_iters: 180000
log_smooth_window: 20 log_smooth_window: 20
...@@ -93,25 +90,4 @@ OptimizerBuilder: ...@@ -93,25 +90,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -104,34 +101,6 @@ OptimizerBuilder: ...@@ -104,34 +101,6 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device TrainReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 260000 max_iters: 260000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -106,34 +103,4 @@ OptimizerBuilder: ...@@ -106,34 +103,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -107,36 +104,4 @@ OptimizerBuilder: ...@@ -107,36 +104,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
shuffle: true
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
shuffle: false
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -106,36 +103,4 @@ OptimizerBuilder: ...@@ -106,36 +103,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
shuffle: true
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
shuffle: false
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !RandomFlipImage
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_size: 1
shuffle: true
worker_num: 2
use_process: false
EvalReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_size: 1
shuffle: false
drop_empty: false
worker_num: 2
TestReader:
inputs_def:
    image_shape: [3, 800, 1333]
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_size: 1
shuffle: false
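This reader carries no PadBatch step, so it is presumably the faster_reader.yml that the non-FPN configs above include. The commented fields in its EvalReader also hint at how a VOC run would change the evaluation signature; a sketch of that override, assuming a VOCDataSet laid out like the SSD configs later in this change:

EvalReader:
  inputs_def:
    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
  dataset:
    !VOCDataSet
    anno_path: test.txt        # assumed VOC layout, as in the SSD readers
    dataset_dir: dataset/voc
    use_default_label: true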
architecture: CascadeMaskRCNN architecture: CascadeMaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -88,7 +85,7 @@ MaskAssigner: ...@@ -88,7 +85,7 @@ MaskAssigner:
resolution: 28 resolution: 28
CascadeBBoxHead: CascadeBBoxHead:
head: CascadeXConvNormHead head: CascadeXConvNormHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
...@@ -115,33 +112,6 @@ OptimizerBuilder: ...@@ -115,33 +112,6 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: '../mask_fpn_reader.yml'
TrainReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: FasterRCNN architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -104,34 +101,6 @@ OptimizerBuilder: ...@@ -104,34 +101,6 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: '../faster_fpn_reader.yml'
TrainReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 16
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
FasterRCNNTestFeed:
batch_size: 1
dataset:
annotation: annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
drop_last: false
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -113,33 +110,4 @@ OptimizerBuilder: ...@@ -113,33 +110,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: '../mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
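Note the '../mask_fpn_reader.yml' form above: reader paths appear to resolve relative to the config file that declares them, so a config one directory down reaches the shared reader with '../'. Sketch (the file location is hypothetical):

# e.g. a model config living one directory below the shared readers
_READER_: '../mask_fpn_reader.yml'  # shared reader lives one level up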
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !RandomFlipImage
prob: 0.5
is_mask_flip: true
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: false
batch_size: 1
shuffle: true
worker_num: 2
drop_last: false
use_process: false
EvalReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
worker_num: 2
TestReader:
inputs_def:
    image_shape: [3, 800, 1333]
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
drop_last: false
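Two details in this mask reader are easy to miss: RandomFlipImage carries is_mask_flip: true so gt_mask is flipped together with the image, and PadBatch sets use_padded_im_info: false for training but true for eval and test, our reading being that only inference post-processing needs im_info to match the padded extent. The training-side pair, condensed:

sample_transforms:
- !RandomFlipImage
  prob: 0.5
  is_mask_flip: true        # keep gt_mask consistent with the flip
batch_transforms:
- !PadBatch
  pad_to_stride: 32
  use_padded_im_info: false # training keeps the unpadded im_info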
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -111,33 +108,4 @@ OptimizerBuilder: ...@@ -111,33 +108,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
log_smooth_window: 20 log_smooth_window: 20
save_dir: output save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
weights: output/mask_rcnn_r101_vd_fpn_1x/model_final weights: output/mask_rcnn_r101_vd_fpn_1x/model_final
metric: COCO metric: COCO
num_classes: 81 num_classes: 81
...@@ -112,33 +109,4 @@ OptimizerBuilder: ...@@ -112,33 +109,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -102,22 +99,4 @@ OptimizerBuilder: ...@@ -102,22 +99,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -104,22 +101,4 @@ OptimizerBuilder: ...@@ -104,22 +101,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -111,33 +108,4 @@ OptimizerBuilder: ...@@ -111,33 +108,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -111,33 +108,4 @@ OptimizerBuilder: ...@@ -111,33 +108,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: true use_gpu: true
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -112,34 +109,4 @@ OptimizerBuilder: ...@@ -112,34 +109,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 260000 max_iters: 260000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -114,34 +111,4 @@ OptimizerBuilder: ...@@ -114,34 +111,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
# batch size per device
batch_size: 1
dataset:
dataset_dir: dataset/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 180000 max_iters: 180000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
log_smooth_window: 20 log_smooth_window: 20
save_dir: output save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar
weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x/model_final weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x/model_final
metric: COCO metric: COCO
num_classes: 81 num_classes: 81
...@@ -114,33 +111,4 @@ OptimizerBuilder: ...@@ -114,33 +111,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
architecture: MaskRCNN architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
max_iters: 360000 max_iters: 360000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
log_smooth_window: 20 log_smooth_window: 20
save_dir: output save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar
weights: output/mask_rcnn_x101_vd_64x4d_fpn_2x/model_final weights: output/mask_rcnn_x101_vd_64x4d_fpn_2x/model_final
metric: COCO metric: COCO
num_classes: 81 num_classes: 81
...@@ -114,33 +111,4 @@ OptimizerBuilder: ...@@ -114,33 +111,4 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
MaskRCNNTrainFeed: _READER_: 'mask_fpn_reader.yml'
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !RandomFlipImage
prob: 0.5
is_mask_flip: true
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_size: 1
shuffle: true
worker_num: 2
drop_last: false
use_process: false
EvalReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
worker_num: 2
TestReader:
inputs_def:
    image_shape: [3, 800, 1333]
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_size: 1
shuffle: false
drop_last: false
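Unlike mask_fpn_reader.yml, this reader has no batch_transforms at all; with batch_size: 1 and a C4-style backbone there is nothing to pad. If it were reused with a larger batch or an FPN neck, a pad step would presumably have to return, e.g.:

TrainReader:
  batch_size: 2        # hypothetical larger batch
  batch_transforms:
  - !PadBatch
    pad_to_stride: 32  # needed once batched shapes must line up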
architecture: CascadeRCNNClsAware architecture: CascadeRCNNClsAware
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 800000 max_iters: 800000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -110,16 +107,17 @@ OptimizerBuilder: ...@@ -110,16 +107,17 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: TrainReader:
batch_size: 1 inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset: dataset:
!COCODataSet
dataset_dir: dataset/obj365 dataset_dir: dataset/obj365
annotation: train.json anno_path: train.json
image_dir: train image_dir: train
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: True to_rgb: True
with_mixup: False
- !RandomFlipImage - !RandomFlipImage
prob: 0.5 prob: 0.5
- !NormalizeImage - !NormalizeImage
...@@ -132,7 +130,7 @@ FasterRCNNTrainFeed: ...@@ -132,7 +130,7 @@ FasterRCNNTrainFeed:
std: std:
- 0.229 - 0.229
- 0.224 - 0.224
- 0.225 - 0.225
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408] target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
...@@ -143,16 +141,20 @@ FasterRCNNTrainFeed: ...@@ -143,16 +141,20 @@ FasterRCNNTrainFeed:
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
batch_size: 1
shuffle: true
drop_last: false drop_last: false
num_workers: 2 worker_num: 2
FasterRCNNEvalFeed: EvalReader:
batch_size: 1 inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!COCODataSet
dataset_dir: dataset/obj365 dataset_dir: dataset/obj365
annotation: val.json anno_path: val.json
image_dir: val image_dir: val
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: True to_rgb: True
with_mixup: False with_mixup: False
...@@ -166,7 +168,7 @@ FasterRCNNEvalFeed: ...@@ -166,7 +168,7 @@ FasterRCNNEvalFeed:
std: std:
- 0.229 - 0.229
- 0.224 - 0.224
- 0.225 - 0.225
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: target_size:
...@@ -178,13 +180,36 @@ FasterRCNNEvalFeed: ...@@ -178,13 +180,36 @@ FasterRCNNEvalFeed:
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
FasterRCNNTestFeed:
batch_size: 1 batch_size: 1
worker_num: 2
drop_empty: false
TestReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
annotation: dataset/obj365/val.json !ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
drop_last: false use_padded_im_info: true
num_workers: 2 batch_size: 1
worker_num: 2
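The obj365 TrainReader above hands ResizeImage a list of target_size values; as in the other multi-scale configs in this change, one size is drawn per image, jittering the short side between 416 and 1408. A trimmed sketch:

- !ResizeImage
  interp: 1
  target_size: [416, 480, 544, 608, 672, 736, 800]  # one drawn per image
  max_size: 1333  # assumed cap, as in the COCO readers
  use_cv2: true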
architecture: CascadeRCNN architecture: CascadeRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 500000 max_iters: 500000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -85,7 +82,7 @@ CascadeBBoxAssigner: ...@@ -85,7 +82,7 @@ CascadeBBoxAssigner:
fg_fraction: 0.25 fg_fraction: 0.25
CascadeBBoxHead: CascadeBBoxHead:
head: CascadeXConvNormHead head: CascadeXConvNormHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
...@@ -115,16 +112,17 @@ OptimizerBuilder: ...@@ -115,16 +112,17 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: TrainReader:
batch_size: 1 inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset: dataset:
!COCODataSet
dataset_dir: dataset/objects365 dataset_dir: dataset/objects365
annotation: annotations/train.json anno_path: annotations/train.json
image_dir: train image_dir: train
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: False to_rgb: False
with_mixup: False
- !RandomFlipImage - !RandomFlipImage
is_mask_flip: true is_mask_flip: true
is_normalized: false is_normalized: false
...@@ -133,13 +131,13 @@ FasterRCNNTrainFeed: ...@@ -133,13 +131,13 @@ FasterRCNNTrainFeed:
is_channel_first: false is_channel_first: false
is_scale: False is_scale: False
mean: mean:
- 102.9801 - 102.9801
- 115.9465 - 115.9465
- 122.7717 - 122.7717
std: std:
- 1.0 - 1.0
- 1.0 - 1.0
- 1.0 - 1.0
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: target_size:
...@@ -152,7 +150,7 @@ FasterRCNNTrainFeed: ...@@ -152,7 +150,7 @@ FasterRCNNTrainFeed:
- 608 - 608
- 640 - 640
- 672 - 672
- 704 - 704
- 736 - 736
- 768 - 768
- 800 - 800
...@@ -183,30 +181,34 @@ FasterRCNNTrainFeed: ...@@ -183,30 +181,34 @@ FasterRCNNTrainFeed:
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
num_workers: 4 batch_size: 1
worker_num: 4
shuffle: true
class_aware_sampling: true class_aware_sampling: true
use_process: false
FasterRCNNEvalFeed: EvalReader:
batch_size: 1 inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!COCODataSet
dataset_dir: dataset/objects365 dataset_dir: dataset/objects365
annotation: annotations/val.json anno_path: annotations/val.json
image_dir: val image_dir: val
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: False to_rgb: False
with_mixup: False
- !NormalizeImage - !NormalizeImage
is_channel_first: false is_channel_first: false
is_scale: False is_scale: False
mean: mean:
- 102.9801 - 102.9801
- 115.9465 - 115.9465
- 122.7717 - 122.7717
std: std:
- 1.0 - 1.0
- 1.0 - 1.0
- 1.0 - 1.0
- !ResizeImage - !ResizeImage
target_size: 800 target_size: 800
max_size: 1333 max_size: 1333
...@@ -217,31 +219,33 @@ FasterRCNNEvalFeed: ...@@ -217,31 +219,33 @@ FasterRCNNEvalFeed:
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
batch_size: 1
drop_empty: false
worker_num: 2
FasterRCNNTestFeed: TestReader:
batch_size: 1 batch_size: 1
dataset: dataset:
annotation: dataset/obj365/annotations/val.json !ImageFolder
sample_transforms: anno_path: dataset/obj365/annotations/val.json
sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: False to_rgb: False
with_mixup: False
- !NormalizeImage - !NormalizeImage
is_channel_first: false is_channel_first: false
is_scale: False is_scale: False
mean: mean:
- 102.9801 - 102.9801
- 115.9465 - 115.9465
- 122.7717 - 122.7717
std: std:
- 1.0 - 1.0
- 1.0 - 1.0
- 1.0 - 1.0
- !Permute - !Permute
channel_first: true channel_first: true
to_bgr: false to_bgr: false
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
drop_last: false worker_num: 2
num_workers: 2
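The Objects365 TrainReader above turns on class_aware_sampling, which, as we understand the option, draws training images per class rather than uniformly so the long-tailed label distribution does not starve rare classes. The relevant knobs together:

TrainReader:
  class_aware_sampling: true  # per-class sampling for long-tailed data
  shuffle: true
  worker_num: 4
  batch_size: 1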
architecture: CascadeRCNNClsAware architecture: CascadeRCNNClsAware
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 1500000 max_iters: 1500000
snapshot_iter: 10000 snapshot_iter: 10000
use_gpu: true use_gpu: true
...@@ -109,16 +106,17 @@ OptimizerBuilder: ...@@ -109,16 +106,17 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: TrainReader:
batch_size: 1 inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset: dataset:
!COCODataSet
dataset_dir: dataset/oid dataset_dir: dataset/oid
annotation: train.json anno_path: train.json
image_dir: train image_dir: train
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: True to_rgb: True
with_mixup: False
- !RandomFlipImage - !RandomFlipImage
prob: 0.5 prob: 0.5
- !NormalizeImage - !NormalizeImage
...@@ -131,7 +129,7 @@ FasterRCNNTrainFeed: ...@@ -131,7 +129,7 @@ FasterRCNNTrainFeed:
std: std:
- 0.229 - 0.229
- 0.224 - 0.224
- 0.225 - 0.225
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408] target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408]
...@@ -142,16 +140,20 @@ FasterRCNNTrainFeed: ...@@ -142,16 +140,20 @@ FasterRCNNTrainFeed:
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
batch_size: 1
drop_last: false drop_last: false
num_workers: 2 shuffle: true
worker_num: 2
FasterRCNNEvalFeed: EvalReader:
batch_size: 1 inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!COCODataSet
dataset_dir: dataset/oidv5 dataset_dir: dataset/oidv5
annotation: val.json anno_path: val.json
image_dir: val image_dir: val
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: True to_rgb: True
with_mixup: False with_mixup: False
...@@ -165,7 +167,7 @@ FasterRCNNEvalFeed: ...@@ -165,7 +167,7 @@ FasterRCNNEvalFeed:
std: std:
- 0.229 - 0.229
- 0.224 - 0.224
- 0.225 - 0.225
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: target_size:
...@@ -177,13 +179,34 @@ FasterRCNNEvalFeed: ...@@ -177,13 +179,34 @@ FasterRCNNEvalFeed:
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
batch_size: 1
worker_num: 2
drop_empty: false
FasterRCNNTestFeed: TestReader:
batch_size: 1 batch_size: 1
inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
annotation: dataset/oidv5/val.json !ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
- !NormalizeImage
is_channel_first: false
is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 32 pad_to_stride: 32
drop_last: false worker_num: 2
num_workers: 2
architecture: RetinaNet architecture: RetinaNet
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000 max_iters: 90000
use_gpu: true use_gpu: true
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
...@@ -73,33 +70,21 @@ OptimizerBuilder: ...@@ -73,33 +70,21 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
TrainReader:
batch_size: 2 batch_size: 2
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
FasterRCNNEvalFeed: EvalReader:
batch_size: 2 batch_size: 2
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed: TestReader:
batch_size: 1 batch_size: 1
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
num_workers: 2
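For RetinaNet the only reader change relative to the shared FPN reader is the pad stride: 128 instead of 32, a multiple of the coarsest pyramid level RetinaNet samples (stride 2^7 = 128). The whole override therefore stays this small:

_READER_: 'faster_fpn_reader.yml'
TrainReader:
  batch_size: 2
  batch_transforms:
  - !PadBatch
    pad_to_stride: 128  # multiple of the deepest RetinaNet level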
architecture: RetinaNet architecture: RetinaNet
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000 max_iters: 90000
use_gpu: true use_gpu: true
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
...@@ -73,33 +70,21 @@ OptimizerBuilder: ...@@ -73,33 +70,21 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
TrainReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
num_workers: 2
FasterRCNNEvalFeed: EvalReader:
batch_size: 2 batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
num_workers: 2
FasterRCNNTestFeed: TestReader:
batch_size: 1 batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
num_workers: 2
architecture: RetinaNet architecture: RetinaNet
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000 max_iters: 180000
use_gpu: true use_gpu: true
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar
...@@ -76,33 +73,18 @@ OptimizerBuilder: ...@@ -76,33 +73,18 @@ OptimizerBuilder:
factor: 0.0001 factor: 0.0001
type: L2 type: L2
FasterRCNNTrainFeed: _READER_: 'faster_fpn_reader.yml'
batch_size: 1 TrainReader:
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
num_workers: 2
FasterRCNNEvalFeed: EvalReader:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
num_workers: 2
FasterRCNNTestFeed: TestReader:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms: batch_transforms:
- !PadBatch - !PadBatch
pad_to_stride: 128 pad_to_stride: 128
num_workers: 2
architecture: SSD architecture: SSD
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_coco_pretrained.tar pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_coco_pretrained.tar
use_gpu: true use_gpu: true
max_iters: 28000 max_iters: 28000
...@@ -56,25 +53,91 @@ OptimizerBuilder: ...@@ -56,25 +53,91 @@ OptimizerBuilder:
factor: 0.00005 factor: 0.00005
type: L2 type: L2
SSDTrainFeed: TrainReader:
batch_size: 32 inputs_def:
use_process: true image_shape: [3, 300, 300]
fields: ['image', 'gt_bbox', 'gt_class']
dataset: dataset:
!VOCDataSet
anno_path: trainval.txt
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: trainval.txt
use_default_label: true use_default_label: true
sample_transforms:
SSDEvalFeed: - !DecodeImage
batch_size: 64 to_rgb: true
- !RandomDistort
brightness_lower: 0.875
brightness_upper: 1.125
is_order: true
- !RandomExpand
fill_value: [127.5, 127.5, 127.5]
- !RandomCrop
allow_no_crop: false
- !NormalizeBox {}
- !ResizeImage
interp: 1
target_size: 300
use_cv2: false
- !RandomFlipImage
is_normalized: true
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [127.5, 127.5, 127.5]
std: [127.502231, 127.502231, 127.502231]
batch_size: 32
shuffle: true
drop_last: true
worker_num: 8
bufsize: 16
use_process: true use_process: true
EvalReader:
inputs_def:
image_shape: [3, 300, 300]
fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
dataset: dataset:
!VOCDataSet
anno_path: test.txt
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: test.txt
use_default_label: true use_default_label: true
drop_last: false sample_transforms:
- !DecodeImage
to_rgb: true
- !NormalizeBox {}
- !ResizeImage
interp: 1
target_size: 300
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [127.5, 127.5, 127.5]
std: [127.502231, 127.502231, 127.502231]
batch_size: 32
worker_num: 8
bufsize: 32
use_process: false
SSDTestFeed: TestReader:
batch_size: 1 inputs_def:
    image_shape: [3, 300, 300]
fields: ['image', 'im_id', 'im_shape']
dataset: dataset:
!ImageFolder
anno_path: test.txt
use_default_label: true use_default_label: true
drop_last: false sample_transforms:
- !DecodeImage
to_rgb: true
- !ResizeImage
interp: 1
max_size: 0
target_size: 300
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [127.5, 127.5, 127.5]
std: [127.502231, 127.502231, 127.502231]
batch_size: 1
architecture: SSD architecture: SSD
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
use_gpu: true use_gpu: true
max_iters: 400000 max_iters: 400000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -60,37 +57,27 @@ OptimizerBuilder: ...@@ -60,37 +57,27 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
SSDTrainFeed: TrainReader:
batch_size: 8 inputs_def:
image_shape: [3, 300, 300]
fields: ['image', 'gt_bbox', 'gt_class']
dataset: dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
image_shape: [3, 300, 300]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort - !RandomDistort
brightness_lower: 0.875 brightness_lower: 0.875
brightness_upper: 1.125 brightness_upper: 1.125
is_order: true is_order: true
- !ExpandImage - !RandomExpand
max_ratio: 4 fill_value: [104, 117, 123]
mean: [104, 117, 123] - !RandomCrop
prob: 0.5 allow_no_crop: true
- !CropImage - !NormalizeBox {}
avoid_no_bbox: true
batch_sampler:
- [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]
satisfy_all: false
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: 300 target_size: 300
...@@ -103,19 +90,27 @@ SSDTrainFeed: ...@@ -103,19 +90,27 @@ SSDTrainFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 8
shuffle: true
worker_num: 8
bufsize: 32
use_process: true
drop_empty: true
SSDEvalFeed: EvalReader:
batch_size: 16 inputs_def:
image_shape: [3, 300, 300]
fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
dataset: dataset:
dataset_dir: dataset/coco !COCODataSet
annotation: annotations/instances_val2017.json
image_dir: val2017 image_dir: val2017
drop_last: false anno_path: annotations/instances_val2017.json
image_shape: [3, 300, 300] dataset_dir: dataset/coco
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
with_mixup: false with_mixup: false
- !NormalizeBox {}
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: 300 target_size: 300
...@@ -126,12 +121,17 @@ SSDEvalFeed: ...@@ -126,12 +121,17 @@ SSDEvalFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 16
worker_num: 8
bufsize: 32
SSDTestFeed: TestReader:
batch_size: 1 inputs_def:
    image_shape: [3, 300, 300]
fields: ['image', 'im_id', 'im_shape']
dataset: dataset:
annotation: dataset/coco/annotations/instances_val2017.json !ImageFolder
image_shape: [3, 300, 300] anno_path: annotations/instances_val2017.json
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -147,3 +147,4 @@ SSDTestFeed: ...@@ -147,3 +147,4 @@ SSDTestFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 1
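The SSD readers in this change drop the old ExpandImage/CropImage pair with its eight-row batch_sampler table in favor of RandomExpand and RandomCrop, and NormalizeBox now runs after the geometric ops so it normalizes the final boxes. The resulting order, condensed from the reader above:

sample_transforms:
- !DecodeImage
  to_rgb: true
- !RandomDistort          # photometric jitter on the raw image
  brightness_lower: 0.875
  brightness_upper: 1.125
  is_order: true
- !RandomExpand           # zoom-out onto a mean-filled canvas
  fill_value: [104, 117, 123]
- !RandomCrop             # IoU-constrained zoom-in
  allow_no_crop: true
- !NormalizeBox {}        # boxes to relative coords, after geometry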
architecture: SSD architecture: SSD
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
use_gpu: true use_gpu: true
max_iters: 120001 max_iters: 120001
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -60,41 +57,31 @@ OptimizerBuilder: ...@@ -60,41 +57,31 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
SSDTrainFeed: TrainReader:
batch_size: 8 inputs_def:
image_shape: [3, 300, 300]
fields: ['image', 'gt_bbox', 'gt_class']
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: trainval.txt anno_path: trainval.txt
use_default_label: true use_default_label: true
image_shape: [3, 300, 300]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort - !RandomDistort
brightness_lower: 0.875 brightness_lower: 0.875
brightness_upper: 1.125 brightness_upper: 1.125
is_order: true is_order: true
- !ExpandImage - !RandomExpand
max_ratio: 4 fill_value: [104, 117, 123]
mean: [104, 117, 123] - !RandomCrop
prob: 0.5 allow_no_crop: true
- !CropImage - !NormalizeBox {}
avoid_no_bbox: true
batch_sampler:
- [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]
satisfy_all: false
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: 300 target_size: 300
use_cv2: False use_cv2: false
- !RandomFlipImage - !RandomFlipImage
is_normalized: true is_normalized: true
- !Permute - !Permute
...@@ -103,15 +90,21 @@ SSDTrainFeed: ...@@ -103,15 +90,21 @@ SSDTrainFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 8
shuffle: true
worker_num: 8
bufsize: 32
  use_process: true
SSDEvalFeed: EvalReader:
batch_size: 32 inputs_def:
image_shape: [3, 300, 300]
fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
dataset: dataset:
!VOCDataSet
anno_path: test.txt
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: test.txt
use_default_label: true use_default_label: true
drop_last: false
image_shape: [3, 300, 300]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -125,15 +118,20 @@ SSDEvalFeed: ...@@ -125,15 +118,20 @@ SSDEvalFeed:
to_bgr: false to_bgr: false
- !NormalizeImage - !NormalizeImage
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 32
worker_num: 8
bufsize: 32
SSDTestFeed: TestReader:
batch_size: 1 inputs_def:
    image_shape: [3, 300, 300]
fields: ['image', 'im_id', 'im_shape']
dataset: dataset:
!ImageFolder
anno_path: test.txt
use_default_label: true use_default_label: true
drop_last: false
image_shape: [3, 300, 300]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -149,3 +147,4 @@ SSDTestFeed: ...@@ -149,3 +147,4 @@ SSDTestFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 1
architecture: SSD architecture: SSD
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
use_gpu: true use_gpu: true
max_iters: 400000 max_iters: 400000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -62,37 +59,28 @@ OptimizerBuilder: ...@@ -62,37 +59,28 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
SSDTrainFeed: TrainReader:
batch_size: 8 inputs_def:
image_shape: [3, 512, 512]
fields: ['image', 'gt_bbox', 'gt_class']
dataset: dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
image_shape: [3, 512, 512]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
with_mixup: false with_mixup: false
- !NormalizeBox {}
- !RandomDistort - !RandomDistort
brightness_lower: 0.875 brightness_lower: 0.875
brightness_upper: 1.125 brightness_upper: 1.125
is_order: true is_order: true
- !ExpandImage - !RandomExpand
max_ratio: 4 fill_value: [104, 117, 123]
mean: [104, 117, 123] - !RandomCrop
prob: 0.5 allow_no_crop: true
- !CropImage - !NormalizeBox {}
avoid_no_bbox: true
batch_sampler:
- [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]
satisfy_all: false
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: 512 target_size: 512
...@@ -105,15 +93,21 @@ SSDTrainFeed: ...@@ -105,15 +93,21 @@ SSDTrainFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
SSDEvalFeed:
batch_size: 8 batch_size: 8
shuffle: true
worker_num: 8
bufsize: 32
use_process: true
EvalReader:
inputs_def:
image_shape: [3,512,512]
fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
dataset: dataset:
dataset_dir: dataset/coco !COCODataSet
annotation: annotations/instances_val2017.json
image_dir: val2017 image_dir: val2017
drop_last: false anno_path: annotations/instances_val2017.json
image_shape: [3, 512, 512] dataset_dir: dataset/coco
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -128,12 +122,18 @@ SSDEvalFeed: ...@@ -128,12 +122,18 @@ SSDEvalFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 8
worker_num: 8
bufsize: 32
drop_empty: false
SSDTestFeed: TestReader:
batch_size: 1 inputs_def:
image_shape: [3,512,512]
fields: ['image', 'im_id', 'im_shape']
dataset: dataset:
annotation: dataset/coco/annotations/instances_val2017.json !ImageFolder
image_shape: [3, 512, 512] anno_path: annotations/instances_val2017.json
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -149,3 +149,4 @@ SSDTestFeed: ...@@ -149,3 +149,4 @@ SSDTestFeed:
is_scale: false is_scale: false
mean: [104, 117, 123] mean: [104, 117, 123]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 1
architecture: SSD architecture: SSD
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
use_gpu: true use_gpu: true
max_iters: 120000 max_iters: 120000
snapshot_iter: 10000 snapshot_iter: 10000
...@@ -64,37 +61,27 @@ OptimizerBuilder: ...@@ -64,37 +61,27 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
SSDTrainFeed: TrainReader:
batch_size: 8 inputs_def:
image_shape: [3, 512, 512]
fields: ['image', 'gt_bbox', 'gt_class']
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: trainval.txt anno_path: trainval.txt
use_default_label: true use_default_label: true
image_shape: [3, 512, 512]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort - !RandomDistort
brightness_lower: 0.875 brightness_lower: 0.875
brightness_upper: 1.125 brightness_upper: 1.125
is_order: true is_order: true
- !ExpandImage - !RandomExpand
max_ratio: 4 fill_value: [123, 117, 104]
mean: [123, 117, 104] - !RandomCrop
prob: 0.5 allow_no_crop: true
- !CropImage - !NormalizeBox {}
avoid_no_bbox: true
batch_sampler:
- [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]
satisfy_all: false
- !ResizeImage - !ResizeImage
interp: 1 interp: 1
target_size: 512 target_size: 512
...@@ -107,15 +94,21 @@ SSDTrainFeed: ...@@ -107,15 +94,21 @@ SSDTrainFeed:
is_scale: false is_scale: false
mean: [123, 117, 104] mean: [123, 117, 104]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 8
shuffle: true
worker_num: 8
bufsize: 32
use_process: true
SSDEvalFeed: EvalReader:
batch_size: 32 inputs_def:
image_shape: [3, 512, 512]
fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
dataset: dataset:
!VOCDataSet
anno_path: test.txt
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: test.txt
use_default_label: true use_default_label: true
drop_last: false
image_shape: [3, 512, 512]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -131,13 +124,18 @@ SSDEvalFeed: ...@@ -131,13 +124,18 @@ SSDEvalFeed:
is_scale: false is_scale: false
mean: [123, 117, 104] mean: [123, 117, 104]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 32
worker_num: 8
bufsize: 32
SSDTestFeed: TestReader:
batch_size: 1 inputs_def:
image_shape: [3,512,512]
fields: ['image', 'im_id', 'im_shape']
dataset: dataset:
!ImageFolder
anno_path: test.txt
use_default_label: true use_default_label: true
drop_last: false
image_shape: [3, 512, 512]
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -153,3 +151,4 @@ SSDTestFeed: ...@@ -153,3 +151,4 @@ SSDTestFeed:
is_scale: false is_scale: false
mean: [123, 117, 104] mean: [123, 117, 104]
std: [1, 1, 1] std: [1, 1, 1]
batch_size: 1
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 500200 max_iters: 500200
log_smooth_window: 20 log_smooth_window: 20
...@@ -57,26 +54,4 @@ OptimizerBuilder: ...@@ -57,26 +54,4 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 8
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 8
bufsize: 128
use_process: true
YoloEvalFeed:
batch_size: 8
image_shape: [3, 608, 608]
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
YoloTestFeed:
batch_size: 1
image_shape: [3, 608, 608]
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 70000 max_iters: 70000
log_smooth_window: 20 log_smooth_window: 20
...@@ -58,27 +55,31 @@ OptimizerBuilder: ...@@ -58,27 +55,31 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 8 TrainReader:
inputs_def:
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 50
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: trainval.txt anno_path: trainval.txt
use_default_label: true use_default_label: true
num_workers: 8 with_background: false
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed: EvalReader:
batch_size: 8 inputs_def:
image_shape: [3, 608, 608] fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
num_max_boxes: 50
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: test.txt anno_path: test.txt
use_default_label: true use_default_label: true
with_background: false
YoloTestFeed: TestReader:
batch_size: 1
image_shape: [3, 608, 608]
dataset: dataset:
!ImageFolder
use_default_label: true use_default_label: true
with_background: false
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 500200 max_iters: 500200
log_smooth_window: 20 log_smooth_window: 20
...@@ -58,26 +55,4 @@ OptimizerBuilder: ...@@ -58,26 +55,4 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 8
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 8
bufsize: 128
use_process: true
YoloEvalFeed:
batch_size: 8
image_shape: [3, 608, 608]
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
YoloTestFeed:
batch_size: 1
image_shape: [3, 608, 608]
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 20000 max_iters: 20000
log_smooth_window: 20 log_smooth_window: 20
save_dir: output save_dir: output
snapshot_iter: 1000 snapshot_iter: 200
metric: VOC metric: VOC
map_type: 11point map_type: 11point
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar
...@@ -60,16 +57,19 @@ OptimizerBuilder: ...@@ -60,16 +57,19 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 1 # the TrainReader below is merged into (and overrides) yolov3_reader.yml
TrainReader:
inputs_def:
image_shape: [3, 608, 608]
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 50
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/fruit dataset_dir: dataset/fruit
annotation: fruit-detection/train.txt anno_path: train.txt
with_background: false
use_default_label: false use_default_label: false
num_workers: 16
bufsize: 128
use_process: true
mixup_epoch: -1
sample_transforms: sample_transforms:
- !DecodeImage - !DecodeImage
to_rgb: true to_rgb: true
...@@ -83,40 +83,45 @@ YoloTrainFeed: ...@@ -83,40 +83,45 @@ YoloTrainFeed:
max_size: 0 max_size: 0
target_size: 608 target_size: 608
- !RandomFlipImage - !RandomFlipImage
is_mask_flip: false
is_normalized: true is_normalized: true
prob: 0.5 prob: 0.5
- !NormalizeImage - !NormalizeImage
is_channel_first: false mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: true is_scale: true
mean: is_channel_first: false
- 0.485 - !PadBox
- 0.456 num_max_boxes: 50
- 0.406 - !BboxXYXY2XYWH {}
std: batch_transforms:
- 0.229 - !RandomShape
- 0.224 sizes: [608]
- 0.225
- !Permute - !Permute
channel_first: true channel_first: true
to_bgr: false to_bgr: false
batch_transforms: batch_size: 1
- !RandomShape shuffle: true
sizes: [608] mixup_epoch: -1
with_background: false worker_num: 4
bufsize: 64
use_process: true
YoloEvalFeed: EvalReader:
batch_size: 1 batch_size: 1
image_shape: [3, 608, 608] inputs_def:
image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
num_max_boxes: 50
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/fruit dataset_dir: dataset/fruit
annotation: fruit-detection/val.txt anno_path: val.txt
use_default_label: false use_default_label: false
with_background: false
YoloTestFeed: TestReader:
batch_size: 1 batch_size: 1
image_shape: [3, 608, 608]
dataset: dataset:
dataset_dir: dataset/fruit !ImageFolder
use_default_label: false use_default_label: false
with_background: false
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 70000 max_iters: 70000
log_smooth_window: 20 log_smooth_window: 20
...@@ -59,27 +56,29 @@ OptimizerBuilder: ...@@ -59,27 +56,29 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 8 TrainReader:
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: trainval.txt anno_path: trainval.txt
use_default_label: true use_default_label: true
num_workers: 8 with_background: false
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed: EvalReader:
batch_size: 8 inputs_def:
image_shape: [3, 608, 608] image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
num_max_boxes: 50
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: test.txt anno_path: test.txt
use_default_label: true use_default_label: true
with_background: false
YoloTestFeed: TestReader:
batch_size: 1
image_shape: [3, 608, 608]
dataset: dataset:
!ImageFolder
use_default_label: true use_default_label: true
with_background: false
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 500200 max_iters: 500200
log_smooth_window: 20 log_smooth_window: 20
...@@ -60,26 +57,4 @@ OptimizerBuilder: ...@@ -60,26 +57,4 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 8
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 8
bufsize: 128
use_process: true
YoloEvalFeed:
batch_size: 8
image_shape: [3, 608, 608]
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
YoloTestFeed:
batch_size: 1
image_shape: [3, 608, 608]
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
architecture: YOLOv3 architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true use_gpu: true
max_iters: 70000 max_iters: 70000
log_smooth_window: 20 log_smooth_window: 20
...@@ -61,27 +58,29 @@ OptimizerBuilder: ...@@ -61,27 +58,29 @@ OptimizerBuilder:
factor: 0.0005 factor: 0.0005
type: L2 type: L2
YoloTrainFeed: _READER_: 'yolov3_reader.yml'
batch_size: 8 TrainReader:
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: trainval.txt anno_path: trainval.txt
use_default_label: true use_default_label: true
num_workers: 8 with_background: false
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed: EvalReader:
batch_size: 8 inputs_def:
image_shape: [3, 608, 608] image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
num_max_boxes: 50
dataset: dataset:
!VOCDataSet
dataset_dir: dataset/voc dataset_dir: dataset/voc
annotation: test.txt anno_path: test.txt
use_default_label: true use_default_label: true
with_background: false
YoloTestFeed: TestReader:
batch_size: 1
image_shape: [3, 608, 608]
dataset: dataset:
!ImageFolder
use_default_label: true use_default_label: true
with_background: false
TrainReader:
inputs_def:
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 50
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
with_mixup: True
- !MixupImage
alpha: 1.5
beta: 1.5
- !ColorDistort {}
- !RandomExpand
fill_value: [123.675, 116.28, 103.53]
- !RandomCrop {}
- !RandomFlipImage
is_normalized: false
- !NormalizeBox {}
- !PadBox
num_max_boxes: 50
- !BboxXYXY2XYWH {}
batch_transforms:
- !RandomShape
sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
random_inter: True
- !NormalizeImage
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
to_bgr: false
channel_first: True
batch_size: 8
shuffle: true
mixup_epoch: 250
drop_last: true
worker_num: 8
bufsize: 32
use_process: true
EvalReader:
inputs_def:
fields: ['image', 'im_size', 'im_id']
num_max_boxes: 50
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
- !ResizeImage
target_size: 608
interp: 2
- !NormalizeImage
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !PadBox
num_max_boxes: 50
- !Permute
to_bgr: false
channel_first: True
batch_size: 8
drop_empty: false
worker_num: 8
bufsize: 32
TestReader:
inputs_def:
image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
- !ResizeImage
target_size: 608
interp: 2
- !NormalizeImage
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
to_bgr: false
channel_first: True
batch_size: 1
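The three reader sections above share one pipeline order: sample_transforms are applied per sample, the samples are then collected into a batch, and batch_transforms are applied per batch. A rough sketch in Python (illustrative only, not the actual Reader implementation; leftover samples are simply dropped here, like drop_last):

def make_batches(samples, sample_transforms, batch_transforms, batch_size):
    # apply per-sample ops, collect a batch, then apply per-batch ops
    batch = []
    for sample in samples:
        for op in sample_transforms:
            sample = op(sample)
        batch.append(sample)
        if len(batch) == batch_size:
            for op in batch_transforms:
                batch = op(batch)
            yield batch
            batch = []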
architecture: FasterRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
metric: COCO
weights: output/faster_rcnn_r50_1x/model_final
num_classes: 81
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
depth: 50
feature_maps: 4
freeze_at: 2
ResNetC5:
depth: 50
norm_type: affine_channel
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
use_random: true
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
sampling_ratio: 0
spatial_scale: 0.0625
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
_READER_: 'faster_reader.yml'
TrainReader:
inputs_def:
image_shape: [3,800,800]
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
batch_size: 3
TrainReader:
inputs_def:
image_shape: [3,NULL,NULL]
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_transforms:
- !DecodeImage
to_rgb: true
- !RandomFlipImage
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: false
batch_size: 1
shuffle: true
worker_num: 2
drop_last: false
use_multi_process: false
EvalReader:
inputs_def:
image_shape: [3,800,1333]
fields: ['image', 'im_info', 'im_id', 'im_shape']
# for voc
#fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
#sample_num: 100
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
drop_last: false
# worker_num: 2
TestReader:
inputs_def:
image_shape: [3,800,1333]
fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
drop_last: false
...@@ -34,7 +34,6 @@ list below can be viewed by `--help` ...@@ -34,7 +34,6 @@ list below can be viewed by `--help`
| -r/--resume_checkpoint | train | Checkpoint path for resuming training | None | `-r output/faster_rcnn_r50_1x/10000` | | -r/--resume_checkpoint | train | Checkpoint path for resuming training | None | `-r output/faster_rcnn_r50_1x/10000` |
| --eval | train | Whether to perform evaluation during training | False | | | --eval | train | Whether to perform evaluation during training | False | |
| --output_eval | train/eval | json path for evaluation | current path | `--output_eval ./json_result` | | --output_eval | train/eval | json path for evaluation | current path | `--output_eval ./json_result` |
| -d/--dataset_dir | train/eval | path for dataset, same as dataset_dir in configs | None | `-d dataset/coco` |
| --fp16 | train | Whether to enable mixed precision training | False | GPU training is required | | --fp16 | train | Whether to enable mixed precision training | False | GPU training is required |
| --loss_scale | train | Loss scaling factor for mixed precision training | 8.0 | enable when `--fp16` is True | | --loss_scale | train | Loss scaling factor for mixed precision training | 8.0 | enable when `--fp16` is True |
| --json_eval | eval | Whether to evaluate with existing bbox.json or mask.json | False | json path is set in `--output_eval` | | --json_eval | eval | Whether to evaluate with existing bbox.json or mask.json | False | json path is set in `--output_eval` |
......
...@@ -31,7 +31,6 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/0000005 ...@@ -31,7 +31,6 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/0000005
| -r/--resume_checkpoint | train | Resume training from a given checkpoint | None | `-r output/faster_rcnn_r50_1x/10000` | | -r/--resume_checkpoint | train | Resume training from a given checkpoint | None | `-r output/faster_rcnn_r50_1x/10000` |
| --eval | train | Whether to perform evaluation during training | False | | | --eval | train | Whether to perform evaluation during training | False | |
| --output_eval | train/eval | json path for saving evaluation results | current path | `--output_eval ./json_result` | | --output_eval | train/eval | json path for saving evaluation results | current path | `--output_eval ./json_result` |
| -d/--dataset_dir | train/eval | Dataset path, same as dataset_dir in the config file | None | `-d dataset/coco` |
| --fp16 | train | Whether to enable mixed precision training | False | GPU training is required | | --fp16 | train | Whether to enable mixed precision training | False | GPU training is required |
| --loss_scale | train | Loss scaling factor for mixed precision training | 8.0 | enable `--fp16` first | | --loss_scale | train | Loss scaling factor for mixed precision training | 8.0 | enable `--fp16` first |
| --json_eval | eval | Whether to evaluate with existing bbox.json or mask.json | False | json path is set in `--output_eval` | | --json_eval | eval | Whether to evaluate with existing bbox.json or mask.json | False | json path is set in `--output_eval` |
......
...@@ -22,6 +22,7 @@ import sys ...@@ -22,6 +22,7 @@ import sys
import yaml import yaml
import copy import copy
import collections
from .config.schema import SchemaDict, SharedConfig, extract_schema from .config.schema import SchemaDict, SharedConfig, extract_schema
from .config.yaml_helpers import serializable from .config.yaml_helpers import serializable
...@@ -65,6 +66,8 @@ class AttrDict(dict): ...@@ -65,6 +66,8 @@ class AttrDict(dict):
global_config = AttrDict() global_config = AttrDict()
READER_KEY = '_READER_'
def load_config(file_path): def load_config(file_path):
""" """
...@@ -77,25 +80,59 @@ def load_config(file_path): ...@@ -77,25 +80,59 @@ def load_config(file_path):
""" """
_, ext = os.path.splitext(file_path) _, ext = os.path.splitext(file_path)
assert ext in ['.yml', '.yaml'], "only yaml files are supported for now" assert ext in ['.yml', '.yaml'], "only yaml files are supported for now"
cfg = AttrDict()
with open(file_path) as f: with open(file_path) as f:
merge_config(yaml.load(f, Loader=yaml.Loader)) cfg = merge_config(yaml.load(f, Loader=yaml.Loader), cfg)
if READER_KEY in cfg:
reader_cfg = cfg[READER_KEY]
if reader_cfg.startswith("~"):
reader_cfg = os.path.expanduser(reader_cfg)
if not reader_cfg.startswith('/'):
reader_cfg = os.path.join(os.path.dirname(file_path), reader_cfg)
with open(reader_cfg) as f:
merge_config(yaml.load(f, Loader=yaml.Loader))
del cfg[READER_KEY]
merge_config(cfg)
return global_config return global_config
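As a sketch of the lookup implemented above (config paths are illustrative): the file named by _READER_ is loaded and merged into the global config first, then the remaining keys of the model config are merged on top, so reader sections redefined in the model config take precedence.

# illustrative only; assumes a model config containing
#     _READER_: 'yolov3_reader.yml'
# plus a TrainReader override, as in the configs above
from ppdet.core.workspace import load_config

cfg = load_config('configs/yolov3_darknet_voc.yml')  # hypothetical path
# yolov3_reader.yml was merged first, then the model config on top,
# so the model config's TrainReader dataset settings win
print(cfg['TrainReader']['dataset'])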
def merge_config(config): def dict_merge(dct, merge_dct):
""" Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
updating only top-level keys, dict_merge recurses down into dicts nested
to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
``dct``.
Args:
dct: dict onto which the merge is executed
merge_dct: dct merged into dct
Returns: dct
"""
for k, v in merge_dct.items():
if (k in dct and isinstance(dct[k], dict) and
isinstance(merge_dct[k], collections.Mapping)):
dict_merge(dct[k], merge_dct[k])
else:
dct[k] = merge_dct[k]
return dct
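A minimal usage sketch of dict_merge (values made up): nested dicts are merged key by key, while scalar values are simply overwritten.

base = {'TrainReader': {'batch_size': 8, 'shuffle': True}}
override = {'TrainReader': {'batch_size': 2}}
dict_merge(base, override)
assert base == {'TrainReader': {'batch_size': 2, 'shuffle': True}}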
def merge_config(config, another_cfg=None):
""" """
Merge config into global config. Merge config into global config or another_cfg.
Args: Args:
config (dict): Config to be merged. config (dict): Config to be merged.
Returns: global config Returns: global config
""" """
for key, value in config.items(): global global_config
if isinstance(value, dict) and key in global_config: dct = another_cfg if another_cfg is not None else global_config
global_config[key].update(value) return dict_merge(dct, config)
else:
global_config[key] = value
def get_registered_modules(): def get_registered_modules():
......
docs/DATA.md
\ No newline at end of file
docs/DATA_cn.md
\ No newline at end of file
...@@ -12,35 +12,8 @@ ...@@ -12,35 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# function:
# module to prepare data for detection model training
#
# implementation notes:
# - Dataset
# basic interface for accessing data samples in stream mode
#
# - xxxSource (RoiDbSource)
# * subclass of 'Dataset'
# * load data from local files and other source data
#
# - xxxOperator (DecodeImage)
# * subclass of 'BaseOperator'
# * each op can transform a sample, eg: decode/resize/crop image
# * each op must obey basic rules defined in transform.operator.base
#
# - transformer
# * subclass of 'Dataset'
# * 'MappedDataset' accept a 'xxxSource' and a list of 'xxxOperator'
# to build a transformed 'Dataset'
from __future__ import absolute_import from __future__ import absolute_import
from .dataset import Dataset from .reader import *
from .reader import Reader from .source import *
import traceback from .transform import *
if traceback.extract_stack()[0][
0] == 'ppdet/data/tools/generate_data_for_training.py':
__all__ = ['Dataset', 'Reader']
else:
from .data_feed import create_reader
__all__ = ['Dataset', 'Reader', 'create_reader']
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import inspect
from ppdet.core.workspace import register, serializable
from ppdet.utils.download import get_dataset_path
from ppdet.data.reader import Reader
# XXX these are for triggering the decorator
from ppdet.data.transform.operators import (
DecodeImage, MixupImage, NormalizeBox, NormalizeImage, RandomDistort,
RandomFlipImage, RandomInterpImage, ResizeImage, ExpandImage, CropImage,
Permute, MultiscaleTestResize, Resize, ColorDistort, NormalizePermute,
RandomExpand, RandomCrop)
from ppdet.data.transform.arrange_sample import (
ArrangeRCNN, ArrangeEvalRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD,
ArrangeTestSSD, ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO)
__all__ = [
'PadBatch', 'MultiScale', 'RandomShape', 'PadMSTest', 'DataSet',
'CocoDataSet', 'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed',
'MaskRCNNTrainFeed', 'FasterRCNNEvalFeed', 'MaskRCNNEvalFeed',
'FasterRCNNTestFeed', 'MaskRCNNTestFeed', 'SSDTrainFeed', 'SSDEvalFeed',
'SSDTestFeed', 'YoloTrainFeed', 'YoloEvalFeed', 'YoloTestFeed',
'create_reader'
]
def _prepare_data_config(feed, args_path):
# if `DATASET_DIR` does not exist, search ~/.paddle/dataset for a directory
# named `DATASET_DIR` (e.g., coco, pascal); if not found there either, download it
dataset_home = args_path if args_path else feed.dataset.dataset_dir
if dataset_home:
annotation = getattr(feed.dataset, 'annotation', None)
image_dir = getattr(feed.dataset, 'image_dir', None)
dataset_dir = get_dataset_path(dataset_home, annotation, image_dir)
if annotation:
feed.dataset.annotation = os.path.join(dataset_dir, annotation)
if image_dir:
feed.dataset.image_dir = os.path.join(dataset_dir, image_dir)
mixup_epoch = -1
if getattr(feed, 'mixup_epoch', None) is not None:
mixup_epoch = feed.mixup_epoch
data_config = {
'ANNO_FILE': feed.dataset.annotation,
'IMAGE_DIR': feed.dataset.image_dir,
'USE_DEFAULT_LABEL': feed.dataset.use_default_label,
'IS_SHUFFLE': feed.shuffle,
'SAMPLES': feed.samples,
'WITH_BACKGROUND': feed.with_background,
'MIXUP_EPOCH': mixup_epoch,
'TYPE': type(feed.dataset).__source__
}
if feed.mode == 'TRAIN':
data_config['CLASS_AWARE_SAMPLING'] = getattr(
feed, 'class_aware_sampling', False)
if len(getattr(feed.dataset, 'images', [])) > 0:
data_config['IMAGES'] = feed.dataset.images
return data_config
def create_reader(feed, max_iter=0, args_path=None, my_source=None):
"""
Return iterable data reader.
Args:
max_iter (int): number of iterations.
my_source (callable): callable function to create a source iterator
which is used to provide source data in 'ppdet.data.reader'
"""
# if `DATASET_DIR` does not exist, search ~/.paddle/dataset for a directory
# named `DATASET_DIR` (e.g., coco, pascal); if not found there either, download it
data_config = _prepare_data_config(feed, args_path)
bufsize = getattr(feed, 'bufsize', 10)
use_process = getattr(feed, 'use_process', False)
memsize = getattr(feed, 'memsize', '3G')
transform_config = {
'WORKER_CONF': {
'bufsize': bufsize,
'worker_num': feed.num_workers,
'use_process': use_process,
'memsize': memsize
},
'BATCH_SIZE': feed.batch_size,
'DROP_LAST': feed.drop_last,
'USE_PADDED_IM_INFO': feed.use_padded_im_info,
}
batch_transforms = feed.batch_transforms
pad = [t for t in batch_transforms if isinstance(t, PadBatch)]
rand_shape = [t for t in batch_transforms if isinstance(t, RandomShape)]
multi_scale = [t for t in batch_transforms if isinstance(t, MultiScale)]
pad_ms_test = [t for t in batch_transforms if isinstance(t, PadMSTest)]
if any(pad):
transform_config['IS_PADDING'] = True
if pad[0].pad_to_stride != 0:
transform_config['COARSEST_STRIDE'] = pad[0].pad_to_stride
if any(rand_shape):
transform_config['RANDOM_SHAPES'] = rand_shape[0].sizes
if any(multi_scale):
transform_config['MULTI_SCALES'] = multi_scale[0].scales
if any(pad_ms_test):
transform_config['ENABLE_MULTISCALE_TEST'] = True
transform_config['NUM_SCALE'] = feed.num_scale
transform_config['COARSEST_STRIDE'] = pad_ms_test[0].pad_to_stride
if hasattr(inspect, 'getfullargspec'):
argspec = inspect.getfullargspec
else:
argspec = inspect.getargspec
ops = []
for op in feed.sample_transforms:
op_dict = op.__dict__.copy()
argnames = [
arg for arg in argspec(type(op).__init__).args if arg != 'self'
]
op_dict = {k: v for k, v in op_dict.items() if k in argnames}
op_dict['op'] = op.__class__.__name__
ops.append(op_dict)
transform_config['OPS'] = ops
return Reader.create(feed.mode, data_config, transform_config, max_iter,
my_source)
# XXX batch transforms are only stubs for now, actually handled by `post_map`
@serializable
class PadBatch(object):
"""
Pad a batch of samples to the same dimensions
Args:
pad_to_stride (int): pad to a multiple of this stride, e.g., 32
"""
def __init__(self, pad_to_stride=0):
super(PadBatch, self).__init__()
self.pad_to_stride = pad_to_stride
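A standalone sketch of the arithmetic pad_to_stride implies (assuming the usual rounding behavior): each spatial dimension of the batch is rounded up to the nearest multiple of the stride so downsampled feature maps divide evenly.

import math

def padded_dim(dim, stride=32):
    # round a height/width up to the nearest multiple of the stride
    return int(math.ceil(dim / float(stride)) * stride)

assert padded_dim(800) == 800
assert padded_dim(801) == 832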
@serializable
class MultiScale(object):
"""
Randomly resize image by scale
Args:
scales (list): list of int, randomly resize to one of these scales
"""
def __init__(self, scales=[]):
super(MultiScale, self).__init__()
self.scales = scales
@serializable
class RandomShape(object):
"""
Randomly reshape a batch
Args:
sizes (list): list of int, randomly choose a size from these
"""
def __init__(self, sizes=[]):
super(RandomShape, self).__init__()
self.sizes = sizes
@serializable
class PadMSTest(object):
"""
Padding for multi-scale test
Args:
pad_to_stride (int): pad to a multiple of this stride, e.g., 32
"""
def __init__(self, pad_to_stride=0):
super(PadMSTest, self).__init__()
self.pad_to_stride = pad_to_stride
@serializable
class DataSet(object):
"""
Dataset, e.g., coco, pascal voc
Args:
annotation (str): annotation file path
image_dir (str): directory where image files are stored
shuffle (bool): shuffle samples
"""
__source__ = 'RoiDbSource'
def __init__(self,
annotation,
image_dir=None,
dataset_dir=None,
use_default_label=None):
super(DataSet, self).__init__()
self.dataset_dir = dataset_dir
self.annotation = annotation
self.image_dir = image_dir
self.use_default_label = use_default_label
COCO_DATASET_DIR = 'dataset/coco'
COCO_TRAIN_ANNOTATION = 'annotations/instances_train2017.json'
COCO_TRAIN_IMAGE_DIR = 'train2017'
COCO_VAL_ANNOTATION = 'annotations/instances_val2017.json'
COCO_VAL_IMAGE_DIR = 'val2017'
@serializable
class CocoDataSet(DataSet):
def __init__(self,
dataset_dir=COCO_DATASET_DIR,
annotation=COCO_TRAIN_ANNOTATION,
image_dir=COCO_TRAIN_IMAGE_DIR):
super(CocoDataSet, self).__init__(
dataset_dir=dataset_dir, annotation=annotation, image_dir=image_dir)
VOC_DATASET_DIR = 'dataset/voc'
VOC_TRAIN_ANNOTATION = 'train.txt'
VOC_VAL_ANNOTATION = 'val.txt'
VOC_IMAGE_DIR = None
VOC_USE_DEFAULT_LABEL = True
@serializable
class VocDataSet(DataSet):
__source__ = 'VOCSource'
def __init__(self,
dataset_dir=VOC_DATASET_DIR,
annotation=VOC_TRAIN_ANNOTATION,
image_dir=VOC_IMAGE_DIR,
use_default_label=VOC_USE_DEFAULT_LABEL):
super(VocDataSet, self).__init__(
dataset_dir=dataset_dir,
annotation=annotation,
image_dir=image_dir,
use_default_label=use_default_label)
@serializable
class SimpleDataSet(DataSet):
__source__ = 'SimpleSource'
def __init__(self,
dataset_dir=None,
annotation=None,
image_dir=None,
use_default_label=None):
super(SimpleDataSet, self).__init__(
dataset_dir=dataset_dir, annotation=annotation, image_dir=image_dir)
self.images = []
def add_images(self, images):
self.images.extend(images)
@serializable
class DataFeed(object):
"""
DataFeed encompasses all data loading related settings
Args:
dataset (object): a `Dataset` instance
fields (list): list of data fields needed
image_shape (list): list of image dims (C, MAX_DIM, MIN_DIM)
sample_transforms (list): list of sample transformations to use
batch_transforms (list): list of batch transformations to use
batch_size (int): number of images per device
shuffle (bool): if samples should be shuffled
drop_last (bool): drop last batch if size is uneven
num_workers (int): number of worker processes (or threads)
bufsize (int): size of queue used to buffer results from workers
use_process (bool): use process or thread as workers
memsize (str): size of shared memory used in result queue
when 'use_process' is True, default to '3G'
"""
__category__ = 'data'
def __init__(self,
dataset,
fields,
image_shape,
sample_transforms=None,
batch_transforms=None,
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
with_background=True,
num_workers=2,
bufsize=10,
use_process=False,
memsize=None,
use_padded_im_info=False,
class_aware_sampling=False):
super(DataFeed, self).__init__()
self.fields = fields
self.image_shape = image_shape
self.sample_transforms = sample_transforms
self.batch_transforms = batch_transforms
self.batch_size = batch_size
self.shuffle = shuffle
self.samples = samples
self.drop_last = drop_last
self.with_background = with_background
self.num_workers = num_workers
self.bufsize = bufsize
self.use_process = use_process
self.memsize = memsize
self.dataset = dataset
self.use_padded_im_info = use_padded_im_info
self.class_aware_sampling = class_aware_sampling
if isinstance(dataset, dict):
self.dataset = DataSet(**dataset)
# for custom (i.e., Non-preset) datasets
@register
class TrainFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset,
fields,
image_shape,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=True,
samples=-1,
drop_last=False,
with_background=True,
num_workers=2,
bufsize=10,
use_process=True,
memsize=None):
super(TrainFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
with_background=with_background,
num_workers=num_workers,
bufsize=bufsize,
use_process=use_process,
memsize=memsize)
@register
class EvalFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset,
fields,
image_shape,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
with_background=True,
num_workers=2):
super(EvalFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
with_background=with_background,
num_workers=num_workers)
@register
class TestFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset,
fields,
image_shape,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
with_background=True,
num_workers=2):
super(TestFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last,
with_background=with_background,
num_workers=num_workers)
# yapf: disable
@register
class FasterRCNNTrainFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=CocoDataSet().__dict__,
fields=[
'image', 'im_info', 'im_id', 'gt_box', 'gt_label',
'is_crowd'
],
image_shape=[None, 3, None, None],
sample_transforms=[
DecodeImage(to_rgb=True),
RandomFlipImage(prob=0.5),
NormalizeImage(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
ResizeImage(target_size=800, max_size=1333, interp=1),
Permute(to_bgr=False)
],
batch_transforms=[PadBatch()],
batch_size=1,
shuffle=True,
samples=-1,
drop_last=False,
bufsize=10,
num_workers=2,
use_process=False,
memsize=None,
class_aware_sampling=False):
# XXX this should be handled by the data loader, since `fields` is
# given, just collect them
sample_transforms.append(ArrangeRCNN())
super(FasterRCNNTrainFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
bufsize=bufsize,
num_workers=num_workers,
use_process=use_process,
memsize=memsize,
class_aware_sampling=class_aware_sampling)
# XXX these modes should be unified
self.mode = 'TRAIN'
@register
class FasterRCNNEvalFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=CocoDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_info', 'im_id', 'im_shape', 'gt_box',
'gt_label', 'is_difficult'],
image_shape=[None, 3, None, None],
sample_transforms=[
DecodeImage(to_rgb=True),
NormalizeImage(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
ResizeImage(target_size=800, max_size=1333, interp=1),
Permute(to_bgr=False)
],
batch_transforms=[PadBatch()],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
num_workers=2,
use_padded_im_info=True,
enable_multiscale=False,
num_scale=1,
enable_aug_flip=False):
sample_transforms.append(ArrangeEvalRCNN())
super(FasterRCNNEvalFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
use_padded_im_info=use_padded_im_info)
self.mode = 'VAL'
self.enable_multiscale = enable_multiscale
self.num_scale = num_scale
self.enable_aug_flip = enable_aug_flip
@register
class FasterRCNNTestFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_info', 'im_id', 'im_shape'],
image_shape=[None, 3, None, None],
sample_transforms=[
DecodeImage(to_rgb=True),
NormalizeImage(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
ResizeImage(target_size=800, max_size=1333, interp=1),
Permute(to_bgr=False)
],
batch_transforms=[PadBatch()],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
num_workers=2,
use_padded_im_info=True):
sample_transforms.append(ArrangeTestRCNN())
if isinstance(dataset, dict):
dataset = SimpleDataSet(**dataset)
super(FasterRCNNTestFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
use_padded_im_info=use_padded_im_info)
self.mode = 'TEST'
# XXX two presets are currently used; in the future, these should be combined into a
# single `RCNNTrainFeed`. Mask (and keypoint) should be processed
# automatically if `gt_mask` (or `gt_keypoints`) is in the required fields
@register
class MaskRCNNTrainFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=CocoDataSet().__dict__,
fields=[
'image', 'im_info', 'im_id', 'gt_box', 'gt_label',
'is_crowd', 'gt_mask'
],
image_shape=[None, 3, None, None],
sample_transforms=[
DecodeImage(to_rgb=True),
RandomFlipImage(prob=0.5, is_mask_flip=True),
NormalizeImage(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
ResizeImage(target_size=800,
max_size=1333,
interp=1,
use_cv2=True),
Permute(to_bgr=False, channel_first=True)
],
batch_transforms=[PadBatch()],
batch_size=1,
shuffle=True,
samples=-1,
drop_last=False,
num_workers=2,
use_process=False,
use_padded_im_info=False):
sample_transforms.append(ArrangeRCNN(is_mask=True))
super(MaskRCNNTrainFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
use_process=use_process)
self.mode = 'TRAIN'
@register
class MaskRCNNEvalFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=CocoDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_info', 'im_id', 'im_shape'],
image_shape=[None, 3, None, None],
sample_transforms=[
DecodeImage(to_rgb=True),
NormalizeImage(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
ResizeImage(target_size=800,
max_size=1333,
interp=1,
use_cv2=True),
Permute(to_bgr=False, channel_first=True)
],
batch_transforms=[PadBatch()],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
num_workers=2,
use_process=False,
use_padded_im_info=True,
enable_multiscale=False,
num_scale=1,
enable_aug_flip=False):
sample_transforms.append(ArrangeTestRCNN())
super(MaskRCNNEvalFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
use_process=use_process,
use_padded_im_info=use_padded_im_info)
self.mode = 'VAL'
self.enable_multiscale = enable_multiscale
self.num_scale = num_scale
self.enable_aug_flip = enable_aug_flip
@register
class MaskRCNNTestFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_info', 'im_id', 'im_shape'],
image_shape=[None, 3, None, None],
sample_transforms=[
DecodeImage(to_rgb=True),
NormalizeImage(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
ResizeImage(target_size=800, max_size=1333, interp=1),
Permute(to_bgr=False, channel_first=True)
],
batch_transforms=[PadBatch()],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
num_workers=2,
use_process=False,
use_padded_im_info=True):
sample_transforms.append(ArrangeTestRCNN())
if isinstance(dataset, dict):
dataset = SimpleDataSet(**dataset)
super(MaskRCNNTestFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
use_process=use_process,
use_padded_im_info=use_padded_im_info)
self.mode = 'TEST'
@register
class SSDTrainFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=VocDataSet().__dict__,
fields=['image', 'gt_box', 'gt_label'],
image_shape=[3, 300, 300],
sample_transforms=[
DecodeImage(to_rgb=True, with_mixup=False),
NormalizeBox(),
RandomDistort(brightness_lower=0.875,
brightness_upper=1.125,
is_order=True),
ExpandImage(max_ratio=4, prob=0.5),
CropImage(batch_sampler=[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]],
satisfy_all=False, avoid_no_bbox=False),
ResizeImage(target_size=300, use_cv2=False, interp=1),
RandomFlipImage(is_normalized=True),
Permute(),
NormalizeImage(mean=[127.5, 127.5, 127.5],
std=[127.502231, 127.502231, 127.502231],
is_scale=False)
],
batch_transforms=[],
batch_size=32,
shuffle=True,
samples=-1,
drop_last=True,
num_workers=8,
bufsize=10,
use_process=True,
memsize=None):
sample_transforms.append(ArrangeSSD())
super(SSDTrainFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
bufsize=bufsize,
use_process=use_process,
memsize=None)
self.mode = 'TRAIN'
@register
class SSDEvalFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(
self,
dataset=VocDataSet(VOC_VAL_ANNOTATION).__dict__,
fields=['image', 'im_shape', 'im_id', 'gt_box',
'gt_label', 'is_difficult'],
image_shape=[3, 300, 300],
sample_transforms=[
DecodeImage(to_rgb=True, with_mixup=False),
NormalizeBox(),
ResizeImage(target_size=300, use_cv2=False, interp=1),
Permute(),
NormalizeImage(
mean=[127.5, 127.5, 127.5],
std=[127.502231, 127.502231, 127.502231],
is_scale=False)
],
batch_transforms=[],
batch_size=64,
shuffle=False,
samples=-1,
drop_last=True,
num_workers=8,
bufsize=10,
use_process=False,
memsize=None):
sample_transforms.append(ArrangeEvalSSD(fields))
super(SSDEvalFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
bufsize=bufsize,
use_process=use_process,
memsize=memsize)
self.mode = 'VAL'
@register
class SSDTestFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=SimpleDataSet(VOC_VAL_ANNOTATION).__dict__,
fields=['image', 'im_id', 'im_shape'],
image_shape=[3, 300, 300],
sample_transforms=[
DecodeImage(to_rgb=True),
ResizeImage(target_size=300, use_cv2=False, interp=1),
Permute(),
NormalizeImage(
mean=[127.5, 127.5, 127.5],
std=[127.502231, 127.502231, 127.502231],
is_scale=False)
],
batch_transforms=[],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
num_workers=8,
bufsize=10,
use_process=False,
memsize=None):
sample_transforms.append(ArrangeTestSSD())
if isinstance(dataset, dict):
dataset = SimpleDataSet(**dataset)
super(SSDTestFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
num_workers=num_workers,
bufsize=bufsize,
use_process=use_process,
memsize=memsize)
self.mode = 'TEST'
@register
class YoloTrainFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=CocoDataSet().__dict__,
fields=['image', 'gt_box', 'gt_label', 'gt_score'],
image_shape=[3, 608, 608],
sample_transforms=[
DecodeImage(to_rgb=True, with_mixup=True),
MixupImage(alpha=1.5, beta=1.5),
ColorDistort(),
RandomExpand(fill_value=[123.675, 116.28, 103.53]),
RandomCrop(),
RandomFlipImage(is_normalized=False),
Resize(target_dim=608, interp='random'),
NormalizePermute(
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.120, 57.375]),
NormalizeBox(),
],
batch_transforms=[
RandomShape(sizes=[
320, 352, 384, 416, 448, 480, 512, 544, 576, 608
])
],
batch_size=8,
shuffle=True,
samples=-1,
drop_last=True,
with_background=False,
num_workers=8,
bufsize=128,
use_process=True,
memsize=None,
num_max_boxes=50,
mixup_epoch=250,
class_aware_sampling=False):
sample_transforms.append(ArrangeYOLO())
super(YoloTrainFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
with_background=with_background,
num_workers=num_workers,
bufsize=bufsize,
use_process=use_process,
memsize=memsize,
class_aware_sampling=class_aware_sampling)
self.num_max_boxes = num_max_boxes
self.mixup_epoch = mixup_epoch
self.mode = 'TRAIN'
@register
class YoloEvalFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=CocoDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_size', 'im_id', 'gt_box',
'gt_label', 'is_difficult'],
image_shape=[3, 608, 608],
sample_transforms=[
DecodeImage(to_rgb=True),
ResizeImage(target_size=608, interp=2),
NormalizeImage(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
Permute(to_bgr=False),
],
batch_transforms=[],
batch_size=8,
shuffle=False,
samples=-1,
drop_last=False,
with_background=False,
num_workers=8,
num_max_boxes=50,
use_process=False,
memsize=None):
sample_transforms.append(ArrangeEvalYOLO())
super(YoloEvalFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
with_background=with_background,
num_workers=num_workers,
use_process=use_process,
memsize=memsize)
self.num_max_boxes = num_max_boxes
self.mode = 'VAL'
self.bufsize = 128
# support image shape config: resize images according to image_shape
for i, trans in enumerate(sample_transforms):
if isinstance(trans, ResizeImage):
sample_transforms[i] = ResizeImage(
target_size=self.image_shape[-1],
interp=trans.interp)
if isinstance(trans, Resize):
sample_transforms[i].target_dim = self.image_shape[-1]
@register
class YoloTestFeed(DataFeed):
__doc__ = DataFeed.__doc__
def __init__(self,
dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_size', 'im_id'],
image_shape=[3, 608, 608],
sample_transforms=[
DecodeImage(to_rgb=True),
ResizeImage(target_size=608, interp=2),
NormalizeImage(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
is_scale=True,
is_channel_first=False),
Permute(to_bgr=False),
],
batch_transforms=[],
batch_size=1,
shuffle=False,
samples=-1,
drop_last=False,
with_background=False,
num_workers=8,
num_max_boxes=50,
use_process=False,
memsize=None):
sample_transforms.append(ArrangeTestYOLO())
if isinstance(dataset, dict):
dataset = SimpleDataSet(**dataset)
super(YoloTestFeed, self).__init__(
dataset,
fields,
image_shape,
sample_transforms,
batch_transforms,
batch_size=batch_size,
shuffle=shuffle,
samples=samples,
drop_last=drop_last,
with_background=with_background,
num_workers=num_workers,
use_process=use_process,
memsize=memsize)
self.mode = 'TEST'
self.bufsize = 128
# support image shape config: resize images according to image_shape
for i, trans in enumerate(sample_transforms):
if isinstance(trans, ResizeImage):
sample_transforms[i] = ResizeImage(
target_size=self.image_shape[-1],
interp=trans.interp)
if isinstance(trans, Resize):
sample_transforms[i].target_dim = self.image_shape[-1]
# yapf: enable
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# interface for accessing data samples in stream
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Dataset(object):
"""interface to access a stream of data samples"""
def __init__(self):
self._epoch = -1
self._pos = 0
def __next__(self):
return self.next()
def __iter__(self):
return self
def __str__(self):
return "{}(epoch:{:d}, size:{:d}, pos:{:d})".format(
type(self).__name__, self._epoch,
self.size(), self._pos)
def next(self):
"""get next sample"""
raise NotImplementedError('%s.next not available' %
(self.__class__.__name__))
def reset(self):
"""reset to initial status and begins a new epoch"""
raise NotImplementedError('%s.reset not available' %
(self.__class__.__name__))
def size(self):
"""get number of samples in this dataset"""
raise NotImplementedError('%s.size not available' %
(self.__class__.__name__))
def drained(self):
"""whether all sampled has been readed out for this epoch"""
raise NotImplementedError('%s.drained not available' %
(self.__class__.__name__))
def epoch_id(self):
"""return epoch id for latest sample"""
raise NotImplementedError('%s.epoch_id not available' %
(self.__class__.__name__))
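For reference while reading this removal, a hypothetical minimal subclass of the stream interface above (illustrative only, not part of the codebase):

class ListDataset(Dataset):
    """toy in-memory implementation of the stream interface"""

    def __init__(self, samples):
        super(ListDataset, self).__init__()
        self._samples = samples

    def size(self):
        return len(self._samples)

    def drained(self):
        return self._pos >= self.size()

    def reset(self):
        self._pos = 0
        self._epoch += 1

    def next(self):
        if self.drained():
            raise StopIteration
        sample = self._samples[self._pos]
        self._pos += 1
        return sample

    def epoch_id(self):
        return self._epoch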
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
# function: # function:
# transform samples in 'source' using 'mapper' # transform samples in 'source' using 'worker'
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -30,7 +30,7 @@ import uuid ...@@ -30,7 +30,7 @@ import uuid
import logging import logging
import signal import signal
import threading import threading
from .transformer import ProxiedDataset import traceback
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -38,13 +38,14 @@ logger = logging.getLogger(__name__) ...@@ -38,13 +38,14 @@ logger = logging.getLogger(__name__)
class EndSignal(object): class EndSignal(object):
""" signal used to notify worker to exit """ signal used to notify worker to exit
""" """
def __init__(self, id, errno=0, errmsg=''): def __init__(self, id, errno=0, errmsg=''):
self.id = id self.id = id
self.errno = errno self.errno = errno
self.errmsg = errmsg self.errmsg = errmsg
class ParallelMappedDataset(ProxiedDataset): class ParallelMap(object):
""" """
Transform samples to mapped samples which is similar to Transform samples to mapped samples which is similar to
'basic.MappedDataset', but multiple workers (threads or processes) 'basic.MappedDataset', but multiple workers (threads or processes)
...@@ -54,45 +55,48 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -54,45 +55,48 @@ class ParallelMappedDataset(ProxiedDataset):
this class is not thread-safe this class is not thread-safe
""" """
def __init__(self, source, mapper, worker_args): def __init__(self,
super(ParallelMappedDataset, self).__init__(source) source,
worker_args = {k.lower(): v for k, v in worker_args.items()} worker,
worker_num,
args = { bufsize=100,
'bufsize': 100, use_process=False,
'worker_num': 8, memsize='3G'):
'use_process': False, self._worker_num = worker_num
'memsize': '3G' self._bufsize = bufsize
} self._use_process = use_process
args.update(worker_args) if self._use_process and type(memsize) is str:
if args['use_process'] and type(args['memsize']) is str: assert memsize[-1].lower() == 'g', \
assert args['memsize'][-1].lower() == 'g', \ "invalid param for memsize[%s], should end with 'G' or 'g'" % (memsize)
"invalid param for memsize[{}], should " \ gb = memsize[:-1]
"be ended with 'G' or 'g'".format(args['memsize']) self._memsize = int(gb) * 1024**3
gb = args['memsize'][:-1]
args['memsize'] = int(gb) * 1024 ** 3
self._worker_args = args
self._started = False self._started = False
self._source = source self._source = source
self._mapper = mapper self._worker = worker
self._exit = False self._exit = False
self._setup() self._setup()
self._souce_drained = False
def __iter__(self):
return self
def __next__(self):
return self.next()
def _setup(self): def _setup(self):
"""setup input/output queues and workers """ """setup input/output queues and workers """
use_process = self._worker_args.get('use_process', False) use_process = self._use_process
if use_process and sys.platform == "win32": if use_process and sys.platform == "win32":
logger.info("Use multi-thread reader instead of " logger.info("Use multi-thread reader instead of "
"multi-process reader on Windows.") "multi-process reader on Windows.")
use_process = False use_process = False
bufsize = self._worker_args['bufsize'] bufsize = self._bufsize
if use_process: if use_process:
from .shared_queue import SharedQueue as Queue from .shared_queue import SharedQueue as Queue
from multiprocessing import Process as Worker from multiprocessing import Process as Worker
from multiprocessing import Event from multiprocessing import Event
memsize = self._worker_args['memsize'] memsize = self._memsize
self._inq = Queue(bufsize, memsize=memsize) self._inq = Queue(bufsize, memsize=memsize)
self._outq = Queue(bufsize, memsize=memsize) self._outq = Queue(bufsize, memsize=memsize)
else: else:
...@@ -105,7 +109,7 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -105,7 +109,7 @@ class ParallelMappedDataset(ProxiedDataset):
self._inq = Queue(bufsize) self._inq = Queue(bufsize)
self._outq = Queue(bufsize) self._outq = Queue(bufsize)
consumer_num = self._worker_args['worker_num'] consumer_num = self._worker_num
id = str(uuid.uuid4())[-3:] id = str(uuid.uuid4())[-3:]
self._producer = threading.Thread( self._producer = threading.Thread(
target=self._produce, target=self._produce,
...@@ -118,8 +122,7 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -118,8 +122,7 @@ class ParallelMappedDataset(ProxiedDataset):
consumer_id = 'consumer-' + id + '-' + str(i) consumer_id = 'consumer-' + id + '-' + str(i)
p = Worker( p = Worker(
target=self._consume, target=self._consume,
args=(consumer_id, self._inq, self._outq, args=(consumer_id, self._inq, self._outq, self._worker))
self._mapper))
self._consumers.append(p) self._consumers.append(p)
p.daemon = True p.daemon = True
setattr(p, 'id', consumer_id) setattr(p, 'id', consumer_id)
...@@ -137,9 +140,11 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -137,9 +140,11 @@ class ParallelMappedDataset(ProxiedDataset):
if self._exit: if self._exit:
break break
try: try:
inq.put(source.next()) s = source.next()
inq.put(s)
self._produced += 1 self._produced += 1
except StopIteration: except StopIteration:
self._source_drained = True
self._feeding_ev.clear() self._feeding_ev.clear()
self._feeding_ev.wait() self._feeding_ev.wait()
except Exception as e: except Exception as e:
...@@ -149,11 +154,11 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -149,11 +154,11 @@ class ParallelMappedDataset(ProxiedDataset):
inq.put(endsig) inq.put(endsig)
break break
def _consume(self, id, inq, outq, mapper): def _consume(self, id, inq, outq, worker):
"""Fetch data from 'inq', process it and put result to 'outq'""" """Fetch data from 'inq', process it and put result to 'outq'"""
if self._worker_args['use_process']: if self._use_process:
# handle SIGTERM signal to exit to prevent print stack frame # handle SIGTERM signal to exit to prevent print stack frame
signal.signal(signal.SIGTERM, lambda signum, frame : sys.exit()) signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit())
endsig = EndSignal(id) endsig = EndSignal(id)
while True: while True:
...@@ -166,7 +171,7 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -166,7 +171,7 @@ class ParallelMappedDataset(ProxiedDataset):
break break
try: try:
result = mapper(sample) result = worker(sample)
outq.put(result) outq.put(result)
except Exception as e: except Exception as e:
endsig.errno = -2 endsig.errno = -2
...@@ -192,12 +197,12 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -192,12 +197,12 @@ class ParallelMappedDataset(ProxiedDataset):
for w in self._consumers: for w in self._consumers:
if not w.is_alive() and w.id not in self._consumer_endsig: if not w.is_alive() and w.id not in self._consumer_endsig:
abnormal_num += 1 abnormal_num += 1
if self._worker_args['use_process']: if self._use_process:
errmsg = "consumer[{}] exit abnormally with exitcode[{}]" \ errmsg = "consumer[{}] exit abnormally with exitcode[{}]" \
.format(w.pid, w.exitcode) .format(w.pid, w.exitcode)
else: else:
errmsg = "consumer[{}] exit abnormally".format(w.ident) errmsg = "consumer[{}] exit abnormally".format(w.ident)
logger.warn(errmsg) logger.warn(errmsg)
if abnormal_num > 0: if abnormal_num > 0:
...@@ -255,7 +260,8 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -255,7 +260,8 @@ class ParallelMappedDataset(ProxiedDataset):
" for some consumers exited abnormally before!!!" " for some consumers exited abnormally before!!!"
if not self.drained(): if not self.drained():
logger.warn("reset before epoch[{}] finishes".format(self._epoch)) logger.warn("reset before epoch[{}] finishes".format(
self._epoch))
self._produced = self._produced - self._consumed self._produced = self._produced - self._consumed
else: else:
self._produced = 0 self._produced = 0
...@@ -266,10 +272,11 @@ class ParallelMappedDataset(ProxiedDataset): ...@@ -266,10 +272,11 @@ class ParallelMappedDataset(ProxiedDataset):
+ " cannot start another epoch" + " cannot start another epoch"
self._source.reset() self._source.reset()
self._source_drained = False
self._consumed = 0 self._consumed = 0
self._feeding_ev.set() self._feeding_ev.set()
# FIXME(dengkaipeng): fix me if you have a better implementation
# handle terminate reader process, do not print stack frame # handle terminate reader process, do not print stack frame
signal.signal(signal.SIGTERM, lambda signum, frame : sys.exit()) signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit())
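To make the produce/consume flow above concrete, here is a minimal, self-contained sketch of the same pattern using only the Python 3 standard library (plain threads and queues; no shared-memory queue and none of the restart/EndSignal bookkeeping). The names (parallel_map, the toy inputs) are illustrative, not part of ppdet.

import threading
import queue

def parallel_map(source, worker, worker_num=2, bufsize=8):
    # One producer thread feeds 'inq'; 'worker_num' consumer threads map
    # samples and push results to 'outq'. Result order is not preserved.
    inq, outq = queue.Queue(bufsize), queue.Queue(bufsize)
    end = object()  # sentinel playing the role of EndSignal above

    def produce():
        for sample in source:
            inq.put(sample)
        for _ in range(worker_num):
            inq.put(end)  # one end marker per consumer

    def consume():
        while True:
            sample = inq.get()
            if sample is end:
                outq.put(end)
                break
            outq.put(worker(sample))

    threads = [threading.Thread(target=produce)]
    threads += [threading.Thread(target=consume) for _ in range(worker_num)]
    for t in threads:
        t.daemon = True
        t.start()
    finished = 0
    while finished < worker_num:
        result = outq.get()
        if result is end:
            finished += 1
        else:
            yield result

print(sorted(parallel_map(range(5), lambda x: x * x)))  # [0, 1, 4, 9, 16]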
...@@ -12,131 +12,393 @@ ...@@ -12,131 +12,393 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# function:
# Interface to build readers for detection data like COCO or VOC
#
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals
from numbers import Integral
import copy
import functools
import collections
import traceback
import numpy as np
import logging import logging
from .source import build_source
from .transform import build_mapper, map, batch, batch_map from ppdet.core.workspace import register, serializable
from .parallel_map import ParallelMap
__all__ = ['Reader', 'create_reader']
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class Compose(object):
def __init__(self, transforms, ctx=None):
self.transforms = transforms
self.ctx = ctx
def __call__(self, data):
ctx = self.ctx if self.ctx else {}
for f in self.transforms:
try:
data = f(data, ctx)
except Exception as e:
stack_info = traceback.format_exc()
logger.info("fail to map op [{}] with error: {} and stack:\n{}".
format(f, e, str(stack_info)))
raise e
return data
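A toy illustration of Compose: each op is a callable taking (data, ctx), and ops are applied in order. The two operators below are placeholders, not real ppdet transforms.

def add_one(data, ctx):
    data['x'] += 1
    return data

def double(data, ctx):
    data['x'] *= 2
    return data

compose = Compose([add_one, double])
print(compose({'x': 3}))  # add_one then double: {'x': 8}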
def _calc_img_weights(roidbs):
""" calculate the probabilities of each sample
"""
imgs_cls = []
num_per_cls = {}
img_weights = []
for i, roidb in enumerate(roidbs):
img_cls = set([k for cls in roidbs[i]['gt_class'] for k in cls])
imgs_cls.append(img_cls)
for c in img_cls:
if c not in num_per_cls:
num_per_cls[c] = 1
else:
num_per_cls[c] += 1
for i in range(len(roidbs)):
weights = 0
for c in imgs_cls[i]:
weights += 1 / num_per_cls[c]
img_weights.append(weights)
# probabilities sum to 1
img_weights = img_weights / np.sum(img_weights)
return img_weights
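A worked example of the class-aware weights on toy roidbs; the arithmetic below just traces the function above.

import numpy as np

# Image 0 contains class 1, image 1 contains classes 1 and 2, image 2 class 2.
toy_roidbs = [
    {'gt_class': np.array([[1]])},
    {'gt_class': np.array([[1], [2]])},
    {'gt_class': np.array([[2]])},
]
# Each class appears in 2 images, so the raw weights are
# [1/2, 1/2 + 1/2, 1/2] = [0.5, 1.0, 0.5]; normalized: [0.25, 0.5, 0.25].
print(_calc_img_weights(toy_roidbs))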
def _has_empty(item):
def empty(x):
if isinstance(x, np.ndarray) and x.size == 0:
return True
elif isinstance(x, collections.Sequence) and len(x) == 0:
return True
else:
return False
if isinstance(item, collections.Sequence) and len(item) == 0:
return True
if item is None:
return True
if empty(item):
return True
return False
def _segm(samples):
assert 'gt_poly' in samples
segms = samples['gt_poly']
if 'is_crowd' in samples:
is_crowd = samples['is_crowd']
if len(segms) != 0:
assert len(segms) == is_crowd.shape[0]
gt_masks = []
valid = True
for i in range(len(segms)):
segm = segms[i]
gt_segm = []
if 'is_crowd' in samples and is_crowd[i]:
gt_segm.append([[0, 0]])
else:
for poly in segm:
if len(poly) == 0:
valid = False
break
gt_segm.append(np.array(poly).reshape(-1, 2))
if (not valid) or len(gt_segm) == 0:
break
gt_masks.append(gt_segm)
return gt_masks
def batch_arrange(batch_samples, fields):
def im_shape(samples, dim=3):
# hard code
assert 'h' in samples
assert 'w' in samples
if dim == 3: # RCNN, ..
return np.array((samples['h'], samples['w'], 1), dtype=np.float32)
else: # YOLOv3, ..
return np.array((samples['h'], samples['w']), dtype=np.int32)
arrange_batch = []
for samples in batch_samples:
one_ins = ()
for i, field in enumerate(fields):
if field == 'gt_mask':
one_ins += (_segm(samples), )
elif field == 'im_shape':
one_ins += (im_shape(samples), )
elif field == 'im_size':
one_ins += (im_shape(samples, 2), )
else:
if field == 'is_difficult':
field = 'difficult'
assert field in samples, '{} not in samples'.format(field)
one_ins += (samples[field], )
arrange_batch.append(one_ins)
return arrange_batch
@register
@serializable
class Reader(object): class Reader(object):
"""Interface to make readers for training or evaluation""" """
Args:
dataset (DataSet): DataSet object
sample_transforms (list of BaseOperator): a list of sample transforms
operators.
batch_transforms (list of BaseOperator): a list of batch transforms
operators.
batch_size (int): batch size.
shuffle (bool): whether to shuffle the dataset. Default False.
drop_last (bool): whether to drop the last batch. Default False.
drop_empty (bool): whether to drop a sample when its ground truth
is empty. Default True.
mixup_epoch (int): mixup epoch number. Default is -1, meaning
mixup is not used.
class_aware_sampling (bool): whether to use class-aware sampling.
Default False.
worker_num (int): number of working threads/processes.
Default -1, meaning multi-threading/multi-processing is not used.
use_process (bool): whether to use multi-processing.
It only works when worker_num > 1. Default False.
bufsize (int): buffer size for multi-threads/multi-processes;
note that one instance in the buffer is one batch of data.
memsize (str): size of shared memory used in the result queue when
use_process is true. Default 3G.
inputs_def (dict): network input definition, used to get input fields,
which determine the order of returned data.
"""
def __init__(self,
dataset=None,
sample_transforms=None,
batch_transforms=None,
batch_size=None,
shuffle=False,
drop_last=False,
drop_empty=True,
mixup_epoch=-1,
class_aware_sampling=False,
worker_num=-1,
use_process=False,
bufsize=100,
memsize='3G',
inputs_def=None):
self._dataset = dataset
self._roidbs = self._dataset.get_roidb()
self._fields = copy.deepcopy(inputs_def[
'fields']) if inputs_def else None
# transform
self._sample_transforms = Compose(sample_transforms,
{'fields': self._fields})
self._batch_transforms = None
if batch_transforms:
self._batch_transforms = Compose(batch_transforms,
{'fields': self._fields})
# data
if inputs_def and inputs_def.get('multi_scale', False):
from ppdet.modeling.architectures.input_helper import multiscale_def
im_shape = inputs_def[
'image_shape'] if 'image_shape' in inputs_def else [
3, None, None
]
_, ms_fields = multiscale_def(im_shape, inputs_def['num_scales'],
inputs_def['use_flip'])
self._fields += ms_fields
self._batch_size = batch_size
self._shuffle = shuffle
self._drop_last = drop_last
self._drop_empty = drop_empty
# sampling
self._mixup_epoch = mixup_epoch
self._class_aware_sampling = class_aware_sampling
def __init__(self, data_cf, trans_conf, maxiter=-1): self._load_img = False
self._data_cf = data_cf self._sample_num = len(self._roidbs)
self._trans_conf = trans_conf
self._maxiter = maxiter
self._cname2cid = None
assert isinstance(self._maxiter, Integral), "maxiter should be int"
def _make_reader(self, mode, my_source=None): if self._class_aware_sampling:
"""Build reader for training or validation""" self.img_weights = _calc_img_weights(self._roidbs)
if my_source is None: self._indexes = None
file_conf = self._data_cf[mode]
# 1, Build data source self._pos = -1
self._epoch = -1
self._drained = False
sc_conf = {'data_cf': file_conf, 'cname2cid': self._cname2cid} # multi-process
sc = build_source(sc_conf) self._worker_num = worker_num
self._parallel = None
if self._worker_num > -1:
task = functools.partial(self.worker, self._drop_empty)
self._parallel = ParallelMap(self, task, worker_num, bufsize,
use_process, memsize)
def __call__(self):
if self._worker_num > -1:
return self._parallel
else:
return self
def __iter__(self):
return self
def reset(self):
"""implementation of Dataset.reset
"""
self.indexes = [i for i in range(self.size())]
if self._class_aware_sampling:
self.indexes = np.random.choice(
self._sample_num,
self._sample_num,
replace=False,
p=self.img_weights)
if self._shuffle:
np.random.shuffle(self.indexes)
if self._mixup_epoch > 0 and len(self.indexes) < 2:
logger.info("Disable mixup for dataset samples "
"less than 2 samples")
self._mixup_epoch = -1
if self._epoch < 0:
self._epoch = 0
else: else:
sc = my_source self._epoch += 1
# 2, Buid a transformed dataset self._pos = 0
ops = self._trans_conf[mode]['OPS'] self._drained = False
batchsize = self._trans_conf[mode]['BATCH_SIZE']
drop_last = False if 'DROP_LAST' not in \ def __next__(self):
self._trans_conf[mode] else self._trans_conf[mode]['DROP_LAST'] return self.next()
mapper = build_mapper(ops, {'is_train': mode == 'TRAIN'}) def next(self):
if self._epoch < 0:
worker_args = None self.reset()
if 'WORKER_CONF' in self._trans_conf[mode]: if self.drained():
worker_args = self._trans_conf[mode]['WORKER_CONF'] raise StopIteration
worker_args = {k.lower(): v for k, v in worker_args.items()} batch = self._load_batch()
if self._drop_last and len(batch) < self._batch_size:
mapped_ds = map(sc, mapper, worker_args) raise StopIteration
# In VAL mode, gt_bbox, gt_label can be empty, and should if self._worker_num > -1:
# not be dropped return batch
batched_ds = batch( else:
mapped_ds, batchsize, drop_last, drop_empty=(mode != "VAL")) return self.worker(self._drop_empty, batch)
trans_conf = {k.lower(): v for k, v in self._trans_conf[mode].items()} def _load_batch(self):
need_keys = { batch = []
'is_padding', bs = 0
'coarsest_stride', while bs != self._batch_size:
'random_shapes', if self._pos >= self.size():
'multi_scales', break
'use_padded_im_info', pos = self.indexes[self._pos]
'enable_multiscale_test', sample = copy.deepcopy(self._roidbs[pos])
'num_scale', self._pos += 1
}
bm_config = { if self._drop_empty and self._fields and 'gt_mask' in self._fields:
key: value if _has_empty(_segm(sample)):
for key, value in trans_conf.items() if key in need_keys #logger.warn('gt_mask is empty or not valid in {}'.format(
} # sample['im_file']))
continue
batched_ds = batch_map(batched_ds, bm_config) if self._drop_empty and self._fields and 'gt_bbox' in self._fields:
if _has_empty(sample['gt_bbox']):
batched_ds.reset() #logger.warn('gt_bbox {} is empty or not valid in {}, '
if mode.lower() == 'train': # 'drop this sample'.format(
if self._cname2cid is not None: # sample['im_file'], sample['gt_bbox']))
logger.warn('cname2cid already set, it will be overridden') continue
self._cname2cid = getattr(sc, 'cname2cid', None)
if self._load_img:
# 3, Build a reader sample['image'] = self._load_image(sample['im_file'])
maxit = -1 if self._maxiter <= 0 else self._maxiter
if self._epoch < self._mixup_epoch:
def _reader(): num = len(self.indexes)
n = 0 mix_idx = np.random.randint(1, num)
while True: mix_idx = self.indexes[(mix_idx + self._pos - 1) % num]
for _batch in batched_ds: sample['mixup'] = copy.deepcopy(self._roidbs[mix_idx])
if self._load_img:
sample['mixup']['image'] = self._load_image(sample['mixup'][
'im_file'])
batch.append(sample)
bs += 1
return batch
def worker(self, drop_empty=True, batch_samples=None):
"""
sample transform and batch transform.
"""
batch = []
for sample in batch_samples:
sample = self._sample_transforms(sample)
if drop_empty and 'gt_bbox' in sample:
if _has_empty(sample['gt_bbox']):
#logger.warn('gt_bbox {} is empty or not valid in {}, '
# 'drop this sample'.format(
# sample['im_file'], sample['gt_bbox']))
continue
batch.append(sample)
if len(batch) > 0 and self._batch_transforms:
batch = self._batch_transforms(batch)
if len(batch) > 0 and self._fields:
batch = batch_arrange(batch, self._fields)
return batch
def _load_image(self, filename):
with open(filename, 'rb') as f:
return f.read()
def size(self):
""" implementation of Dataset.size
"""
return self._sample_num
def drained(self):
""" implementation of Dataset.drained
"""
assert self._epoch >= 0, 'The first epoch has not begun!'
return self._pos >= self.size()
def stop(self):
if self._parallel:
self._parallel.stop()
def create_reader(cfg, max_iter=0):
"""
Return iterable data reader.
Args:
max_iter (int): number of iterations.
"""
if not isinstance(cfg, dict):
raise TypeError("The config should be a dict when creating reader.")
reader = Reader(**cfg)()
def _reader():
n = 0
while True:
for _batch in reader:
if len(_batch) > 0:
yield _batch yield _batch
n += 1 n += 1
if maxit > 0 and n == maxit: if max_iter > 0 and n == max_iter:
return
batched_ds.reset()
if maxit <= 0:
return return
reader.reset()
if max_iter <= 0:
return
if hasattr(sc, 'get_imid2path'): return _reader
_reader.imid2path = sc.get_imid2path()
return _reader
def train(self):
"""Build reader for training"""
return self._make_reader('TRAIN')
def val(self):
"""Build reader for validation"""
return self._make_reader('VAL')
def test(self):
"""Build reader for inference"""
return self._make_reader('TEST')
@classmethod
def create(cls,
mode,
data_config,
transform_config,
max_iter=-1,
my_source=None,
ret_iter=True):
""" create a specific reader """
reader = Reader({mode: data_config}, {mode: transform_config}, max_iter)
if ret_iter:
return reader._make_reader(mode, my_source)
else:
return reader
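For reference, a hedged usage sketch of the new entry point. The import path and the placeholder variables (a configured DataSet plus transform operator lists) are assumptions; in practice this dict is built from the _READER_ yml files referenced in the configs.

from ppdet.data.reader import create_reader  # assumed module path

cfg = {
    'dataset': coco_dataset,          # placeholder: a COCODataSet instance
    'sample_transforms': sample_ops,  # placeholder: list of sample operators
    'batch_transforms': batch_ops,    # placeholder: list of batch operators
    'batch_size': 2,
    'shuffle': True,
    'worker_num': 4,
    'inputs_def': {'fields': ['image', 'im_info', 'im_id', 'gt_bbox',
                              'gt_class', 'is_crowd']},
}
train_reader = create_reader(cfg, max_iter=1000)  # returns a generator function
for batch in train_reader():
    pass  # feed each arranged batch to the executor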
...@@ -12,62 +12,10 @@ ...@@ -12,62 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from . import coco
from __future__ import division from . import voc
from __future__ import print_function from . import widerface
import copy from .coco import *
from .voc import *
from .roidb_source import RoiDbSource from .widerface import *
from .simple_source import SimpleSource
from .iterator_source import IteratorSource
from .class_aware_sampling_roidb_source import ClassAwareSamplingRoiDbSource
def build_source(config):
"""
Build dataset from source data, default source type is 'RoiDbSource'
Args:
config (dict): should have following structure:
{
data_cf (dict):
anno_file (str): label file or image list file path
image_dir (str): root directory for images
samples (int): number of samples to load, -1 means all
is_shuffle (bool): should samples be shuffled
load_img (bool): should images be loaded
mixup_epoch (int): parse mixup in first n epoch
with_background (bool): whether load background as a class
cname2cid (dict): the label name to id dictionary
}
"""
if 'data_cf' in config:
data_cf = config['data_cf']
data_cf['cname2cid'] = config['cname2cid']
else:
data_cf = config
data_cf = {k.lower(): v for k, v in data_cf.items()}
args = copy.deepcopy(data_cf)
# default type is 'RoiDbSource'
source_type = 'RoiDbSource'
if 'type' in data_cf:
if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']:
if 'class_aware_sampling' in args and args['class_aware_sampling']:
source_type = 'ClassAwareSamplingRoiDbSource'
else:
source_type = 'RoiDbSource'
if 'class_aware_sampling' in args:
del args['class_aware_sampling']
else:
source_type = data_cf['type']
del args['type']
if source_type == 'RoiDbSource':
return RoiDbSource(**args)
elif source_type == 'SimpleSource':
return SimpleSource(**args)
elif source_type == 'ClassAwareSamplingRoiDbSource':
return ClassAwareSamplingRoiDbSource(**args)
else:
raise ValueError('source type not supported: ' + source_type)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#function:
# interface to load data from local files and parse it for samples,
# eg: roidb data in pickled files
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import random
import copy
import collections
import pickle as pkl
import numpy as np
from .roidb_source import RoiDbSource
class ClassAwareSamplingRoiDbSource(RoiDbSource):
""" interface to load class aware sampling roidb data from files
"""
def __init__(self,
anno_file,
image_dir=None,
samples=-1,
is_shuffle=True,
load_img=False,
cname2cid=None,
use_default_label=None,
mixup_epoch=-1,
with_background=True):
""" Init
Args:
anno_file (str): label file path
image_dir (str): root dir for images
samples (int): samples to load, -1 means all
is_shuffle (bool): whether to shuffle samples
load_img (bool): whether to load image data in this class
cname2cid (dict): the label name to id dictionary
use_default_label (bool): whether to use the default mapping of label to id
mixup_epoch (int): parse mixup in first n epoch
with_background (bool): whether to load background
as a class
"""
super(ClassAwareSamplingRoiDbSource, self).__init__(
anno_file=anno_file,
image_dir=image_dir,
samples=samples,
is_shuffle=is_shuffle,
load_img=load_img,
cname2cid=cname2cid,
use_default_label=use_default_label,
mixup_epoch=mixup_epoch,
with_background=with_background)
self._img_weights = None
def __str__(self):
return 'ClassAwareSamplingRoiDbSource(fname:%s,epoch:%d,size:%d)' \
% (self._fname, self._epoch, self.size())
def next(self):
""" load next sample
"""
if self._epoch < 0:
self.reset()
_pos = np.random.choice(
self._samples, 1, replace=False, p=self._img_weights)[0]
sample = copy.deepcopy(self._roidb[_pos])
if self._load_img:
sample['image'] = self._load_image(sample['im_file'])
else:
sample['im_file'] = os.path.join(self._image_dir, sample['im_file'])
return sample
def _calc_img_weights(self):
""" calculate the probabilities of each sample
"""
imgs_cls = []
num_per_cls = {}
img_weights = []
for i, roidb in enumerate(self._roidb):
img_cls = set(
[k for cls in self._roidb[i]['gt_class'] for k in cls])
imgs_cls.append(img_cls)
for c in img_cls:
if c not in num_per_cls:
num_per_cls[c] = 1
else:
num_per_cls[c] += 1
for i in range(len(self._roidb)):
weights = 0
for c in imgs_cls[i]:
weights += 1 / num_per_cls[c]
img_weights.append(weights)
# Probabilities sum to 1
img_weights = img_weights / np.sum(img_weights)
return img_weights
def reset(self):
""" implementation of Dataset.reset
"""
if self._roidb is None:
self._roidb = self._load()
if self._img_weights is None:
self._img_weights = self._calc_img_weights()
self._samples = len(self._roidb)
if self._epoch < 0:
self._epoch = 0
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from pycocotools.coco import COCO
from .dataset import DataSet
from ppdet.core.workspace import register, serializable
import logging
logger = logging.getLogger(__name__)
@register
@serializable
class COCODataSet(DataSet):
"""
Load COCO records with annotations in json file 'anno_path'
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): json file path.
sample_num (int): number of samples to load, -1 means all.
with_background (bool): whether to load background as a class.
If True, total class number will be 81. Default True.
"""
def __init__(self,
image_dir=None,
anno_path=None,
dataset_dir=None,
sample_num=-1,
with_background=True):
super(COCODataSet, self).__init__(
image_dir=image_dir,
anno_path=anno_path,
dataset_dir=dataset_dir,
sample_num=sample_num,
with_background=with_background)
self.anno_path = anno_path
self.sample_num = sample_num
self.with_background = with_background
# `roidbs` is list of dict whose structure is:
# {
# 'im_file': im_fname, # image file name
# 'im_id': img_id, # image id
# 'h': im_h, # height of image
# 'w': im_w, # width
# 'is_crowd': is_crowd,
# 'gt_score': gt_score,
# 'gt_class': gt_class,
# 'gt_bbox': gt_bbox,
# 'gt_poly': gt_poly,
# }
self.roidbs = None
# a dict used to map category name to class id
self.cname2cid = None
def load_roidb_and_cname2cid(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
assert anno_path.endswith('.json'), \
'invalid coco annotation file: ' + anno_path
coco = COCO(anno_path)
img_ids = coco.getImgIds()
cat_ids = coco.getCatIds()
records = []
ct = 0
# when with_background = True, mapping category to classid, like:
# background:0, first_class:1, second_class:2, ...
catid2clsid = dict({
catid: i + int(self.with_background)
for i, catid in enumerate(cat_ids)
})
cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in catid2clsid.items()
})
for img_id in img_ids:
img_anno = coco.loadImgs(img_id)[0]
im_fname = img_anno['file_name']
im_w = float(img_anno['width'])
im_h = float(img_anno['height'])
ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
instances = coco.loadAnns(ins_anno_ids)
bboxes = []
for inst in instances:
x, y, box_w, box_h = inst['bbox']
x1 = max(0, x)
y1 = max(0, y)
x2 = min(im_w - 1, x1 + max(0, box_w - 1))
y2 = min(im_h - 1, y1 + max(0, box_h - 1))
if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
inst['clean_bbox'] = [x1, y1, x2, y2]
bboxes.append(inst)
else:
logger.warn(
'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes)
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
gt_score = np.ones((num_bbox, 1), dtype=np.float32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox
for i, box in enumerate(bboxes):
catid = box['category_id']
gt_class[i][0] = catid2clsid[catid]
gt_bbox[i, :] = box['clean_bbox']
is_crowd[i][0] = box['iscrowd']
if 'segmentation' in box:
gt_poly[i] = box['segmentation']
im_fname = os.path.join(image_dir,
im_fname) if image_dir else im_fname
coco_rec = {
'im_file': im_fname,
'im_id': np.array([img_id]),
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'gt_poly': gt_poly,
'difficult': difficult
}
logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
im_fname, img_id, im_h, im_w))
records.append(coco_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'no coco record found in %s' % (anno_path)
logger.info('{} samples in file {}'.format(ct, anno_path))
self.roidbs, self.cname2cid = records, cname2cid
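A small worked example of the catid2clsid remapping above: COCO category ids are sparse, and class id 0 is reserved for background when with_background is True. The ids below are an illustrative subset.

cat_ids = [1, 2, 4, 90]  # sparse COCO-style category ids
with_background = True
catid2clsid = {catid: i + int(with_background) for i, catid in enumerate(cat_ids)}
print(catid2clsid)  # {1: 1, 2: 2, 4: 3, 90: 4}; with_background=False gives 0..3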
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from pycocotools.coco import COCO
import logging
logger = logging.getLogger(__name__)
def load(anno_path, sample_num=-1, with_background=True):
"""
Load COCO records with annotations in json file 'anno_path'
Args:
anno_path (str): json file path
sample_num (int): number of samples to load, -1 means all
with_background (bool): whether to load background as a class.
If True, total class number will
be 81. Default True
Returns:
(records, cname2cid)
'records' is list of dict whose structure is:
{
'im_file': im_fname, # image file name
'im_id': img_id, # image id
'h': im_h, # height of image
'w': im_w, # width
'is_crowd': is_crowd,
'gt_score': gt_score,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': gt_poly,
}
'cname2cid' is a dict used to map category name to class id
"""
assert anno_path.endswith('.json'), 'invalid coco annotation file: ' \
+ anno_path
coco = COCO(anno_path)
img_ids = coco.getImgIds()
cat_ids = coco.getCatIds()
records = []
ct = 0
# when with_background = True, mapping category to classid, like:
# background:0, first_class:1, second_class:2, ...
catid2clsid = dict(
{catid: i + int(with_background)
for i, catid in enumerate(cat_ids)})
cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in catid2clsid.items()
})
for img_id in img_ids:
img_anno = coco.loadImgs(img_id)[0]
im_fname = img_anno['file_name']
im_w = float(img_anno['width'])
im_h = float(img_anno['height'])
ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
instances = coco.loadAnns(ins_anno_ids)
bboxes = []
for inst in instances:
x, y, box_w, box_h = inst['bbox']
x1 = max(0, x)
y1 = max(0, y)
x2 = min(im_w - 1, x1 + max(0, box_w - 1))
y2 = min(im_h - 1, y1 + max(0, box_h - 1))
if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
inst['clean_bbox'] = [x1, y1, x2, y2]
bboxes.append(inst)
else:
logger.warn(
'Found an invalid bbox in annotations: im_id: {}, area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.
format(img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes)
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
gt_score = np.ones((num_bbox, 1), dtype=np.float32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox
for i, box in enumerate(bboxes):
catid = box['category_id']
gt_class[i][0] = catid2clsid[catid]
gt_bbox[i, :] = box['clean_bbox']
is_crowd[i][0] = box['iscrowd']
if 'segmentation' in box:
gt_poly[i] = box['segmentation']
coco_rec = {
'im_file': im_fname,
'im_id': np.array([img_id]),
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'gt_poly': gt_poly,
'difficult': difficult
}
logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
im_fname, img_id, im_h, im_w))
records.append(coco_rec)
ct += 1
if sample_num > 0 and ct >= sample_num:
break
assert len(records) > 0, 'no coco record found in %s' % (anno_path)
logger.info('{} samples in file {}'.format(ct, anno_path))
return records, cname2cid
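The xywh-to-xyxy conversion with clipping, traced on one toy box that overhangs the right edge of a 100x80 image.

im_w, im_h = 100., 80.
x, y, box_w, box_h = 90., 10., 20., 30.     # COCO-style [x, y, w, h]
x1 = max(0, x)                              # 90.0
y1 = max(0, y)                              # 10.0
x2 = min(im_w - 1, x1 + max(0, box_w - 1))  # min(99, 109) -> 99.0 (clipped)
y2 = min(im_h - 1, y1 + max(0, box_h - 1))  # min(79, 39) -> 39.0
print([x1, y1, x2, y2])                     # [90.0, 10.0, 99.0, 39.0]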
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from ppdet.core.workspace import register, serializable
from ppdet.utils.download import get_dataset_path
@serializable
class DataSet(object):
"""
Dataset, e.g., coco, pascal voc
Args:
dataset_dir (str): root directory for dataset
image_dir (str): directory where image files are stored
anno_path (str): annotation file path
sample_num (int): number of samples to load, -1 means all
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
with_background=True,
use_default_label=None,
**kwargs):
super(DataSet, self).__init__()
self.anno_path = anno_path
self.image_dir = image_dir if image_dir is not None else ''
self.dataset_dir = dataset_dir if dataset_dir is not None else ''
self.sample_num = sample_num
self.with_background = with_background
self.use_default_label = use_default_label
self.cname2cid = None
self._imid2path = None
def load_roidb_and_cname2cid(self):
"""load dataset"""
raise NotImplementedError('%s.load_roidb_and_cname2cid not available' %
(self.__class__.__name__))
def get_roidb(self):
if not self.roidbs:
data_dir = get_dataset_path(self.dataset_dir, self.anno_path,
self.image_dir)
if data_dir:
self.dataset_dir = data_dir
self.load_roidb_and_cname2cid()
return self.roidbs
def get_cname2cid(self):
if not self.cname2cid:
self.load_roidb_and_cname2cid()
return self.cname2cid
def get_anno(self):
return os.path.join(self.dataset_dir, self.anno_path)
def get_imid2path(self):
return self._imid2path
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
return f.lower().endswith(extensions)
def _make_dataset(dir):
dir = os.path.expanduser(dir)
if not os.path.isdir(dir):
raise ValueError('{} should be a dir'.format(dir))
images = []
for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
for fname in sorted(fnames):
path = os.path.join(root, fname)
if _is_valid_file(path):
images.append(path)
return images
@register
@serializable
class ImageFolder(DataSet):
"""
Args:
dataset_dir (str): root directory for dataset.
image_dir (list|str): list of image folders or list of image files.
anno_path (str): annotation file path.
sample_num (int): number of samples to load, -1 means all.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
with_background=True,
**kwargs):
super(ImageFolder, self).__init__(image_dir=image_dir, anno_path=anno_path,
sample_num=sample_num, dataset_dir=dataset_dir, with_background=with_background)
self.anno_path = anno_path
self.sample_num = sample_num
self.with_background = with_background
self.roidbs = None
self._imid2path = {}
def get_roidb(self):
if not self.roidbs:
self.roidbs = self._load_images()
return self.roidbs
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
def _parse(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._parse()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
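A quick sanity check of the directory scan used by ImageFolder; it relies on the fixed _make_dataset above, and the temp folder is a stand-in for a real image_dir.

import os
import tempfile

d = tempfile.mkdtemp()
open(os.path.join(d, 'a.jpg'), 'w').close()      # passes the extension filter
open(os.path.join(d, 'notes.txt'), 'w').close()  # filtered out
print(_make_dataset(d))  # only .../a.jpg is returned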
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import copy
import logging
logger = logging.getLogger(__name__)
from ..dataset import Dataset
class IteratorSource(Dataset):
"""
Load data samples from iterator in stream mode
Args:
iter_maker (callable): callable function to generate an iterator
samples (int): number of samples to load, -1 means all
"""
def __init__(self, iter_maker, samples=-1, **kwargs):
super(IteratorSource, self).__init__()
self._epoch = -1
self._iter_maker = iter_maker
self._data_iter = None
self._pos = -1
self._drained = False
self._samples = samples
self._sample_num = -1
def next(self):
if self._epoch < 0:
self.reset()
if self._data_iter is not None:
try:
sample = next(self._data_iter)
self._pos += 1
ret = sample
except StopIteration as e:
if self._sample_num <= 0:
self._sample_num = self._pos
elif self._sample_num != self._pos:
logger.info('number of loaded samples is different '
'from the previous setting [prev:%d, now:%d]' %
(self._sample_num, self._pos))
self._sample_num = self._pos
self._data_iter = None
self._drained = True
raise e
else:
raise StopIteration("no more data in " + str(self))
if self._samples > 0 and self._pos >= self._samples:
self._data_iter = None
self._drained = True
raise StopIteration("no more data in " + str(self))
else:
return ret
def reset(self):
if self._data_iter is None:
self._data_iter = self._iter_maker()
if self._epoch < 0:
self._epoch = 0
else:
self._epoch += 1
self._pos = 0
self._drained = False
def size(self):
return self._sample_num
def drained(self):
assert self._epoch >= 0, "the first epoch has not started yet"
return self._pos >= self.size()
def epoch_id(self):
return self._epoch
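A minimal sketch of driving IteratorSource: iter_maker must be a factory that returns a fresh iterator on each call, since reset() re-invokes it once the previous iterator is exhausted.

def make_iter():
    return iter([{'im_id': 0}, {'im_id': 1}, {'im_id': 2}])

src = IteratorSource(make_iter)
print(src.next())  # {'im_id': 0} (the first call triggers reset() internally)
print(src.next())  # {'im_id': 1}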
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# load data records from local files(maybe in COCO or VOC data formats)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import numpy as np
import logging
import pickle as pkl
logger = logging.getLogger(__name__)
def check_records(records):
""" check the fields of 'records' must contains some keys
"""
needed_fields = [
'im_file', 'im_id', 'h', 'w', 'is_crowd', 'gt_class', 'gt_bbox',
'gt_poly'
]
for i, rec in enumerate(records):
for k in needed_fields:
assert k in rec, 'not found field[%s] in record[%d]' % (k, i)
def load_roidb(anno_file, sample_num=-1):
""" load normalized data records from file
'anno_file' which is a pickled file.
And the records should have the structure:
{
'im_file': str, # image file name
'im_id': int, # image id
'h': int, # height of image
'w': int, # width of image
'is_crowd': bool,
'gt_class': list of np.ndarray, # classids info
'gt_bbox': list of np.ndarray, # bounding box info
'gt_poly': list of int, # poly info
}
Args:
anno_file (str): file name for pickled records
sample_num (int): number of samples to load
Returns:
list of records for detection model training
"""
assert anno_file.endswith('.roidb'), 'invalid roidb file[%s]' % (anno_file)
with open(anno_file, 'rb') as f:
roidb = f.read()
# to support both python3 and python2
try:
records, cname2cid = pkl.loads(roidb, encoding='bytes')
except Exception:
records, cname2cid = pkl.loads(roidb)
assert type(records) is list, 'invalid data type from roidb'
if sample_num > 0 and sample_num < len(records):
records = records[:sample_num]
return records, cname2cid
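A round-trip sketch of the .roidb container: a pickled (records, cname2cid) tuple. The toy record carries every key that check_records requires, with empty annotation lists.

import pickle as pkl

toy_records = [{'im_file': 'a.jpg', 'im_id': 0, 'h': 80, 'w': 100,
                'is_crowd': False, 'gt_class': [], 'gt_bbox': [], 'gt_poly': []}]
toy_cname2cid = {'person': 1}
with open('toy.roidb', 'wb') as f:
    pkl.dump((toy_records, toy_cname2cid), f)
records, cname2cid = load_roidb('toy.roidb')
print(len(records), cname2cid)  # 1 {'person': 1}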
def load(fname,
samples=-1,
with_background=True,
with_cat2id=False,
use_default_label=None,
cname2cid=None):
""" Load data records from 'fnames'
Args:
fnames (str): file name for data record, eg:
instances_val2017.json or COCO17_val2017.roidb
samples (int): number of samples to load, default to all
with_background (bool): whether load background as a class.
default True.
with_cat2id (bool): whether return cname2cid info out
use_default_label (bool): whether use the default mapping of label to id
cname2cid (dict): the mapping of category name to id
Returns:
list of loaded records whose structure is:
{
'im_file': str, # image file name
'im_id': int, # image id
'h': int, # height of image
'w': int, # width of image
'is_crowd': bool,
'gt_class': list of np.ndarray, # classids info
'gt_bbox': list of np.ndarray, # bounding box info
'gt_poly': list of int, # poly info
}
"""
if fname.endswith('.roidb'):
records, cname2cid = load_roidb(fname, samples)
elif fname.endswith('.json'):
from . import coco_loader
records, cname2cid = coco_loader.load(fname, samples, with_background)
elif "wider_face" in fname:
from . import widerface_loader
records = widerface_loader.load(fname, samples)
return records
elif os.path.isfile(fname):
from . import voc_loader
if use_default_label is None or cname2cid is not None:
records, cname2cid = voc_loader.get_roidb(
fname, samples, cname2cid, with_background=with_background)
else:
records, cname2cid = voc_loader.load(
fname,
samples,
use_default_label,
with_background=with_background)
else:
raise ValueError('invalid file type when load data from file[%s]' %
(fname))
check_records(records)
if with_cat2id:
return records, cname2cid
else:
return records
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#function:
# interface to load data from local files and parse it for samples,
# eg: roidb data in pickled files
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import random
import copy
import pickle as pkl
from ..dataset import Dataset
import logging
logger = logging.getLogger(__name__)
class RoiDbSource(Dataset):
""" interface to load roidb data from files
"""
def __init__(self,
anno_file,
image_dir=None,
samples=-1,
is_shuffle=True,
load_img=False,
cname2cid=None,
use_default_label=None,
mixup_epoch=-1,
with_background=True):
""" Init
Args:
anno_file (str): label file path
image_dir (str): root dir for images
samples (int): samples to load, -1 means all
is_shuffle (bool): whether to shuffle samples
load_img (bool): whether to load image data in this class
cname2cid (dict): the label name to id dictionary
use_default_label (bool): whether to use the default mapping of label to id
mixup_epoch (int): parse mixup in first n epoch
with_background (bool): whether to load background
as a class
"""
super(RoiDbSource, self).__init__()
self._epoch = -1
assert os.path.isfile(anno_file) or os.path.isdir(anno_file), \
'anno_file {} is not a file or a directory'.format(anno_file)
self._fname = anno_file
self._image_dir = image_dir if image_dir is not None else ''
if image_dir is not None:
assert os.path.isdir(image_dir), \
'image_dir {} is not a directory'.format(image_dir)
self._roidb = None
self._pos = -1
self._drained = False
self._samples = samples
self._is_shuffle = is_shuffle
self._load_img = load_img
self.use_default_label = use_default_label
self._mixup_epoch = mixup_epoch
self._with_background = with_background
self.cname2cid = cname2cid
self._imid2path = None
def __str__(self):
return 'RoiDbSource(epoch:%d,size:%d,pos:%d,fname:%s)' \
% (self._epoch, self.size(), self._pos, self._fname)
def next(self):
""" load next sample
"""
if self._epoch < 0:
self.reset()
if self._pos >= self._samples:
self._drained = True
raise StopIteration('%s no more data' % (str(self)))
sample = copy.deepcopy(self._roidb[self._pos])
if self._load_img:
sample['image'] = self._load_image(sample['im_file'])
else:
sample['im_file'] = os.path.join(self._image_dir, sample['im_file'])
if self._epoch < self._mixup_epoch:
mix_idx = random.randint(1, self._samples - 1)
mix_pos = (mix_idx + self._pos) % self._samples
sample['mixup'] = copy.deepcopy(self._roidb[mix_pos])
if self._load_img:
sample['mixup']['image'] = \
self._load_image(sample['mixup']['im_file'])
else:
sample['mixup']['im_file'] = \
os.path.join(self._image_dir, sample['mixup']['im_file'])
self._pos += 1
return sample
def _load(self):
""" load data from file
"""
from . import loader
records, cname2cid = loader.load(self._fname, self._samples,
self._with_background, True,
self.use_default_label, self.cname2cid)
self.cname2cid = cname2cid
return records
def _load_image(self, where):
fn = os.path.join(self._image_dir, where)
with open(fn, 'rb') as f:
return f.read()
def reset(self):
""" implementation of Dataset.reset
"""
if self._roidb is None:
self._roidb = self._load()
self._samples = len(self._roidb)
if self._is_shuffle:
random.shuffle(self._roidb)
if self._mixup_epoch > 0 and self._samples < 2:
logger.info("Disable mixup for dataset samples "
"less than 2 samples")
self._mixup_epoch = -1
if self._epoch < 0:
self._epoch = 0
else:
self._epoch += 1
self._pos = 0
self._drained = False
def size(self):
""" implementation of Dataset.size
"""
return len(self._roidb)
def drained(self):
""" implementation of Dataset.drained
"""
assert self._epoch >= 0, 'The first epoch has not begun!'
return self._pos >= self.size()
def epoch_id(self):
""" return epoch id for latest sample
"""
return self._epoch
def get_imid2path(self):
"""return image id to image path map"""
if self._imid2path is None:
self._imid2path = {}
for record in self._roidb:
im_id = record['im_id']
im_id = im_id if isinstance(im_id, int) else im_id[0]
im_path = os.path.join(self._image_dir, record['im_file'])
self._imid2path[im_id] = im_path
return self._imid2path
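The mixup partner selection in next() offsets the current position by a random value in [1, samples-1] modulo samples, which can never land back on the same sample; a standalone check:

import random

n, pos = 5, 2
for _ in range(1000):
    mix_idx = random.randint(1, n - 1)  # offset in [1, n-1]
    mix_pos = (mix_idx + pos) % n       # partner index
    assert mix_pos != pos               # a sample is never mixed with itself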
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# interface to load data from txt file.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import copy
from ..dataset import Dataset
class SimpleSource(Dataset):
"""
Load image files for testing purpose
Args:
images (list): list of path of images
samples (int): number of samples to load, -1 means all
load_img (bool): should images be loaded
"""
def __init__(self, images=[], samples=-1, load_img=True, **kwargs):
super(SimpleSource, self).__init__()
self._epoch = -1
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
self._images = images
self._fname = None
self._simple = None
self._pos = -1
self._drained = False
self._samples = samples
self._load_img = load_img
self._imid2path = {}
def next(self):
if self._epoch < 0:
self.reset()
if self._pos >= self.size():
self._drained = True
raise StopIteration("no more data in " + str(self))
else:
sample = copy.deepcopy(self._simple[self._pos])
if self._load_img:
sample['image'] = self._load_image(sample['im_file'])
self._pos += 1
return sample
def _load(self):
ct = 0
records = []
for image in self._images:
if self._samples > 0 and ct >= self._samples:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "no image file found"
return records
def _load_image(self, where):
with open(where, 'rb') as f:
return f.read()
def reset(self):
if self._simple is None:
self._simple = self._load()
if self._epoch < 0:
self._epoch = 0
else:
self._epoch += 1
self._pos = 0
self._drained = False
def size(self):
return len(self._simple)
def drained(self):
assert self._epoch >= 0, "the first epoch has not started yet"
return self._pos >= self.size()
def epoch_id(self):
return self._epoch
def get_imid2path(self):
"""return image id to image path map"""
return self._imid2path
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import xml.etree.ElementTree as ET
from ppdet.core.workspace import register, serializable
from .dataset import DataSet
@register
@serializable
class VOCDataSet(DataSet):
"""
Load dataset with PascalVOC format.
Notes:
`anno_path` must contain xml file and image file paths for annotations.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): voc annotation file path.
sample_num (int): number of samples to load, -1 means all.
use_default_label (bool): whether to use the default mapping of
label to integer index. Default True.
with_background (bool): whether to load background as a class.
Default True.
label_list (str): if use_default_label is False, will load
mapping between category and class index.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
use_default_label=True,
with_background=True,
label_list='label_list.txt'):
super(VOCDataSet, self).__init__(
image_dir=image_dir,
anno_path=anno_path,
sample_num=sample_num,
dataset_dir=dataset_dir,
with_background=with_background)
# roidbs is list of dict whose structure is:
# {
# 'im_file': im_fname, # image file name
# 'im_id': im_id, # image id
# 'h': im_h, # height of image
# 'w': im_w, # width
# 'is_crowd': is_crowd,
# 'gt_class': gt_class,
# 'gt_bbox': gt_bbox,
# 'gt_poly': gt_poly,
# }
self.roidbs = None
# 'cname2id' is a dict to map category name to class id
self.cname2cid = None
self.label_list = label_list
def load_roidb_and_cname2cid(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
# mapping category name to class id
# if with_background is True:
# background:0, first_class:1, second_class:2, ...
# if with_background is False:
# first_class:0, second_class:1, ...
records = []
ct = 0
cname2cid = {}
if not self.use_default_label:
label_path = os.path.join(self.dataset_dir, self.label_list)
if not os.path.exists(label_path):
raise ValueError("label_list {} does not exists".format(
label_path))
with open(label_path, 'r') as fr:
label_id = int(self.with_background)
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = pascalvoc_label(self.with_background)
with open(anno_path, 'r') as fr:
while True:
line = fr.readline()
if not line:
break
img_file, xml_file = [os.path.join(image_dir, x) \
for x in line.strip().split()[:2]]
if not os.path.isfile(xml_file):
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
gt_class = np.zeros((len(objs), 1), dtype=np.int32)
gt_score = np.ones((len(objs), 1), dtype=np.float32)
is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs):
cname = obj.find('name').text
gt_class[i][0] = cname2cid[cname]
_difficult = int(obj.find('difficult').text)
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
gt_bbox[i] = [x1, y1, x2, y2]
is_crowd[i][0] = 0
difficult[i][0] = _difficult
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_poly': [],
'difficult': difficult
}
if len(objs) != 0:
records.append(voc_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'no voc record found in %s' % (
self.anno_path)
self.roidbs, self.cname2cid = records, cname2cid
def pascalvoc_label(with_background=True):
labels_map = {
'aeroplane': 1,
'bicycle': 2,
'bird': 3,
'boat': 4,
'bottle': 5,
'bus': 6,
'car': 7,
'cat': 8,
'chair': 9,
'cow': 10,
'diningtable': 11,
'dog': 12,
'horse': 13,
'motorbike': 14,
'person': 15,
'pottedplant': 16,
'sheep': 17,
'sofa': 18,
'train': 19,
'tvmonitor': 20
}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
return labels_map
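Effect of with_background on the map above:

print(pascalvoc_label(True)['aeroplane'])   # 1, since id 0 is the background
print(pascalvoc_label(False)['aeroplane'])  # 0, no background class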
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import xml.etree.ElementTree as ET
def get_roidb(anno_path, sample_num=-1, cname2cid=None, with_background=True):
"""
Load VOC records with annotations in xml directory 'anno_path'
Notes:
${anno_path} must contain xml file and image file paths for annotations
Args:
anno_path (str): root directory for voc annotation data
sample_num (int): number of samples to load, -1 means all
cname2cid (dict): the label name to id dictionary
with_background (bool): whether to load background as a class.
If True, total class number will
be 21. Default True
Returns:
(records, catname2clsid)
'records' is list of dict whose structure is:
{
'im_file': im_fname, # image file name
'im_id': im_id, # image id
'h': im_h, # height of image
'w': im_w, # width
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': gt_poly,
}
'cname2id' is a dict to map category name to class id
"""
data_dir = os.path.dirname(anno_path)
records = []
ct = 0
existence = cname2cid is not None
if cname2cid is None:
cname2cid = {}
# mapping category name to class id
# background:0, first_class:1, second_class:2, ...
with open(anno_path, 'r') as fr:
while True:
line = fr.readline()
if not line:
break
img_file, xml_file = [os.path.join(data_dir, x) \
for x in line.strip().split()[:2]]
if not os.path.isfile(xml_file):
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
gt_class = np.zeros((len(objs), 1), dtype=np.int32)
gt_score = np.ones((len(objs), 1), dtype=np.float32)
is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs):
cname = obj.find('name').text
if not existence and cname not in cname2cid:
# the background's id is 0, so need to add 1.
cname2cid[cname] = len(cname2cid) + int(with_background)
elif existence and cname not in cname2cid:
raise KeyError(
'cname[%s] not found in cname2cid when mapping it to cid.' %
(cname))
gt_class[i][0] = cname2cid[cname]
_difficult = int(obj.find('difficult').text)
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
gt_bbox[i] = [x1, y1, x2, y2]
is_crowd[i][0] = 0
difficult[i][0] = _difficult
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_poly': [],
'difficult': difficult
}
if len(objs) != 0:
records.append(voc_rec)
ct += 1
if sample_num > 0 and ct >= sample_num:
break
assert len(records) > 0, 'not found any voc record in %s' % (anno_path)
return [records, cname2cid]
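# Illustrative usage of get_roidb (hypothetical paths): the annotation file
# is a plain-text list in which every line holds an image path and its xml
# path, both relative to the directory containing the file, e.g.
#
#   JPEGImages/000005.jpg Annotations/000005.xml
#   JPEGImages/000007.jpg Annotations/000007.xml
#
#   records, cname2cid = get_roidb('dataset/voc/train.txt', sample_num=100)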
def load(anno_path, sample_num=-1, use_default_label=True,
with_background=True):
"""
Load VOC records with annotations in xml directory 'anno_path'
Notes:
each line in ${anno_path} should contain an image file path and its xml file path
Args:
anno_path (str): root directory for voc annotation data
sample_num (int): number of samples to load, -1 means all
use_default_label (bool): whether to use the default mapping of label to id
with_background (bool): whether to load background as a class.
if True, total class number will be 21. default True
Returns:
(records, cname2cid)
'records' is list of dict whose structure is:
{
'im_file': im_fname, # image file name
'im_id': im_id, # image id
'h': im_h, # height of image
'w': im_w, # width
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_poly': gt_poly,
'difficult': difficult,
}
'cname2cid' is a dict to map category name to class id
"""
data_dir = os.path.dirname(anno_path)
# mapping category name to class id
# if with_background is True:
# background:0, first_class:1, second_class:2, ...
# if with_background is False:
# first_class:0, second_class:1, ...
records = []
ct = 0
cname2cid = {}
if not use_default_label:
label_path = os.path.join(data_dir, 'label_list.txt')
with open(label_path, 'r') as fr:
label_id = int(with_background)
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = pascalvoc_label(with_background)
with open(anno_path, 'r') as fr:
while True:
line = fr.readline()
if not line:
break
img_file, xml_file = [os.path.join(data_dir, x) \
for x in line.strip().split()[:2]]
if not os.path.isfile(xml_file):
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
gt_class = np.zeros((len(objs), 1), dtype=np.int32)
gt_score = np.ones((len(objs), 1), dtype=np.float32)
is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs):
cname = obj.find('name').text
gt_class[i][0] = cname2cid[cname]
_difficult = int(obj.find('difficult').text)
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
gt_bbox[i] = [x1, y1, x2, y2]
is_crowd[i][0] = 0
difficult[i][0] = _difficult
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_poly': [],
'difficult': difficult
}
if len(objs) != 0:
records.append(voc_rec)
ct += 1
if sample_num > 0 and ct >= sample_num:
break
assert len(records) > 0, 'not found any voc record in %s' % (anno_path)
return [records, cname2cid]
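# When use_default_label is False, a 'label_list.txt' file is expected next
# to the annotation file, one category name per line; ids follow file order
# and are shifted by 1 when with_background is True. A minimal hypothetical
# example:
#
#   aeroplane
#   bicycle
#   bird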
def pascalvoc_label(with_background=True):
labels_map = {
'aeroplane': 1,
'bicycle': 2,
'bird': 3,
'boat': 4,
'bottle': 5,
'bus': 6,
'car': 7,
'cat': 8,
'chair': 9,
'cow': 10,
'diningtable': 11,
'dog': 12,
'horse': 13,
'motorbike': 14,
'person': 15,
'pottedplant': 16,
'sheep': 17,
'sofa': 18,
'train': 19,
'tvmonitor': 20
}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
return labels_map
...@@ -17,74 +17,93 @@ import numpy as np ...@@ -17,74 +17,93 @@ import numpy as np
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from ppdet.core.workspace import register, serializable
from .dataset import DataSet
def load(anno_path, sample_num=-1, cname2cid=None, with_background=True):
@register
@serializable
class WIDERFaceDataSet(DataSet):
""" """
Load WiderFace records with 'anno_path' Load WiderFace records with 'anno_path'
Args: Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): root directory for WIDER FACE annotation data anno_path (str): root directory for WIDER FACE annotation data
sample_num (int): number of samples to load, -1 means all sample_num (int): number of samples to load, -1 means all
with_background (bool): whether to load background as a class. with_background (bool): whether to load background as a class.
if True, total class number will if True, total class number will be 2. default True.
be 2. default True
Returns:
(records, catname2clsid)
'records' is list of dict whose structure is:
{
'im_file': im_fname, # image file name
'im_id': im_id, # image id
'gt_class': gt_class,
'gt_bbox': gt_bbox,
}
'cname2id' is a dict to map category name to class id
""" """
txt_file = anno_path def __init__(self,
dataset_dir=None,
records = [] image_dir=None,
ct = 0 anno_path=None,
file_lists = _load_file_list(txt_file) sample_num=-1,
cname2cid = widerface_label(with_background) with_background=True):
super(WIDERFaceDataSet, self).__init__(
for item in file_lists: image_dir=image_dir,
im_fname = item[0] anno_path=anno_path,
im_id = np.array([ct]) sample_num=sample_num,
gt_bbox = np.zeros((len(item) - 2, 4), dtype=np.float32) dataset_dir=dataset_dir,
gt_class = np.ones((len(item) - 2, 1), dtype=np.int32) with_background=with_background)
for index_box in range(len(item)): self.anno_path = anno_path
if index_box >= 2: self.sample_num = sample_num
temp_info_box = item[index_box].split(' ') self.with_background = with_background
xmin = float(temp_info_box[0]) self.roidbs = None
ymin = float(temp_info_box[1]) self.cname2cid = None
w = float(temp_info_box[2])
h = float(temp_info_box[3]) def load_roidb_and_cname2cid(self):
# Filter out wrong labels anno_path = os.path.join(self.dataset_dir, self.anno_path)
if w < 0 or h < 0: image_dir = os.path.join(self.dataset_dir, self.image_dir)
continue
xmin = max(0, xmin) txt_file = anno_path
ymin = max(0, ymin)
xmax = xmin + w records = []
ymax = ymin + h ct = 0
gt_bbox[index_box - 2] = [xmin, ymin, xmax, ymax] file_lists = _load_file_list(txt_file)
cname2cid = widerface_label(self.with_background)
widerface_rec = {
'im_file': im_fname, for item in file_lists:
'im_id': im_id, im_fname = item[0]
'gt_bbox': gt_bbox, im_id = np.array([ct])
'gt_class': gt_class, gt_bbox = np.zeros((len(item) - 2, 4), dtype=np.float32)
} gt_class = np.ones((len(item) - 2, 1), dtype=np.int32)
# logger.debug for index_box in range(len(item)):
if len(item) != 0: if index_box >= 2:
records.append(widerface_rec) temp_info_box = item[index_box].split(' ')
xmin = float(temp_info_box[0])
ct += 1 ymin = float(temp_info_box[1])
if sample_num > 0 and ct >= sample_num: w = float(temp_info_box[2])
break h = float(temp_info_box[3])
assert len(records) > 0, 'not found any widerface in %s' % (anno_path) # Filter out wrong labels
logger.info('{} samples in file {}'.format(ct, anno_path)) if w < 0 or h < 0:
return records, cname2cid continue
xmin = max(0, xmin)
ymin = max(0, ymin)
xmax = xmin + w
ymax = ymin + h
gt_bbox[index_box - 2] = [xmin, ymin, xmax, ymax]
im_fname = os.path.join(image_dir,
im_fname) if image_dir else im_fname
widerface_rec = {
'im_file': im_fname,
'im_id': im_id,
'gt_bbox': gt_bbox,
'gt_class': gt_class,
}
# logger.debug
if len(item) != 0:
records.append(widerface_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
logger.info('{} samples in file {}'.format(ct, anno_path))
self.roidbs, self.cname2cid = records, cname2cid
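# For reference, each 'item' consumed above is expected to look like the
# following (a hedged reconstruction from the parsing logic; any trailing
# attribute flags of the official WIDER FACE format are simply ignored):
#
#   item[0]  -> relative image path
#   item[1]  -> number of faces in the image
#   item[2:] -> one 'xmin ymin w h ...' string per face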
def _load_file_list(input_txt): def _load_file_list(input_txt):
......
DATA:
TRAIN:
ANNO_FILE: data/coco.test/train2017.roidb
IMAGE_DIR: data/coco.test/train2017
SAMPLES: 10
TYPE: RoiDbSource
VAL:
ANNO_FILE: data/coco.test/val2017.roidb
IMAGE_DIR: data/coco.test/val2017
SAMPLES: 10
TYPE: RoiDbSource
TRANSFORM:
TRAIN:
OPS:
- OP: DecodeImage
TO_RGB: False
- OP: RandomFlipImage
PROB: 0.5
- OP: NormalizeImage
MEAN: [102.9801, 115.9465, 122.7717]
IS_SCALE: False
IS_CHANNEL_FIRST: False
- OP: ResizeImage
TARGET_SIZE: 800
MAX_SIZE: 1333
- OP: Permute
TO_BGR: False
- OP: ArrangeRCNN
BATCH_SIZE: 1
IS_PADDING: True
DROP_LAST: False
WORKER_CONF:
BUFSIZE: 100
WORKER_NUM: 4
USE_PROCESS: True
MEMSIZE: 2G
VAL:
OPS:
- OP: DecodeImage
TO_RGB: True
- OP: ResizeImage
TARGET_SIZE: 224
- OP: ArrangeSSD
BATCH_SIZE: 1
WORKER_CONF:
BUFSIZE: 100
WORKER_NUM: 4
USE_PROCESS: True
#!/bin/bash
#function:
#   prepare coco data for testing
#usage:
#   bash prepare_data.sh [python2|python3]
root=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
cwd=`pwd`
if [[ $cwd != $root ]];then
pushd $root 2>&1 1>/dev/null
fi
test_coco_python2_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python2.zip/20190603095315/download"
test_coco_python3_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python3.zip/20190603095447/download"
if [[ $1 = "python2" ]];then
test_coco_data_url=${test_coco_python2_url}
coco_zip_file="coco.test.python2.zip"
else
test_coco_data_url=${test_coco_python3_url}
coco_zip_file="coco.test.python3.zip"
fi
echo "download testing coco from url[${test_coco_data_url}]"
coco_root_dir=${coco_zip_file/.zip/}
# clear already exist file or directory
rm -rf ${coco_root_dir} ${coco_zip_file}
wget ${test_coco_data_url} -O ${coco_zip_file}
if [ -e $coco_zip_file ];then
echo "succeed to download ${coco_zip_file}, so unzip it"
unzip ${coco_zip_file} >/dev/null 2>&1
fi
if [ -e ${coco_root_dir} ];then
rm -rf coco.test
ln -s ${coco_root_dir} coco.test
echo "succeed to generate coco data in[${coco_root_dir}] for testing"
exit 0
else
echo "failed to generate coco data"
exit 1
fi
DATA:
TRAIN:
ANNO_FILE: data/coco.test/train2017.roidb
IMAGE_DIR: data/coco.test/train2017
SAMPLES: 10
IS_SHUFFLE: True
TYPE: RoiDbSource
TRANSFORM:
TRAIN:
OPS:
- OP: DecodeImage
TO_RGB: False
- OP: RandomFlipImage
PROB: 0.5
- OP: NormalizeImage
MEAN: [102.9801, 115.9465, 122.7717]
IS_SCALE: False
IS_CHANNEL_FIRST: False
- OP: ResizeImage
TARGET_SIZE: 800
MAX_SIZE: 1333
- OP: Permute
TO_BGR: False
- OP: ArrangeRCNN
BATCH_SIZE: 1
IS_PADDING: True
DROP_LAST: False
WORKER_CONF:
BUFSIZE: 100
WORKER_NUM: 4
MEMSIZE: 2G
USE_PROCESS: True
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import six
import logging
import matplotlib
matplotlib.use('Agg', force=False)
prefix = os.path.dirname(os.path.abspath(__file__))
# pick python version suffix for the test data
if six.PY3:
version = 'python3'
else:
version = 'python2'
data_root = os.path.join(prefix, 'data/coco.test.%s' % (version))
# coco data for testing
coco_data = {
'TRAIN': {
'ANNO_FILE': os.path.join(data_root, 'train2017.roidb'),
'IMAGE_DIR': os.path.join(data_root, 'train2017')
},
'VAL': {
'ANNO_FILE': os.path.join(data_root, 'val2017.roidb'),
'IMAGE_DIR': os.path.join(data_root, 'val2017')
}
}
script = os.path.join(os.path.dirname(__file__), 'data/prepare_data.sh')
if not os.path.exists(data_root):
ret = os.system('bash %s %s' % (script, version))
if ret != 0:
logging.error('file[%s] not found, you should manually prepare '
'your data using "data/prepare_data.sh"' % (data_root))
sys.exit(1)
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_num: 10
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !RandomFlipImage
is_mask_flip: true
is_normalized: false
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: false
batch_size: 1
shuffle: true
worker_num: 2
drop_last: false
use_process: false
EvalReader:
inputs_def:
fields: ['image', 'im_info', 'im_id']
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_num: 10
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !PadBatch
pad_to_stride: 32
use_padded_im_info: true
batch_size: 1
shuffle: false
drop_last: false
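# A minimal sketch of how reader sections like 'TrainReader' above are
# consumed (mirroring the unit tests in this change; the yml file name is
# assumed):
#
#   from ppdet.core.workspace import load_config
#   from ppdet.data.reader import create_reader
#
#   cfg = load_config('ppdet/data/tests/test.yml')
#   for samples in create_reader(cfg['TrainReader'], 10)():
#       pass  # each element of 'samples' is one tuple of the fields above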
...@@ -19,22 +19,26 @@ import logging ...@@ -19,22 +19,26 @@ import logging
import random import random
import copy import copy
import set_env from ppdet.data.parallel_map import ParallelMap
import ppdet.data.transform as tf
from ppdet.data.dataset import Dataset
class MemorySource(Dataset): class MemorySource(object):
""" memory data source for testing """ memory data source for testing
""" """
def __init__(self, samples): def __init__(self, samples):
super(MemorySource, self).__init__()
self._epoch = -1 self._epoch = -1
self._pos = -1 self._pos = -1
self._drained = False self._drained = False
self._samples = samples self._samples = samples
def __iter__(self):
return self
def __next__(self):
return self.next()
def next(self): def next(self):
if self._epoch < 0: if self._epoch < 0:
self.reset() self.reset()
...@@ -95,20 +99,20 @@ class TestDataset(unittest.TestCase): ...@@ -95,20 +99,20 @@ class TestDataset(unittest.TestCase):
def test_transform_with_abnormal_worker(self): def test_transform_with_abnormal_worker(self):
""" test dataset transform with abnormally exit process """ test dataset transform with abnormally exit process
""" """
samples = list(range(1000)) samples = list(range(20))
ds = MemorySource(samples) mem_sc = MemorySource(samples)
def _mapper(sample): def _worker(sample):
if sample == 3: if sample == 3:
sys.exit(1) sys.exit(1)
return 2 * sample return 2 * sample
worker_conf = {'WORKER_NUM': 2, 'use_process': True} test_worker = ParallelMap(
mapped = tf.map(ds, _mapper, worker_conf) mem_sc, _worker, worker_num=2, use_process=True)
ct = 0 ct = 0
for i, d in enumerate(mapped): for i, d in enumerate(test_worker):
ct += 1 ct += 1
self.assertTrue(d / 2 in samples) self.assertTrue(d / 2 in samples)
...@@ -117,20 +121,20 @@ class TestDataset(unittest.TestCase): ...@@ -117,20 +121,20 @@ class TestDataset(unittest.TestCase):
def test_transform_with_delay_worker(self): def test_transform_with_delay_worker(self):
""" test dataset transform with delayed process """ test dataset transform with delayed process
""" """
samples = list(range(1000)) samples = list(range(20))
ds = MemorySource(samples) mem_sc = MemorySource(samples)
def _mapper(sample): def _worker(sample):
if sample == 3: if sample == 3:
time.sleep(30) time.sleep(30)
return 2 * sample return 2 * sample
worker_conf = {'WORKER_NUM': 2, 'use_process': True} test_worker = ParallelMap(
mapped = tf.map(ds, _mapper, worker_conf) mem_sc, _worker, worker_num=2, use_process=True)
ct = 0 ct = 0
for i, d in enumerate(mapped): for i, d in enumerate(test_worker):
ct += 1 ct += 1
self.assertTrue(d / 2 in samples) self.assertTrue(d / 2 in samples)
...@@ -140,4 +144,3 @@ class TestDataset(unittest.TestCase): ...@@ -140,4 +144,3 @@ class TestDataset(unittest.TestCase):
if __name__ == '__main__': if __name__ == '__main__':
logging.basicConfig() logging.basicConfig()
unittest.main() unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import unittest
import sys
import logging
import set_env
from ppdet.data.source import IteratorSource
def _generate_iter_maker(num=10):
def _reader():
for i in range(num):
yield {'image': 'image_' + str(i), 'label': i}
return _reader
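# e.g. IteratorSource(_generate_iter_maker(5)) yields five dicts of the form
# {'image': 'image_0', 'label': 0}, which is what the tests below rely on.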
class TestIteratorSource(unittest.TestCase):
"""Test cases for dataset.source.roidb_source
"""
@classmethod
def setUpClass(cls):
""" setup
"""
pass
@classmethod
def tearDownClass(cls):
""" tearDownClass """
pass
def test_basic(self):
""" test basic apis 'next/size/drained'
"""
iter_maker = _generate_iter_maker()
iter_source = IteratorSource(iter_maker)
for i, sample in enumerate(iter_source):
self.assertTrue('image' in sample)
self.assertGreater(len(sample['image']), 0)
self.assertTrue(iter_source.drained())
self.assertEqual(i + 1, iter_source.size())
def test_reset(self):
""" test functions 'reset/epoch_id'
"""
iter_maker = _generate_iter_maker()
iter_source = IteratorSource(iter_maker)
self.assertTrue(iter_source.next() is not None)
self.assertEqual(iter_source.epoch_id(), 0)
iter_source.reset()
self.assertEqual(iter_source.epoch_id(), 1)
self.assertTrue(iter_source.next() is not None)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,96 +12,154 @@ ...@@ -12,96 +12,154 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import time
import unittest import unittest
import sys import os
import logging
import numpy as np from ppdet.data.source.coco import COCODataSet
from ppdet.data.reader import Reader
from ppdet.utils.download import get_path
from ppdet.utils.download import DATASET_HOME
import set_env from ppdet.data.transform.operators import DecodeImage, ResizeImage, Permute
from ppdet.data.transform.batch_operators import PadBatch
COCO_VAL_URL = 'http://images.cocodataset.org/zips/val2017.zip'
COCO_VAL_MD5SUM = '442b8da7639aecaf257c1dceb8ba8c80'
COCO_ANNO_URL = 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip'
COCO_ANNO_MD5SUM = 'f4bbac642086de4f52a3fdda2de5fa2c'
class TestLoader(unittest.TestCase):
"""Test cases for dataset.source.loader
"""
class TestReader(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
""" setup """ setup
""" """
cls.prefix = os.path.dirname(os.path.abspath(__file__)) root_path = os.path.join(DATASET_HOME, 'coco')
# json data _, _ = get_path(COCO_VAL_URL, root_path, COCO_VAL_MD5SUM)
cls.anno_path = os.path.join(cls.prefix, _, _ = get_path(COCO_ANNO_URL, root_path, COCO_ANNO_MD5SUM)
'data/coco/instances_val2017.json') cls.anno_path = 'annotations/instances_val2017.json'
cls.image_dir = os.path.join(cls.prefix, 'data/coco/val2017') cls.image_dir = 'val2017'
cls.anno_path1 = os.path.join(cls.prefix, cls.root_path = root_path
"data/voc/ImageSets/Main/train.txt")
cls.image_dir1 = os.path.join(cls.prefix, "data/voc/JPEGImages")
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
""" tearDownClass """ """ tearDownClass """
pass pass
def test_load_coco_in_json(self): def test_loader(self):
""" test loading COCO data in json file coco_loader = COCODataSet(
""" dataset_dir=self.root_path,
from ppdet.data.source.coco_loader import load image_dir=self.image_dir,
if not os.path.exists(self.anno_path): anno_path=self.anno_path,
logging.warn('not found %s, so skip this test' % (self.anno_path)) sample_num=10)
return sample_trans = [
samples = 10 DecodeImage(to_rgb=True), ResizeImage(
records, cname2id = load(self.anno_path, samples) target_size=800, max_size=1333, interp=1), Permute(to_bgr=False)
self.assertEqual(len(records), samples) ]
self.assertGreater(len(cname2id), 0) batch_trans = [PadBatch(pad_to_stride=32, use_padded_im_info=True), ]
def test_load_coco_in_roidb(self): inputs_def = {
""" test loading COCO data in pickled records 'fields': [
""" 'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd',
anno_path = os.path.join(self.prefix, 'gt_mask'
'data/roidbs/instances_val2017.roidb') ],
}
if not os.path.exists(anno_path): data_loader = Reader(
logging.warn('not found %s, so skip this test' % (anno_path)) coco_loader,
return sample_transforms=sample_trans,
batch_transforms=batch_trans,
samples = 10 batch_size=2,
from ppdet.data.source.loader import load_roidb shuffle=True,
records, cname2cid = load_roidb(anno_path, samples) drop_empty=True,
self.assertEqual(len(records), samples) inputs_def=inputs_def)()
self.assertGreater(len(cname2cid), 0) for i in range(2):
for samples in data_loader:
def test_load_voc_in_xml(self): for sample in samples:
""" test loading VOC data in xml files im_shape = sample[0].shape
""" self.assertEqual(im_shape[0], 3)
from ppdet.data.source.voc_loader import load self.assertEqual(im_shape[1] % 32, 0)
if not os.path.exists(self.anno_path1): self.assertEqual(im_shape[2] % 32, 0)
logging.warn('not found %s, so skip this test' % (self.anno_path1))
return im_info_shape = sample[1].shape
samples = 3 self.assertEqual(im_info_shape[-1], 3)
records, cname2cid = load(self.anno_path1, samples)
self.assertEqual(len(records), samples) im_id_shape = sample[2].shape
self.assertGreater(len(cname2cid), 0) self.assertEqual(im_id_shape[-1], 1)
def test_load_voc_in_roidb(self): gt_bbox_shape = sample[3].shape
""" test loading VOC data in pickled records self.assertEqual(gt_bbox_shape[-1], 4)
"""
anno_path = os.path.join(self.prefix, 'data/roidbs/train.roidb') gt_class_shape = sample[4].shape
self.assertEqual(gt_class_shape[-1], 1)
if not os.path.exists(anno_path): self.assertEqual(gt_class_shape[0], gt_bbox_shape[0])
logging.warn('not found %s, so skip this test' % (anno_path))
return is_crowd_shape = sample[5].shape
self.assertEqual(is_crowd_shape[-1], 1)
samples = 3 self.assertEqual(is_crowd_shape[0], gt_bbox_shape[0])
from ppdet.data.source.loader import load_roidb
records, cname2cid = load_roidb(anno_path, samples) mask = sample[6]
self.assertEqual(len(records), samples) self.assertEqual(len(mask), gt_bbox_shape[0])
self.assertGreater(len(cname2cid), 0) self.assertEqual(mask[0][0].shape[-1], 2)
data_loader.reset()
def test_loader_multi_threads(self):
coco_loader = COCODataSet(
dataset_dir=self.root_path,
image_dir=self.image_dir,
anno_path=self.anno_path,
sample_num=10)
sample_trans = [
DecodeImage(to_rgb=True), ResizeImage(
target_size=800, max_size=1333, interp=1), Permute(to_bgr=False)
]
batch_trans = [PadBatch(pad_to_stride=32, use_padded_im_info=True), ]
inputs_def = {
'fields': [
'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd',
'gt_mask'
],
}
data_loader = Reader(
coco_loader,
sample_transforms=sample_trans,
batch_transforms=batch_trans,
batch_size=2,
shuffle=True,
drop_empty=True,
worker_num=2,
use_process=False,
bufsize=8,
inputs_def=inputs_def)()
for i in range(2):
for samples in data_loader:
for sample in samples:
im_shape = sample[0].shape
self.assertEqual(im_shape[0], 3)
self.assertEqual(im_shape[1] % 32, 0)
self.assertEqual(im_shape[2] % 32, 0)
im_info_shape = sample[1].shape
self.assertEqual(im_info_shape[-1], 3)
im_id_shape = sample[2].shape
self.assertEqual(im_id_shape[-1], 1)
gt_bbox_shape = sample[3].shape
self.assertEqual(gt_bbox_shape[-1], 4)
gt_class_shape = sample[4].shape
self.assertEqual(gt_class_shape[-1], 1)
self.assertEqual(gt_class_shape[0], gt_bbox_shape[0])
is_crowd_shape = sample[5].shape
self.assertEqual(is_crowd_shape[-1], 1)
self.assertEqual(is_crowd_shape[0], gt_bbox_shape[0])
mask = sample[6]
self.assertEqual(len(mask), gt_bbox_shape[0])
self.assertEqual(mask[0][0].shape[-1], 2)
data_loader.reset()
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import yaml
import logging
from ppdet.utils.download import get_path
from ppdet.utils.download import DATASET_HOME
from ppdet.core.workspace import load_config, merge_config
from ppdet.data.reader import create_reader
COCO_VAL_URL = 'http://images.cocodataset.org/zips/val2017.zip'
COCO_VAL_MD5SUM = '442b8da7639aecaf257c1dceb8ba8c80'
COCO_ANNO_URL = 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip'
COCO_ANNO_MD5SUM = 'f4bbac642086de4f52a3fdda2de5fa2c'
FORMAT = '[%(asctime)s-%(filename)s-%(levelname)s:%(message)s]'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
class TestReaderYAML(unittest.TestCase):
@classmethod
def setUpClass(cls):
""" setup
"""
root_path = os.path.join(DATASET_HOME, 'coco')
_, _ = get_path(COCO_VAL_URL, root_path, COCO_VAL_MD5SUM)
_, _ = get_path(COCO_ANNO_URL, root_path, COCO_ANNO_MD5SUM)
cls.anno_path = 'annotations/instances_val2017.json'
cls.image_dir = 'val2017'
cls.root_path = root_path
@classmethod
def tearDownClass(cls):
""" tearDownClass """
pass
def test_loader_yaml(self):
cfg_file = 'ppdet/data/tests/test.yml'
cfg = load_config(cfg_file)
data_cfg = '[!COCODataSet {{image_dir: {0}, dataset_dir: {1}, ' \
'anno_path: {2}, sample_num: 10}}]'.format(
self.image_dir, self.root_path, self.anno_path)
dataset_ins = yaml.load(data_cfg, Loader=yaml.Loader)
update_train_cfg = {'TrainReader': {'dataset': dataset_ins[0]}}
update_test_cfg = {'EvalReader': {'dataset': dataset_ins[0]}}
merge_config(update_train_cfg)
merge_config(update_test_cfg)
reader = create_reader(cfg['TrainReader'], 10)()
for samples in reader:
for sample in samples:
im_shape = sample[0].shape
self.assertEqual(im_shape[0], 3)
self.assertEqual(im_shape[1] % 32, 0)
self.assertEqual(im_shape[2] % 32, 0)
im_info_shape = sample[1].shape
self.assertEqual(im_info_shape[-1], 3)
im_id_shape = sample[2].shape
self.assertEqual(im_id_shape[-1], 1)
gt_bbox_shape = sample[3].shape
self.assertEqual(gt_bbox_shape[-1], 4)
gt_class_shape = sample[4].shape
self.assertEqual(gt_class_shape[-1], 1)
self.assertEqual(gt_class_shape[0], gt_bbox_shape[0])
is_crowd_shape = sample[5].shape
self.assertEqual(is_crowd_shape[-1], 1)
self.assertEqual(is_crowd_shape[0], gt_bbox_shape[0])
mask = sample[6]
self.assertEqual(len(mask), gt_bbox_shape[0])
self.assertEqual(mask[0][0].shape[-1], 2)
reader = create_reader(cfg['EvalReader'], 10)()
for samples in reader:
for sample in samples:
im_shape = sample[0].shape
self.assertEqual(im_shape[0], 3)
self.assertEqual(im_shape[1] % 32, 0)
self.assertEqual(im_shape[2] % 32, 0)
im_info_shape = sample[1].shape
self.assertEqual(im_info_shape[-1], 3)
im_id_shape = sample[2].shape
self.assertEqual(im_id_shape[-1], 1)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import logging
import numpy as np
import set_env
import ppdet.data.transform as tf
logging.basicConfig(level=logging.INFO)
class TestBase(unittest.TestCase):
"""Test cases for dataset.transform.operator
"""
@classmethod
def setUpClass(cls, with_mixup=False):
""" setup
"""
roidb_fname = set_env.coco_data['TRAIN']['ANNO_FILE']
image_dir = set_env.coco_data['TRAIN']['IMAGE_DIR']
import pickle as pkl
with open(roidb_fname, 'rb') as f:
roidb = f.read()
roidb = pkl.loads(roidb)
fn = os.path.join(image_dir, roidb[0][0]['im_file'])
with open(fn, 'rb') as f:
roidb[0][0]['image'] = f.read()
if with_mixup:
mixup_fn = os.path.join(image_dir, roidb[0][1]['im_file'])
roidb[0][0]['mixup'] = roidb[0][1]
with open(mixup_fn, 'rb') as f:
roidb[0][0]['mixup']['image'] = f.read()
cls.sample = roidb[0][0]
@classmethod
def tearDownClass(cls):
""" tearDownClass """
pass
def test_ops_all(self):
""" test operators
"""
# ResizeImage
ops_conf = [{
'op': 'DecodeImage'
}, {
'op': 'ResizeImage',
'target_size': 300,
'max_size': 1333
}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
data = self.sample.copy()
result0 = mapper(data)
self.assertIsNotNone(result0['image'])
self.assertEqual(len(result0['image'].shape), 3)
# RandFlipImage
ops_conf = [{'op': 'RandomFlipImage'}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
result1 = mapper(result0)
self.assertEqual(result1['image'].shape, result0['image'].shape)
self.assertEqual(result1['gt_bbox'].shape, result0['gt_bbox'].shape)
# NormalizeImage
ops_conf = [{'op': 'NormalizeImage', 'is_channel_first': False}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
result2 = mapper(result1)
im1 = result1['image']
count = np.where(im1 <= 1)[0]
if im1.dtype == 'float64':
self.assertEqual(len(count), im1.shape[0] * im1.shape[1] * im1.shape[2])
# ArrangeSample
ops_conf = [{'op': 'ArrangeRCNN'}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
result3 = mapper(result2)
self.assertEqual(type(result3), tuple)
def test_ops_part1(self):
"""test Crop and Resize
"""
ops_conf = [{
'op': 'DecodeImage'
}, {
'op': 'NormalizeBox'
}, {
'op': 'CropImage',
'batch_sampler': [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
data = self.sample.copy()
result = mapper(data)
self.assertEqual(len(result['image'].shape), 3)
def test_ops_part2(self):
"""test Expand and RandomDistort
"""
ops_conf = [{
'op': 'DecodeImage'
}, {
'op': 'NormalizeBox'
}, {
'op': 'ExpandImage',
'max_ratio': 1.5,
'prob': 1
}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
data = self.sample.copy()
result = mapper(data)
self.assertEqual(len(result['image'].shape), 3)
self.assertGreater(result['gt_bbox'].shape[0], 0)
def test_ops_part3(self):
"""test Mixup and RandomInterp
"""
ops_conf = [{
'op': 'DecodeImage',
'with_mixup': True,
}, {
'op': 'MixupImage',
}, {
'op': 'RandomInterpImage',
'target_size': 608
}]
mapper = tf.build_mapper(ops_conf)
self.assertTrue(mapper is not None)
data = self.sample.copy()
result = mapper(data)
self.assertEqual(len(result['image'].shape), 3)
self.assertGreater(result['gt_bbox'].shape[0], 0)
#self.assertGreater(result['gt_score'].shape[0], 0)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import unittest
import sys
import logging
import numpy as np
import yaml
import set_env
from ppdet.data.reader import Reader
from ppdet.data.source import build_source
from ppdet.data.source import IteratorSource
class TestReader(unittest.TestCase):
"""Test cases for dataset.reader
"""
@classmethod
def setUpClass(cls):
""" setup
"""
prefix = os.path.dirname(os.path.abspath(__file__))
coco_yml = os.path.join(prefix, 'coco.yml')
with open(coco_yml, 'rb') as f:
cls.coco_conf = yaml.load(f.read())
cls.coco_conf['DATA']['TRAIN'] = set_env.coco_data['TRAIN']
cls.coco_conf['DATA']['VAL'] = set_env.coco_data['VAL']
rcnn_yml = os.path.join(prefix, 'rcnn_dataset.yml')
with open(rcnn_yml, 'rb') as f:
cls.rcnn_conf = yaml.load(f.read())
cls.rcnn_conf['DATA']['TRAIN'] = set_env.coco_data['TRAIN']
cls.rcnn_conf['DATA']['VAL'] = set_env.coco_data['VAL']
@classmethod
def tearDownClass(cls):
""" tearDownClass """
pass
def test_train(self):
""" Test reader for training
"""
coco = Reader(
self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], maxiter=1000)
train_rd = coco.train()
self.assertTrue(train_rd is not None)
ct = 0
total = 0
bytes = 0
prev_ts = None
for sample in train_rd():
if prev_ts is None:
start_ts = time.time()
prev_ts = start_ts
ct += 1
bytes += 4 * sample[0][0].size * len(sample[0])
self.assertTrue(sample is not None)
cost = time.time() - prev_ts
if cost >= 1.0:
total += ct
qps = total / (time.time() - start_ts)
bps = bytes / (time.time() - start_ts)
logging.info('got %d/%d samples in %.3fsec with qps:%d bps:%d' %
(ct, total, cost, qps, bps))
bytes = 0
ct = 0
prev_ts = time.time()
total += ct
self.assertEqual(total, coco._maxiter)
def test_val(self):
""" Test reader for validation
"""
coco = Reader(self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], 10)
val_rd = coco.val()
self.assertTrue(val_rd is not None)
# test 3 epochs
for _ in range(3):
ct = 0
for sample in val_rd():
ct += 1
self.assertTrue(sample is not None)
self.assertGreaterEqual(ct, coco._maxiter)
def test_rcnn_train(self):
""" Test reader for training
"""
anno = self.rcnn_conf['DATA']['TRAIN']['ANNO_FILE']
if not os.path.exists(anno):
logging.error('exit test_rcnn because file[%s] was not found' % (anno))
return
rcnn = Reader(self.rcnn_conf['DATA'], self.rcnn_conf['TRANSFORM'], 10)
rcnn_rd = rcnn.train()
self.assertTrue(rcnn_rd is not None)
ct = 0
out = None
for sample in rcnn_rd():
out = sample
ct += 1
self.assertTrue(sample is not None)
self.assertEqual(out[0][0].shape[0], 3)
self.assertEqual(out[0][1].shape[0], 3)
self.assertEqual(out[0][3].shape[1], 4)
self.assertEqual(out[0][4].shape[1], 1)
self.assertEqual(out[0][5].shape[1], 1)
self.assertGreaterEqual(ct, rcnn._maxiter)
def test_create(self):
""" Test create a reader using my source
"""
def _my_data_reader():
mydata = build_source(self.rcnn_conf['DATA']['TRAIN'])
for i, sample in enumerate(mydata):
yield sample
my_source = IteratorSource(_my_data_reader)
mode = 'TRAIN'
train_rd = Reader.create(
mode,
self.rcnn_conf['DATA'][mode],
self.rcnn_conf['TRANSFORM'][mode],
max_iter=10,
my_source=my_source)
out = None
for sample in train_rd():
out = sample
self.assertTrue(sample is not None)
self.assertEqual(out[0][0].shape[0], 3)
self.assertEqual(out[0][1].shape[0], 3)
self.assertEqual(out[0][3].shape[1], 4)
self.assertEqual(out[0][4].shape[1], 1)
self.assertEqual(out[0][5].shape[1], 1)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import unittest
import sys
import logging
import set_env
from ppdet.data.source import build_source
class TestRoiDbSource(unittest.TestCase):
"""Test cases for dataset.source.roidb_source
"""
@classmethod
def setUpClass(cls):
""" setup
"""
anno_path = set_env.coco_data['TRAIN']['ANNO_FILE']
image_dir = set_env.coco_data['TRAIN']['IMAGE_DIR']
cls.config = {
'data_cf': {
'anno_file': anno_path,
'image_dir': image_dir,
'samples': 100,
'load_img': True
},
'cname2cid': None
}
@classmethod
def tearDownClass(cls):
""" tearDownClass """
pass
def test_basic(self):
""" test basic apis 'next/size/drained'
"""
roi_source = build_source(self.config)
for i, sample in enumerate(roi_source):
self.assertTrue('image' in sample)
self.assertGreater(len(sample['image']), 0)
self.assertTrue(roi_source.drained())
self.assertEqual(i + 1, roi_source.size())
def test_reset(self):
""" test functions 'reset/epoch_id'
"""
roi_source = build_source(self.config)
self.assertTrue(roi_source.next() is not None)
self.assertEqual(roi_source.epoch_id(), 0)
roi_source.reset()
self.assertEqual(roi_source.epoch_id(), 1)
self.assertTrue(roi_source.next() is not None)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import unittest
import sys
import logging
import numpy as np
import set_env
import ppdet.data.transform as tf
from ppdet.data.source import build_source
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
class TestTransformer(unittest.TestCase):
"""Test cases for dataset.transform.transformer
"""
@classmethod
def setUpClass(cls):
""" setup
"""
prefix = os.path.dirname(os.path.abspath(__file__))
# json data
anno_path = set_env.coco_data['TRAIN']['ANNO_FILE']
image_dir = set_env.coco_data['TRAIN']['IMAGE_DIR']
cls.sc_config = {
'anno_file': anno_path,
'image_dir': image_dir,
'samples': 200
}
cls.ops = [{
'op': 'DecodeImage',
'to_rgb': True
}, {
'op': 'ResizeImage',
'target_size': 800,
'max_size': 1333
}, {
'op': 'ArrangeRCNN',
'is_mask': False
}]
@classmethod
def tearDownClass(cls):
""" tearDownClass """
pass
def test_map(self):
""" test transformer.map
"""
mapper = tf.build_mapper(self.ops)
ds = build_source(self.sc_config)
mapped_ds = tf.map(ds, mapper)
ct = 0
for sample in mapped_ds:
self.assertTrue(type(sample[0]) is np.ndarray)
ct += 1
self.assertEqual(ct, mapped_ds.size())
def test_parallel_map(self):
""" test transformer.map with concurrent workers
"""
mapper = tf.build_mapper(self.ops)
ds = build_source(self.sc_config)
worker_conf = {'WORKER_NUM': 2, 'use_process': True}
mapped_ds = tf.map(ds, mapper, worker_conf)
ct = 0
for sample in mapped_ds:
self.assertTrue(type(sample[0]) is np.ndarray)
ct += 1
self.assertTrue(mapped_ds.drained())
self.assertEqual(ct, mapped_ds.size())
mapped_ds.reset()
ct = 0
for sample in mapped_ds:
self.assertTrue(type(sample[0]) is np.ndarray)
ct += 1
self.assertEqual(ct, mapped_ds.size())
def test_batch(self):
""" test batched dataset
"""
batchsize = 2
mapper = tf.build_mapper(self.ops)
ds = build_source(self.sc_config)
mapped_ds = tf.map(ds, mapper)
batched_ds = tf.batch(mapped_ds, batchsize, True)
for sample in batched_ds:
out = sample
self.assertEqual(len(out), batchsize)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# tool to convert COCO or VOC data to a pickled file whose
# schema is the same for every sample.
#
# notes:
# the original COCO or VOC data format can also be used directly
# by 'PPdetection' for training.
# this tool just converts data to a unified schema,
# which is useful when debugging with a small dataset.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import os
import sys
import logging
import pickle as pkl
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
if path not in sys.path:
sys.path.insert(0, path)
from data.source import loader
def parse_args():
""" parse arguments
"""
parser = argparse.ArgumentParser(
description='Generate Standard Dataset for PPdetection')
parser.add_argument(
'--type',
type=str,
default='json',
help='file format of label file, eg: json for COCO and xml for VOC')
parser.add_argument(
'--annotation',
type=str,
help='label file name for COCO or VOC dataset, '
'eg: instances_val2017.json or train.txt')
parser.add_argument(
'--save-dir',
type=str,
default='roidb',
help='directory to save roidb file which contains pickled samples')
parser.add_argument(
'--samples',
type=int,
default=-1,
help='number of samples to dump, default to all')
args = parser.parse_args()
return args
def dump_coco_as_pickle(args):
""" Load COCO data, and then save it as pickled file.
Notes:
label file of COCO contains a json which consists
of label info for each sample
"""
samples = args.samples
save_dir = args.save_dir
if not os.path.exists(save_dir):
os.makedirs(save_dir)
anno_path = args.annotation
roidb, cat2id = loader.load(anno_path, samples, with_cat2id=True)
samples = len(roidb)
# use splitext here: rstrip('.json') strips trailing characters, not the suffix
dsname = os.path.splitext(os.path.basename(anno_path))[0]
roidb_fname = save_dir + "/%s.roidb" % (dsname)
with open(roidb_fname, "wb") as fout:
pkl.dump((roidb, cat2id), fout)
#for rec in roidb:
# sys.stderr.write('%s\n' % (rec['im_file']))
logging.info('dumped %d samples to file[%s]' % (samples, roidb_fname))
def dump_voc_as_pickle(args):
""" Load VOC data, and then save it as pickled file.
Notes:
we assume the label file of VOC contains lines,
each of which corresponds to an xml file
that contains its label info
"""
samples = args.samples
save_dir = args.save_dir
if not os.path.exists(save_dir):
os.makedirs(save_dir)
save_dir = args.save_dir
anno_path = os.path.expanduser(args.annotation)
roidb, cat2id = loader.load(
anno_path, samples, with_cat2id=True, use_default_label=None)
samples = len(roidb)
part = anno_path.split('/')
dsname = part[-4]
roidb_fname = save_dir + "/%s.roidb" % (dsname)
with open(roidb_fname, "wb") as fout:
pkl.dump((roidb, cat2id), fout)
anno_path = os.path.join(anno_path.split('/train.txt')[0], 'label_list.txt')
with open(anno_path, 'w') as fw:
for key in cat2id.keys():
fw.write(key + '\n')
logging.info('dumped %d samples to file[%s]' % (samples, roidb_fname))
if __name__ == "__main__":
""" Make sure you have already downloaded original COCO or VOC data,
then you can convert it using this tool.
Usage:
python generate_data_for_training.py --type=json
--annotation=./annotations/instances_val2017.json
--save-dir=./roidb --samples=100
"""
args = parse_args()
# VOC data are organized in xml files
if args.type == 'xml':
dump_voc_as_pickle(args)
# COCO data are organized in json file
elif args.type == 'json':
dump_coco_as_pickle(args)
else:
raise TypeError('Can\'t deal with {} type. '\
'Only xml or json file format supported'.format(args.type))
...@@ -12,132 +12,11 @@ ...@@ -12,132 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from . import operators
from __future__ import print_function from . import batch_operators
import copy from .operators import *
import logging from .batch_operators import *
import traceback
from .transformer import MappedDataset, BatchedDataset
from .post_map import build_post_map
from .parallel_map import ParallelMappedDataset
from .operators import BaseOperator, registered_ops
__all__ = ['build_mapper', 'map', 'batch', 'batch_map']
logger = logging.getLogger(__name__)
def build_mapper(ops, context=None):
"""
Build a mapper for operators in 'ops'
Args:
ops (list of operator.BaseOperator or list of op dict):
configs for operators, eg:
[{'name': 'DecodeImage', 'params': {'to_rgb': True}}, {xxx}]
context (dict): a context object for mapper
Returns:
a mapper function which accepts one argument 'sample' and
return the processed result
"""
new_ops = []
for _dict in ops:
new_dict = {}
for i, j in _dict.items():
new_dict[i.lower()] = j
new_ops.append(new_dict)
ops = new_ops
op_funcs = []
op_repr = []
for op in ops:
if type(op) is dict and 'op' in op:
op_func = getattr(BaseOperator, op['op'])
params = copy.deepcopy(op)
del params['op']
o = op_func(**params)
elif not isinstance(op, BaseOperator):
op_func = getattr(BaseOperator, op['name'])
params = {} if 'params' not in op else op['params']
o = op_func(**params)
else:
assert isinstance(op, BaseOperator), \
"invalid operator when build ops"
o = op
op_funcs.append(o)
op_repr.append('{{{}}}'.format(str(o)))
op_repr = '[{}]'.format(','.join(op_repr))
def _mapper(sample):
ctx = {} if context is None else copy.deepcopy(context)
for f in op_funcs:
try:
out = f(sample, ctx)
sample = out
except Exception as e:
stack_info = traceback.format_exc()
logger.warn("fail to map op [{}] with error: {} and stack:\n{}".
format(f, e, str(stack_info)))
raise e
return out
_mapper.ops = op_repr
return _mapper
def map(ds, mapper, worker_args=None):
"""
Apply 'mapper' to 'ds'
Args:
ds (instance of Dataset): dataset to be mapped
mapper (function): action to be executed for every data sample
worker_args (dict): configs for concurrent mapper
Returns:
a mapped dataset
"""
if worker_args is not None:
return ParallelMappedDataset(ds, mapper, worker_args)
else:
return MappedDataset(ds, mapper)
def batch(ds, batchsize, drop_last=False, drop_empty=True):
"""
Batch data samples to batches
Args:
batchsize (int): number of samples for a batch
drop_last (bool): drop last few samples if not enough for a batch
Returns:
a batched dataset
"""
return BatchedDataset(
ds, batchsize, drop_last=drop_last, drop_empty=drop_empty)
def batch_map(ds, config):
"""
Post process the batches.
Args:
ds (instance of Dataset): dataset to be mapped
config (dict): config used to build the post mapper applied to every batch
Returns:
a batched dataset which is processed
"""
mapper = build_post_map(**config)
return MappedDataset(ds, mapper)
for nm in registered_ops:
op = getattr(BaseOperator, nm)
locals()[nm] = op
__all__ = []
__all__ += registered_ops __all__ += registered_ops
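# After this change the per-sample ops are plain importable classes; a
# minimal sketch of applying them directly (mirroring the unit tests):
#
#   from ppdet.data.transform.operators import DecodeImage, ResizeImage
#
#   sample = DecodeImage(to_rgb=True)(sample)
#   sample = ResizeImage(target_size=800, max_size=1333, interp=1)(sample)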
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# operators to arrange processed samples into the
# tuple formats needed for training/evaluation/inference
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
from .operators import BaseOperator, register_op
logger = logging.getLogger(__name__)
@register_op
class ArrangeRCNN(BaseOperator):
"""
Transform dict to tuple format needed for training.
Args:
is_mask (bool): whether to include mask data
"""
def __init__(self, is_mask=False):
super(ArrangeRCNN, self).__init__()
self.is_mask = is_mask
assert isinstance(self.is_mask, bool), "wrong type for is_mask"
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id, gt_bbox, gt_class, is_crowd, gt_masks)
"""
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
keys = list(sample.keys())
if 'is_crowd' in keys:
is_crowd = sample['is_crowd']
else:
raise KeyError("The dataset doesn't have 'is_crowd' key.")
if 'im_info' in keys:
im_info = sample['im_info']
else:
raise KeyError("The dataset doesn't have 'im_info' key.")
im_id = sample['im_id']
outs = (im, im_info, im_id, gt_bbox, gt_class, is_crowd)
gt_masks = []
if self.is_mask and len(sample['gt_poly']) != 0 \
and 'is_crowd' in keys:
valid = True
segms = sample['gt_poly']
assert len(segms) == is_crowd.shape[0]
for i in range(len(sample['gt_poly'])):
segm, iscrowd = segms[i], is_crowd[i]
gt_segm = []
if iscrowd:
gt_segm.append([[0, 0]])
else:
for poly in segm:
if len(poly) == 0:
valid = False
break
gt_segm.append(np.array(poly).reshape(-1, 2))
if (not valid) or len(gt_segm) == 0:
break
gt_masks.append(gt_segm)
outs = outs + (gt_masks, )
return outs
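# For instance, with is_mask=False a sample dict
#   {'image': ..., 'im_info': ..., 'im_id': ..., 'gt_bbox': ...,
#    'gt_class': ..., 'is_crowd': ...}
# is arranged into the tuple
#   (image, im_info, im_id, gt_bbox, gt_class, is_crowd)
# and gt_masks is appended only when is_mask is True and 'gt_poly' is valid.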
@register_op
class ArrangeEvalRCNN(BaseOperator):
"""
Transform dict to the tuple format needed for evaluation.
"""
def __init__(self):
super(ArrangeEvalRCNN, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id, im_shape, gt_bbox,
gt_class, difficult)
"""
ims = []
keys = sorted(list(sample.keys()))
for k in keys:
if 'image' in k:
ims.append(sample[k])
if 'im_info' in keys:
im_info = sample['im_info']
else:
raise KeyError("The dataset doesn't have 'im_info' key.")
im_id = sample['im_id']
h = sample['h']
w = sample['w']
# For rcnn models in eval and infer stage, original image size
# is needed to clip the bounding boxes. And box clip op in
# bbox prediction needs im_info as input in format of [N, 3],
# so im_shape is appended by 1 to match dimension.
im_shape = np.array((h, w, 1), dtype=np.float32)
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
difficult = sample['difficult']
remain_list = [im_info, im_id, im_shape, gt_bbox, gt_class, difficult]
ims.extend(remain_list)
outs = tuple(ims)
return outs
@register_op
class ArrangeTestRCNN(BaseOperator):
"""
Transform dict to the tuple format needed for inference.
"""
def __init__(self):
super(ArrangeTestRCNN, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id, im_shape)
"""
ims = []
keys = sorted(list(sample.keys()))
for k in keys:
if 'image' in k:
ims.append(sample[k])
if 'im_info' in keys:
im_info = sample['im_info']
else:
raise KeyError("The dataset doesn't have 'im_info' key.")
im_id = sample['im_id']
h = sample['h']
w = sample['w']
# For rcnn models in eval and infer stage, original image size
# is needed to clip the bounding boxes. And box clip op in
# bbox prediction needs im_info as input in format of [N, 3],
# so im_shape is appended by 1 to match dimension.
im_shape = np.array((h, w, 1), dtype=np.float32)
remain_list = [im_info, im_id, im_shape]
ims.extend(remain_list)
outs = tuple(ims)
return outs
@register_op
class ArrangeSSD(BaseOperator):
"""
Transform dict to tuple format needed for training.
"""
def __init__(self):
super(ArrangeSSD, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, gt_bbox, gt_class)
"""
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
outs = (im, gt_bbox, gt_class)
return outs
@register_op
class ArrangeEvalSSD(BaseOperator):
"""
Transform dict to the tuple format needed for evaluation.
"""
def __init__(self, fields):
super(ArrangeEvalSSD, self).__init__()
self.fields = fields
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple assembled from the entries listed in 'fields'
"""
outs = []
if len(sample['gt_bbox']) != len(sample['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
for field in self.fields:
if field == 'im_shape':
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
outs.append(im_shape)
elif field == 'is_difficult':
outs.append(sample['difficult'])
elif field == 'gt_box':
outs.append(sample['gt_bbox'])
elif field == 'gt_label':
outs.append(sample['gt_class'])
else:
outs.append(sample[field])
outs = tuple(outs)
return outs
@register_op
class ArrangeTestSSD(BaseOperator):
"""
Transform dict to the tuple format needed for inference.
"""
def __init__(self):
super(ArrangeTestSSD, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items: (image, im_id, im_shape)
"""
im = sample['image']
im_id = sample['im_id']
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
outs = (im, im_id, im_shape)
return outs
@register_op
class ArrangeYOLO(BaseOperator):
"""
Transform dict to the tuple format needed for training.
"""
def __init__(self):
super(ArrangeYOLO, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, gt_bbox, gt_class, gt_score)
"""
im = sample['image']
if len(sample['gt_bbox']) != len(sample['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
if len(sample['gt_bbox']) != len(sample['gt_score']):
raise ValueError("gt num mismatch: bbox and score.")
gt_bbox = np.zeros((50, 4), dtype=im.dtype)
gt_class = np.zeros((50, ), dtype=np.int32)
gt_score = np.zeros((50, ), dtype=im.dtype)
gt_num = min(50, len(sample['gt_bbox']))
if gt_num > 0:
gt_bbox[:gt_num, :] = sample['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = sample['gt_class'][:gt_num, 0]
gt_score[:gt_num] = sample['gt_score'][:gt_num, 0]
# convert [x1, y1, x2, y2] to [x_center, y_center, w, h]
gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2]
gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2.
outs = (im, gt_bbox, gt_class, gt_score)
return outs
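# Hedged sketch: what ArrangeYOLO emits for a toy sample (values are
# illustrative; the dict mirrors what earlier sample_transforms would produce).
import numpy as np

toy = {
    'image': np.zeros((3, 320, 320), dtype=np.float32),
    'gt_bbox': np.array([[10., 20., 50., 80.]], dtype=np.float32),
    'gt_class': np.array([[3]], dtype=np.int32),
    'gt_score': np.array([[1.]], dtype=np.float32),
}
im, gt_bbox, gt_class, gt_score = ArrangeYOLO()(toy)
# Targets are padded to a fixed 50-slot layout: gt_bbox is (50, 4),
# gt_class and gt_score are (50,); boxes come out in center format.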
@register_op
class ArrangeEvalYOLO(BaseOperator):
"""
Transform dict to the tuple format needed for evaluation.
"""
def __init__(self):
super(ArrangeEvalYOLO, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_shape, im_id, gt_bbox, gt_class,
difficult)
"""
im = sample['image']
if len(sample['gt_bbox']) != len(sample['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_id = sample['im_id']
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
gt_bbox = np.zeros((50, 4), dtype=im.dtype)
gt_class = np.zeros((50, ), dtype=np.int32)
difficult = np.zeros((50, ), dtype=np.int32)
gt_num = min(50, len(sample['gt_bbox']))
if gt_num > 0:
gt_bbox[:gt_num, :] = sample['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = sample['gt_class'][:gt_num, 0]
difficult[:gt_num] = sample['difficult'][:gt_num, 0]
outs = (im, im_shape, im_id, gt_bbox, gt_class, difficult)
return outs
@register_op
class ArrangeTestYOLO(BaseOperator):
"""
Transform dict to the tuple format needed for inference.
"""
def __init__(self):
super(ArrangeTestYOLO, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_shape, im_id)
"""
im = sample['image']
im_id = sample['im_id']
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
outs = (im, im_shape, im_id)
return outs
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import logging
import cv2
import numpy as np
from .operators import register_op, BaseOperator
logger = logging.getLogger(__name__)
@register_op
class PadBatch(BaseOperator):
"""
Pad a batch of samples so that their heights and widths are divisible by a stride.
The layout of each image should be 'CHW'.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width are divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, use_padded_im_info=True):
super(PadBatch, self).__init__()
self.pad_to_stride = pad_to_stride
self.use_padded_im_info = use_padded_im_info
def __call__(self, samples, context=None):
"""
Args:
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride
if coarsest_stride == 0:
return samples
max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0)
if coarsest_stride > 0:
max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
max_shape[2] = int(
np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
padding_batch = []
for data in samples:
im = data['image']
im_c, im_h, im_w = im.shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
data['image'] = padding_im
if self.use_padded_im_info:
data['im_info'][:2] = max_shape[1:3]
return samples
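# Hedged usage sketch: pad a toy CHW batch to a stride of 32 (shapes are
# illustrative; in the Reader this op runs as one of the batch_transforms).
import numpy as np

toy_batch = [
    {'image': np.zeros((3, 400, 500), dtype=np.float32),
     'im_info': np.array([400., 500., 1.], dtype=np.float32)},
    {'image': np.zeros((3, 416, 480), dtype=np.float32),
     'im_info': np.array([416., 480., 1.], dtype=np.float32)},
]
toy_batch = PadBatch(pad_to_stride=32)(toy_batch)
# Both images are now (3, 416, 512): the per-batch maxima (416, 500) rounded
# up to multiples of 32; im_info[:2] is updated to the padded shape.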
@register_op
class RandomShape(BaseOperator):
"""
Randomly reshape a batch to a size randomly chosen from `sizes`. If
random_inter is True, also randomly select an interpolation algorithm
from [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA,
cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]; otherwise always use
cv2.INTER_NEAREST.
Args:
sizes (list of int): sizes to randomly choose from
random_inter (bool): whether to randomly select the interpolation
algorithm, default false.
"""
def __init__(self, sizes=[], random_inter=False):
super(RandomShape, self).__init__()
self.sizes = sizes
self.random_inter = random_inter
self.interps = [
cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
] if random_inter else []
def __call__(self, samples, context=None):
shape = np.random.choice(self.sizes)
method = np.random.choice(self.interps) if self.random_inter \
else cv2.INTER_NEAREST
for i in range(len(samples)):
im = samples[i]['image']
h, w = im.shape[:2]
scale_x = float(shape) / w
scale_y = float(shape) / h
im = cv2.resize(
im, None, None, fx=scale_x, fy=scale_y, interpolation=method)
samples[i]['image'] = im
return samples
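# Hedged usage sketch: resize the whole batch to one randomly chosen square
# size; with random_inter=True the interpolation method is sampled as well.
import numpy as np

toy_batch = [{'image': np.zeros((416, 416, 3), dtype=np.float32)}
             for _ in range(4)]
toy_batch = RandomShape(sizes=[320, 416, 608], random_inter=True)(toy_batch)
# Every sample now shares one shape, e.g. (608, 608, 3). Boxes need no
# update here because YOLO keeps gt_bbox normalized.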
@register_op
class PadMultiScaleTest(BaseOperator):
"""
Pad images so that their heights and widths are divisible by a stride, for multi-scale testing.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width are divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0):
super(PadMultiScaleTest, self).__init__()
self.pad_to_stride = pad_to_stride
def __call__(self, samples, context=None):
coarsest_stride = self.pad_to_stride
if coarsest_stride == 0:
return samples
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
if len(samples) != 1:
raise ValueError("Batch size must be 1 when using multiscale test, "
"but now batch size is {}".format(len(samples)))
for i in range(len(samples)):
sample = samples[i]
for k in sample.keys():
# hard code
if k.startswith('image'):
im = sample[k]
im_c, im_h, im_w = im.shape
max_h = int(
np.ceil(im_h / coarsest_stride) * coarsest_stride)
max_w = int(
np.ceil(im_w / coarsest_stride) * coarsest_stride)
padding_im = np.zeros(
(im_c, max_h, max_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
sample[k] = padding_im
info_name = 'im_info' if k == 'image' else 'im_info_' + k
# update im_info
sample[info_name][:2] = [max_h, max_w]
if not batch_input:
samples = samples[0]
return samples
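# Hedged naming sketch: PadMultiScaleTest pads every key starting with
# 'image' and updates the paired info key ('im_info' for the base image,
# 'im_info_' + key otherwise), matching the keys MultiscaleTestResize writes.
import numpy as np

toy = {
    'image': np.zeros((3, 400, 500), dtype=np.float32),
    'im_info': np.array([400., 500., 1.], dtype=np.float32),
    'image_scale_0': np.zeros((3, 200, 250), dtype=np.float32),
    'im_info_image_scale_0': np.array([200., 250., .5], dtype=np.float32),
}
toy = PadMultiScaleTest(pad_to_stride=32)(toy)
# toy['image'] -> (3, 416, 512); toy['image_scale_0'] -> (3, 224, 256);
# each im_info[:2] now holds the padded height and width.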
...@@ -32,6 +32,7 @@ import logging ...@@ -32,6 +32,7 @@ import logging
import random import random
import math import math
import numpy as np import numpy as np
import cv2 import cv2
from PIL import Image, ImageEnhance from PIL import Image, ImageEnhance
...@@ -182,10 +183,10 @@ class MultiscaleTestResize(BaseOperator): ...@@ -182,10 +183,10 @@ class MultiscaleTestResize(BaseOperator):
base_name_list = ['image'] base_name_list = ['image']
origin_ims['image'] = im origin_ims['image'] = im
if self.use_flip: if self.use_flip:
sample['flip_image'] = im[:, ::-1, :] sample['image_flip'] = im[:, ::-1, :]
base_name_list.append('flip_image') base_name_list.append('image_flip')
origin_ims['flip_image'] = sample['flip_image'] origin_ims['image_flip'] = sample['image_flip']
im_info = []
for base_name in base_name_list: for base_name in base_name_list:
im_scale = float(self.origin_target_size) / float(im_size_min) im_scale = float(self.origin_target_size) / float(im_size_min)
# Prevent the biggest axis from being more than max_size # Prevent the biggest axis from being more than max_size
...@@ -203,8 +204,12 @@ class MultiscaleTestResize(BaseOperator): ...@@ -203,8 +204,12 @@ class MultiscaleTestResize(BaseOperator):
fx=im_scale_x, fx=im_scale_x,
fy=im_scale_y, fy=im_scale_y,
interpolation=self.interp) interpolation=self.interp)
im_info.extend([resize_h, resize_w, im_scale])
sample[base_name] = im_resize sample[base_name] = im_resize
info_name = 'im_info' if base_name == 'image' else 'im_info_image_flip'
sample[base_name] = im_resize
sample[info_name] = np.array(
[resize_h, resize_w, im_scale], dtype=np.float32)
for i, size in enumerate(self.target_size): for i, size in enumerate(self.target_size):
im_scale = float(size) / float(im_size_min) im_scale = float(size) / float(im_size_min)
if np.round(im_scale * im_size_max) > self.max_size: if np.round(im_scale * im_size_max) > self.max_size:
...@@ -220,10 +225,15 @@ class MultiscaleTestResize(BaseOperator): ...@@ -220,10 +225,15 @@ class MultiscaleTestResize(BaseOperator):
fx=im_scale_x, fx=im_scale_x,
fy=im_scale_y, fy=im_scale_y,
interpolation=self.interp) interpolation=self.interp)
im_info.extend([resize_h, resize_w, im_scale])
im_info = [resize_h, resize_w, im_scale]
# hard-code here, must be consistent with
# ppdet/modeling/architectures/input_helper.py
name = base_name + '_scale_' + str(i) name = base_name + '_scale_' + str(i)
info_name = 'im_info_' + name
sample[name] = im_resize sample[name] = im_resize
sample['im_info'] = np.array(im_info, dtype=np.float32) sample[info_name] = np.array(
[resize_h, resize_w, im_scale], dtype=np.float32)
return sample return sample
...@@ -315,6 +325,7 @@ class ResizeImage(BaseOperator): ...@@ -315,6 +325,7 @@ class ResizeImage(BaseOperator):
raise TypeError( raise TypeError(
'If you set max_size to cap the maximum size of image,' 'If you set max_size to cap the maximum size of image,'
'please set use_cv2 to True to resize the image.') 'please set use_cv2 to True to resize the image.')
im = im.astype('uint8')
im = Image.fromarray(im) im = Image.fromarray(im)
im = im.resize((int(resize_w), int(resize_h)), self.interp) im = im.resize((int(resize_w), int(resize_h)), self.interp)
im = np.array(im) im = np.array(im)
...@@ -383,34 +394,44 @@ class RandomFlipImage(BaseOperator): ...@@ -383,34 +394,44 @@ class RandomFlipImage(BaseOperator):
sample: the image, bounding box and segmentation part sample: the image, bounding box and segmentation part
in sample are flipped. in sample are flipped.
""" """
gt_bbox = sample['gt_bbox']
im = sample['image'] samples = sample
if not isinstance(im, np.ndarray): batch_input = True
raise TypeError("{}: image is not a numpy array.".format(self)) if not isinstance(samples, Sequence):
if len(im.shape) != 3: batch_input = False
raise ImageError("{}: image is not 3-dimensional.".format(self)) samples = [samples]
height, width, _ = im.shape for sample in samples:
if np.random.uniform(0, 1) < self.prob: gt_bbox = sample['gt_bbox']
im = im[:, ::-1, :] im = sample['image']
if gt_bbox.shape[0] == 0: if not isinstance(im, np.ndarray):
return sample raise TypeError("{}: image is not a numpy array.".format(self))
oldx1 = gt_bbox[:, 0].copy() if len(im.shape) != 3:
oldx2 = gt_bbox[:, 2].copy() raise ImageError("{}: image is not 3-dimensional.".format(self))
if self.is_normalized: height, width, _ = im.shape
gt_bbox[:, 0] = 1 - oldx2 if np.random.uniform(0, 1) < self.prob:
gt_bbox[:, 2] = 1 - oldx1 im = im[:, ::-1, :]
else: if gt_bbox.shape[0] == 0:
gt_bbox[:, 0] = width - oldx2 - 1 return sample
gt_bbox[:, 2] = width - oldx1 - 1 oldx1 = gt_bbox[:, 0].copy()
if gt_bbox.shape[0] != 0 and (gt_bbox[:, 2] < gt_bbox[:, 0]).all(): oldx2 = gt_bbox[:, 2].copy()
m = "{}: invalid box, x2 should be greater than x1".format(self) if self.is_normalized:
raise BboxError(m) gt_bbox[:, 0] = 1 - oldx2
sample['gt_bbox'] = gt_bbox gt_bbox[:, 2] = 1 - oldx1
if self.is_mask_flip and len(sample['gt_poly']) != 0: else:
sample['gt_poly'] = self.flip_segms(sample['gt_poly'], height, gt_bbox[:, 0] = width - oldx2 - 1
width) gt_bbox[:, 2] = width - oldx1 - 1
sample['flipped'] = True if gt_bbox.shape[0] != 0 and (
sample['image'] = im gt_bbox[:, 2] < gt_bbox[:, 0]).all():
m = "{}: invalid box, x2 should be greater than x1".format(
self)
raise BboxError(m)
sample['gt_bbox'] = gt_bbox
if self.is_mask_flip and len(sample['gt_poly']) != 0:
sample['gt_poly'] = self.flip_segms(sample['gt_poly'],
height, width)
sample['flipped'] = True
sample['image'] = im
sample = samples if batch_input else samples[0]
return sample return sample
...@@ -444,22 +465,31 @@ class NormalizeImage(BaseOperator): ...@@ -444,22 +465,31 @@ class NormalizeImage(BaseOperator):
1.(optional) Scale the image to [0,1] 1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std 2. Each pixel minus mean and is divided by std
""" """
for k in sample.keys(): samples = sample
if 'image' in k: batch_input = True
im = sample[k] if not isinstance(samples, Sequence):
im = im.astype(np.float32, copy=False) batch_input = False
if self.is_channel_first: samples = [samples]
mean = np.array(self.mean)[:, np.newaxis, np.newaxis] for sample in samples:
std = np.array(self.std)[:, np.newaxis, np.newaxis] for k in sample.keys():
else: # hard code
mean = np.array(self.mean)[np.newaxis, np.newaxis, :] if k.startswith('image'):
std = np.array(self.std)[np.newaxis, np.newaxis, :] im = sample[k]
if self.is_scale: im = im.astype(np.float32, copy=False)
im = im / 255.0 if self.is_channel_first:
im -= mean mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
im /= std std = np.array(self.std)[:, np.newaxis, np.newaxis]
sample[k] = im else:
return sample mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
sample[k] = im
if not batch_input:
samples = samples[0]
return samples
@register_op @register_op
...@@ -899,17 +929,26 @@ class Permute(BaseOperator): ...@@ -899,17 +929,26 @@ class Permute(BaseOperator):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(self, sample, context=None):
assert 'image' in sample, "image data not found" samples = sample
for k in sample.keys(): batch_input = True
if 'image' in k: if not isinstance(samples, Sequence):
im = sample[k] batch_input = False
if self.channel_first: samples = [samples]
im = np.swapaxes(im, 1, 2) for sample in samples:
im = np.swapaxes(im, 1, 0) assert 'image' in sample, "image data not found"
if self.to_bgr: for k in sample.keys():
im = im[[2, 1, 0], :, :] # hard code
sample[k] = im if k.startswith('image'):
return sample im = sample[k]
if self.channel_first:
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if self.to_bgr:
im = im[[2, 1, 0], :, :]
sample[k] = im
if not batch_input:
samples = samples[0]
return samples
@register_op @register_op
...@@ -1233,6 +1272,7 @@ class RandomExpand(BaseOperator): ...@@ -1233,6 +1272,7 @@ class RandomExpand(BaseOperator):
sample['image'] = canvas sample['image'] = canvas
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0: if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32) sample['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32)
return sample return sample
...@@ -1361,3 +1401,63 @@ class RandomCrop(BaseOperator): ...@@ -1361,3 +1401,63 @@ class RandomCrop(BaseOperator):
def _crop_image(self, img, crop): def _crop_image(self, img, crop):
x1, y1, x2, y2 = crop x1, y1, x2, y2 = crop
return img[y1:y2, x1:x2, :] return img[y1:y2, x1:x2, :]
@register_op
class PadBox(BaseOperator):
def __init__(self, num_max_boxes=50):
"""
Pad gt_bbox with zeros if the number of bboxes is less than num_max_boxes.
Args:
num_max_boxes (int): the max number of bboxes
"""
self.num_max_boxes = num_max_boxes
super(PadBox, self).__init__()
def __call__(self, sample, context=None):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = min(self.num_max_boxes, len(bbox))
num_max = self.num_max_boxes
fields = context['fields'] if context else []
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_class' in fields:
pad_class = np.zeros((num_max), dtype=np.int32)
if gt_num > 0:
pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
sample['gt_class'] = pad_class
if 'gt_score' in fields:
pad_score = np.zeros((num_max), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
# In training, ops such as ExpandImage expand gt_bbox and gt_class
# but not difficult, so pad it only when requested via fields.
if 'is_difficult' in fields:
pad_diff = np.zeros((num_max), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
return sample
@register_op
class BboxXYXY2XYWH(BaseOperator):
"""
Convert bbox XYXY format to XYWH format.
"""
def __init__(self):
super(BboxXYXY2XYWH, self).__init__()
def __call__(self, sample, context=None):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
sample['gt_bbox'] = bbox
return sample
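# Hedged worked example: corner format to center format, in place.
import numpy as np

toy = {'gt_bbox': np.array([[10., 20., 50., 80.]], dtype=np.float32)}
toy = BboxXYXY2XYWH()(toy)
# w, h = (50 - 10, 80 - 20) = (40, 60); cx, cy = (10 + 40/2, 20 + 60/2),
# so toy['gt_bbox'][0] == [30., 50., 40., 60.]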
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import cv2
import numpy as np
logger = logging.getLogger(__name__)
def build_post_map(coarsest_stride=1,
is_padding=False,
random_shapes=[],
multi_scales=[],
use_padded_im_info=False,
enable_multiscale_test=False,
num_scale=1):
"""
Build a mapper for post-processing batches
Args:
coarsest_stride (int): stride of the coarsest FPN level
is_padding (bool): whether to pad images within a minibatch
random_shapes (list of int): resize images to a randomly chosen
shape from this list, [] for no resize.
multi_scales (list of float): resize images by a randomly chosen
scale from this list, [] for no resize.
use_padded_im_info (bool): whether to update im_info after padding
enable_multiscale_test (bool): whether to use multiscale test.
num_scale (int): the number of scales for multiscale test.
Returns:
a mapper function which accepts one argument 'batch' and
returns the processed result
"""
def padding_minibatch(batch_data):
if len(batch_data) == 1 and coarsest_stride == 1:
return batch_data
max_shape = np.array([data[0].shape for data in batch_data]).max(axis=0)
if coarsest_stride > 1:
max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
max_shape[2] = int(
np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
padding_batch = []
for data in batch_data:
im_c, im_h, im_w = data[0].shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = data[0]
if use_padded_im_info:
data[1][:2] = max_shape[1:3]
padding_batch.append((padding_im, ) + data[1:])
return padding_batch
def padding_multiscale_test(batch_data):
if len(batch_data) != 1:
raise NotImplementedError(
"Batch size must be 1 when using multiscale test, but now batch size is {}".
format(len(batch_data)))
if coarsest_stride > 1:
padding_batch = []
padding_images = []
data = batch_data[0]
for i, input in enumerate(data):
if i < num_scale:
im_c, im_h, im_w = input.shape
max_h = int(
np.ceil(im_h / coarsest_stride) * coarsest_stride)
max_w = int(
np.ceil(im_w / coarsest_stride) * coarsest_stride)
padding_im = np.zeros(
(im_c, max_h, max_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = input
data[num_scale][3 * i:3 * i + 2] = [max_h, max_w]
padding_batch.append(padding_im)
else:
padding_batch.append(input)
return [tuple(padding_batch)]
# no need to padding
return batch_data
def random_shape(batch_data):
# For YOLO: gt_bbox is normalized, hence scale-invariant.
shape = np.random.choice(random_shapes)
scaled_batch = []
h, w = batch_data[0][0].shape[1:3]
scale_x = float(shape) / w
scale_y = float(shape) / h
for data in batch_data:
im = cv2.resize(
data[0].transpose((1, 2, 0)),
None,
None,
fx=scale_x,
fy=scale_y,
interpolation=cv2.INTER_NEAREST)
scaled_batch.append((im.transpose(2, 0, 1), ) + data[1:])
return scaled_batch
def multi_scale_resize(batch_data):
# For RCNN: the image shape is recorded in im_info.
scale = np.random.choice(multi_scales)
scaled_batch = []
for data in batch_data:
im = cv2.resize(
data[0].transpose((1, 2, 0)),
None,
None,
fx=scale,
fy=scale,
interpolation=cv2.INTER_NEAREST)
im_info = [im.shape[:2], scale]
scaled_batch.append((im.transpose(2, 0, 1), im_info) + data[2:])
return scaled_batch
def _mapper(batch_data):
try:
if is_padding:
batch_data = padding_minibatch(batch_data)
if len(random_shapes) > 0:
batch_data = random_shape(batch_data)
if len(multi_scales) > 0:
batch_data = multi_scale_resize(batch_data)
if enable_multiscale_test:
batch_data = padding_multiscale_test(batch_data)
except Exception as e:
errmsg = "post-process failed with error: " + str(e)
logger.warning(errmsg)
raise e
return batch_data
return _mapper
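# Hedged usage sketch: the mapper operates on legacy tuple-style batches where
# each sample is (image_chw, im_info, ...); values here are illustrative.
import numpy as np

mapper = build_post_map(coarsest_stride=32, is_padding=True,
                        use_padded_im_info=True)
toy_batch = [(np.zeros((3, 400, 500), dtype=np.float32),
              np.array([400., 500., 1.], dtype=np.float32))]
toy_batch = mapper(toy_batch)
# toy_batch[0][0].shape == (3, 416, 512); im_info[:2] == [416., 512.]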
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import functools
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from ..dataset import Dataset
class ProxiedDataset(Dataset):
"""proxy method called to 'self._ds' when if not defined"""
def __init__(self, ds):
super(ProxiedDataset, self).__init__()
self._ds = ds
methods = filter(lambda k: not k.startswith('_'),
Dataset.__dict__.keys())
for m in methods:
func = functools.partial(self._proxy_method, getattr(self, m))
setattr(self, m, func)
def _proxy_method(self, func, *args, **kwargs):
"""
Proxy a call to 'func'; if it raises NotImplementedError, fall back
to the method of self._ds with the same name (func.__name__).
"""
method = func.__name__
try:
return func(*args, **kwargs)
except NotImplementedError:
ds_func = getattr(self._ds, method)
return ds_func(*args, **kwargs)
class MappedDataset(ProxiedDataset):
def __init__(self, ds, mapper):
super(MappedDataset, self).__init__(ds)
self._ds = ds
self._mapper = mapper
def next(self):
sample = self._ds.next()
return self._mapper(sample)
class BatchedDataset(ProxiedDataset):
"""
Batching samples
Args:
ds (instance of Dataset): dataset to be batched
batchsize (int): sample number for each batch
drop_last (bool): whether to drop the last incomplete batch
drop_empty (bool): drop samples which have empty field
"""
def __init__(self, ds, batchsize, drop_last=False, drop_empty=True):
super(BatchedDataset, self).__init__(ds)
self._batchsz = batchsize
self._drop_last = drop_last
self._drop_empty = drop_empty
def next(self):
"""proxy to self._ds.next"""
def empty(x):
if isinstance(x, np.ndarray) and x.size == 0:
return True
elif isinstance(x, Sequence) and len(x) == 0:
return True
else:
return False
def has_empty(items):
if any(x is None for x in items):
return True
if any(empty(x) for x in items):
return True
return False
batch = []
for _ in range(self._batchsz):
try:
out = self._ds.next()
while self._drop_empty and has_empty(out):
out = self._ds.next()
batch.append(out)
except StopIteration:
if not self._drop_last and len(batch) > 0:
return batch
else:
raise StopIteration
return batch
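# Hedged composition sketch: ProxiedDataset forwards unimplemented Dataset
# methods to the wrapped dataset, so the pieces chain into a small pipeline.
# `CocoSource` is a hypothetical Dataset subclass with a working next().
def _flip_mapper(sample):
    # per-sample transform applied lazily by MappedDataset
    sample['image'] = sample['image'][:, ::-1, :]
    return sample

pipeline = BatchedDataset(
    MappedDataset(CocoSource(), _flip_mapper), batchsize=8, drop_last=True)
batch = pipeline.next()  # a list of 8 mapped samples; empty fields skipped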
...@@ -17,8 +17,9 @@ from __future__ import division ...@@ -17,8 +17,9 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
from paddle import fluid from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
...@@ -74,8 +75,8 @@ class BlazeFace(object): ...@@ -74,8 +75,8 @@ class BlazeFace(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
gt_label = feed_vars['gt_label'] gt_class = feed_vars['gt_class']
body_feats = self.backbone(im) body_feats = self.backbone(im)
locs, confs, box, box_var = self._multi_box_head( locs, confs, box, box_var = self._multi_box_head(
...@@ -88,8 +89,8 @@ class BlazeFace(object): ...@@ -88,8 +89,8 @@ class BlazeFace(object):
loss = fluid.layers.ssd_loss( loss = fluid.layers.ssd_loss(
locs, locs,
confs, confs,
gt_box, gt_bbox,
gt_label, gt_class,
box, box,
box_var, box_var,
overlap_threshold=0.35, overlap_threshold=0.35,
...@@ -169,6 +170,38 @@ class BlazeFace(object): ...@@ -169,6 +170,38 @@ class BlazeFace(object):
box_vars = fluid.layers.concat(vars) box_vars = fluid.layers.concat(vars)
return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars
def _inputs_def(self, image_shape):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0},
}
# yapf: enable
return inputs_def
def build_inputs(
self,
image_shape=[3, None, None],
fields=['image', 'im_id', 'gt_bbox', 'gt_class'], # for train
use_dataloader=True,
iterable=False):
inputs_def = self._inputs_def(image_shape)
feed_vars = OrderedDict([(key, fluid.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=64,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
...@@ -17,12 +17,15 @@ from __future__ import division ...@@ -17,12 +17,15 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import OrderedDict from collections import OrderedDict
import copy
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .input_helper import multiscale_def
__all__ = ['CascadeMaskRCNN'] __all__ = ['CascadeMaskRCNN']
...@@ -82,7 +85,7 @@ class CascadeMaskRCNN(object): ...@@ -82,7 +85,7 @@ class CascadeMaskRCNN(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
if mode == 'train': if mode == 'train':
required_fields = [ required_fields = [
'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info' 'gt_class', 'gt_bbox', 'gt_mask', 'is_crowd', 'im_info'
] ]
else: else:
required_fields = ['im_shape', 'im_info'] required_fields = ['im_shape', 'im_info']
...@@ -90,7 +93,7 @@ class CascadeMaskRCNN(object): ...@@ -90,7 +93,7 @@ class CascadeMaskRCNN(object):
im = feed_vars['image'] im = feed_vars['image']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
is_crowd = feed_vars['is_crowd'] is_crowd = feed_vars['is_crowd']
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
...@@ -116,7 +119,7 @@ class CascadeMaskRCNN(object): ...@@ -116,7 +119,7 @@ class CascadeMaskRCNN(object):
rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)
if mode == 'train': if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
else: else:
if self.rpn_only: if self.rpn_only:
im_scale = fluid.layers.slice( im_scale = fluid.layers.slice(
...@@ -174,7 +177,7 @@ class CascadeMaskRCNN(object): ...@@ -174,7 +177,7 @@ class CascadeMaskRCNN(object):
mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner( mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
rois=rois, rois=rois,
gt_classes=feed_vars['gt_label'], gt_classes=feed_vars['gt_class'],
is_crowd=feed_vars['is_crowd'], is_crowd=feed_vars['is_crowd'],
gt_segms=feed_vars['gt_mask'], gt_segms=feed_vars['gt_mask'],
im_info=feed_vars['im_info'], im_info=feed_vars['im_info'],
...@@ -204,25 +207,16 @@ class CascadeMaskRCNN(object): ...@@ -204,25 +207,16 @@ class CascadeMaskRCNN(object):
required_fields = ['image', 'im_info'] required_fields = ['image', 'im_info']
self._input_check(required_fields, feed_vars) self._input_check(required_fields, feed_vars)
ims = []
for k in feed_vars.keys():
if 'image' in k:
ims.append(feed_vars[k])
result = {} result = {}
if not mask_branch: if not mask_branch:
assert 'im_shape' in feed_vars, \ assert 'im_shape' in feed_vars, \
"{} has no im_shape field".format(feed_vars) "{} has no im_shape field".format(feed_vars)
result.update(feed_vars) result.update(feed_vars)
for i, im in enumerate(ims): for i in range(len(self.im_info_names) // 2):
im_info = fluid.layers.slice( im = feed_vars[self.im_info_names[2 * i]]
input=feed_vars['im_info'], im_info = feed_vars[self.im_info_names[2 * i + 1]]
axes=[1],
starts=[3 * i],
ends=[3 * i + 3])
body_feats = self.backbone(im) body_feats = self.backbone(im)
result.update(body_feats)
# FPN # FPN
if self.fpn is not None: if self.fpn is not None:
...@@ -282,7 +276,6 @@ class CascadeMaskRCNN(object): ...@@ -282,7 +276,6 @@ class CascadeMaskRCNN(object):
else: else:
mask_name = 'mask_pred_' + str(i) mask_name = 'mask_pred_' + str(i)
bbox_pred = feed_vars['bbox'] bbox_pred = feed_vars['bbox']
result.update({im.name: im})
if 'flip' in im.name: if 'flip' in im.name:
mask_name += '_flip' mask_name += '_flip'
bbox_pred = feed_vars['bbox_flip'] bbox_pred = feed_vars['bbox_flip']
...@@ -372,6 +365,65 @@ class CascadeMaskRCNN(object): ...@@ -372,6 +365,65 @@ class CascadeMaskRCNN(object):
return refined_bbox return refined_bbox
def _inputs_def(self, image_shape):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_info': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
'im_shape': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_crowd': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'gt_mask': {'shape': [None, 2], 'dtype': 'float32', 'lod_level': 3}, # polygon coordinates
'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
}
# yapf: enable
return inputs_def
def build_inputs(self,
image_shape=[3, None, None],
fields=[
'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class',
'is_crowd', 'gt_mask'
],
multi_scale=False,
num_scales=-1,
use_flip=None,
use_dataloader=True,
iterable=False,
mask_branch=False):
inputs_def = self._inputs_def(image_shape)
fields = copy.deepcopy(fields)
if multi_scale:
ms_def, ms_fields = multiscale_def(image_shape, num_scales,
use_flip)
inputs_def.update(ms_def)
fields += ms_fields
self.im_info_names = ['image', 'im_info'] + ms_fields
if mask_branch:
box_fields = ['bbox', 'bbox_flip'] if use_flip else ['bbox']
for key in box_fields:
inputs_def[key] = {
'shape': [6],
'dtype': 'float32',
'lod_level': 1
}
fields += box_fields
feed_vars = OrderedDict([(key, fluid.layers.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
use_dataloader = use_dataloader and not mask_branch
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=64,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
...@@ -16,12 +16,14 @@ from __future__ import absolute_import ...@@ -16,12 +16,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import copy
from collections import OrderedDict from collections import OrderedDict
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .input_helper import multiscale_def
__all__ = ['CascadeRCNN'] __all__ = ['CascadeRCNN']
...@@ -75,7 +77,7 @@ class CascadeRCNN(object): ...@@ -75,7 +77,7 @@ class CascadeRCNN(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
if mode == 'train': if mode == 'train':
required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info'] required_fields = ['gt_class', 'gt_bbox', 'is_crowd', 'im_info']
else: else:
required_fields = ['im_shape', 'im_info'] required_fields = ['im_shape', 'im_info']
self._input_check(required_fields, feed_vars) self._input_check(required_fields, feed_vars)
...@@ -84,7 +86,7 @@ class CascadeRCNN(object): ...@@ -84,7 +86,7 @@ class CascadeRCNN(object):
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
is_crowd = feed_vars['is_crowd'] is_crowd = feed_vars['is_crowd']
mixed_precision_enabled = mixed_precision_global_state() is not None mixed_precision_enabled = mixed_precision_global_state() is not None
...@@ -108,7 +110,9 @@ class CascadeRCNN(object): ...@@ -108,7 +110,9 @@ class CascadeRCNN(object):
rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)
if mode == 'train': if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) #fluid.layers.Print(gt_bbox)
#fluid.layers.Print(is_crowd)
rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
else: else:
if self.rpn_only: if self.rpn_only:
im_scale = fluid.layers.slice( im_scale = fluid.layers.slice(
...@@ -171,19 +175,14 @@ class CascadeRCNN(object): ...@@ -171,19 +175,14 @@ class CascadeRCNN(object):
def build_multi_scale(self, feed_vars): def build_multi_scale(self, feed_vars):
required_fields = ['image', 'im_shape', 'im_info'] required_fields = ['image', 'im_shape', 'im_info']
self._input_check(required_fields, feed_vars) self._input_check(required_fields, feed_vars)
ims = []
for k in feed_vars.keys():
if 'image' in k:
ims.append(feed_vars[k])
result = {} result = {}
result.update(feed_vars) im_shape = feed_vars['im_shape']
for i, im in enumerate(ims): result['im_shape'] = im_shape
im_info = fluid.layers.slice(
input=feed_vars['im_info'], for i in range(len(self.im_info_names) // 2):
axes=[1], im = feed_vars[self.im_info_names[2 * i]]
starts=[3 * i], im_info = feed_vars[self.im_info_names[2 * i + 1]]
ends=[3 * i + 3])
im_shape = feed_vars['im_shape']
# backbone # backbone
body_feats = self.backbone(im) body_feats = self.backbone(im)
...@@ -277,6 +276,54 @@ class CascadeRCNN(object): ...@@ -277,6 +276,54 @@ class CascadeRCNN(object):
return refined_bbox return refined_bbox
def _inputs_def(self, image_shape):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_info': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'im_shape': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_crowd': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
}
# yapf: enable
return inputs_def
def build_inputs(self,
image_shape=[3, None, None],
fields=[
'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class',
'is_crowd'
],
multi_scale=False,
num_scales=-1,
use_flip=None,
use_dataloader=True,
iterable=False):
inputs_def = self._inputs_def(image_shape)
fields = copy.deepcopy(fields)
if multi_scale:
ms_def, ms_fields = multiscale_def(image_shape, num_scales,
use_flip)
inputs_def.update(ms_def)
fields += ms_fields
self.im_info_names = ['image', 'im_info'] + ms_fields
feed_vars = OrderedDict([(key, fluid.layers.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=64,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
...@@ -19,6 +19,9 @@ from __future__ import print_function ...@@ -19,6 +19,9 @@ from __future__ import print_function
import numpy as np import numpy as np
import sys import sys
from collections import OrderedDict
import copy
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
...@@ -48,14 +51,14 @@ class CascadeRCNNClsAware(object): ...@@ -48,14 +51,14 @@ class CascadeRCNNClsAware(object):
'bbox_head' 'bbox_head'
] ]
def __init__(self, def __init__(
backbone, self,
rpn_head, backbone,
roi_extractor='FPNRoIAlign', rpn_head,
bbox_head='CascadeBBoxHead', roi_extractor='FPNRoIAlign',
bbox_assigner='CascadeBBoxAssigner', bbox_head='CascadeBBoxHead',
fpn='FPN', bbox_assigner='CascadeBBoxAssigner',
): fpn='FPN', ):
super(CascadeRCNNClsAware, self).__init__() super(CascadeRCNNClsAware, self).__init__()
assert fpn is not None, "cascade RCNN requires FPN" assert fpn is not None, "cascade RCNN requires FPN"
self.backbone = backbone self.backbone = backbone
...@@ -78,9 +81,9 @@ class CascadeRCNNClsAware(object): ...@@ -78,9 +81,9 @@ class CascadeRCNNClsAware(object):
im = feed_vars['image'] im = feed_vars['image']
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
is_crowd = feed_vars['is_crowd'] is_crowd = feed_vars['is_crowd']
gt_label = feed_vars['gt_label'] gt_class = feed_vars['gt_class']
else: else:
im_shape = feed_vars['im_shape'] im_shape = feed_vars['im_shape']
...@@ -95,7 +98,7 @@ class CascadeRCNNClsAware(object): ...@@ -95,7 +98,7 @@ class CascadeRCNNClsAware(object):
rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)
if mode == 'train': if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
proposal_list = [] proposal_list = []
roi_feat_list = [] roi_feat_list = []
...@@ -103,10 +106,11 @@ class CascadeRCNNClsAware(object): ...@@ -103,10 +106,11 @@ class CascadeRCNNClsAware(object):
rcnn_target_list = [] rcnn_target_list = []
bbox_pred = None bbox_pred = None
self.cascade_var_v = [] self.cascade_var_v = []
for stage in range(3): for stage in range(3):
var_v = np.array(self.cascade_bbox_reg_weights[stage], dtype="float32") var_v = np.array(
self.cascade_bbox_reg_weights[stage], dtype="float32")
prior_box_var = fluid.layers.create_tensor(dtype="float32") prior_box_var = fluid.layers.create_tensor(dtype="float32")
fluid.layers.assign(input=var_v, output=prior_box_var) fluid.layers.assign(input=var_v, output=prior_box_var)
self.cascade_var_v.append(prior_box_var) self.cascade_var_v.append(prior_box_var)
...@@ -124,42 +128,37 @@ class CascadeRCNNClsAware(object): ...@@ -124,42 +128,37 @@ class CascadeRCNNClsAware(object):
outs = self.bbox_assigner( outs = self.bbox_assigner(
input_rois=pool_rois, feed_vars=feed_vars, curr_stage=stage) input_rois=pool_rois, feed_vars=feed_vars, curr_stage=stage)
pool_rois = outs[0] pool_rois = outs[0]
rcnn_target_list.append( outs ) rcnn_target_list.append(outs)
# extract roi features # extract roi features
roi_feat = self.roi_extractor(body_feats, pool_rois, spatial_scale) roi_feat = self.roi_extractor(body_feats, pool_rois, spatial_scale)
roi_feat_list.append(roi_feat) roi_feat_list.append(roi_feat)
# bbox head # bbox head
cls_score, bbox_pred = self.bbox_head.get_output( cls_score, bbox_pred = self.bbox_head.get_output(
roi_feat, roi_feat,
cls_agnostic_bbox_reg=self.bbox_head.num_classes, cls_agnostic_bbox_reg=self.bbox_head.num_classes,
wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage], wb_scalar=1.0 / self.cascade_rcnn_loss_weight[stage],
name='_' + str(stage + 1) ) name='_' + str(stage + 1))
cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
decoded_box, decoded_assign_box = fluid.layers.box_decoder_and_assign( decoded_box, decoded_assign_box = fluid.layers.box_decoder_and_assign(
pool_rois, pool_rois, self.cascade_var_v[stage], bbox_pred, cls_prob,
self.cascade_var_v[stage], self.bbox_clip)
bbox_pred,
cls_prob,
self.bbox_clip)
if mode == "train": if mode == "train":
decoded_box.stop_gradient = True decoded_box.stop_gradient = True
decoded_assign_box.stop_gradient = True decoded_assign_box.stop_gradient = True
else: else:
self.cascade_cls_prob.append( cls_prob ) self.cascade_cls_prob.append(cls_prob)
self.cascade_decoded_box.append(decoded_box) self.cascade_decoded_box.append(decoded_box)
rcnn_pred_list.append((cls_score, bbox_pred)) rcnn_pred_list.append((cls_score, bbox_pred))
# out loop # out loop
if mode == 'train': if mode == 'train':
loss = self.bbox_head.get_loss(rcnn_pred_list, loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list,
rcnn_target_list,
self.cascade_rcnn_loss_weight) self.cascade_rcnn_loss_weight)
loss.update(rpn_loss) loss.update(rpn_loss)
total_loss = fluid.layers.sum(list(loss.values())) total_loss = fluid.layers.sum(list(loss.values()))
...@@ -167,12 +166,46 @@ class CascadeRCNNClsAware(object): ...@@ -167,12 +166,46 @@ class CascadeRCNNClsAware(object):
return loss return loss
else: else:
pred = self.bbox_head.get_prediction_cls_aware( pred = self.bbox_head.get_prediction_cls_aware(
im_info, im_shape, im_info, im_shape, self.cascade_cls_prob,
self.cascade_cls_prob, self.cascade_decoded_box, self.cascade_bbox_reg_weights)
self.cascade_decoded_box,
self.cascade_bbox_reg_weights)
return pred return pred
def _inputs_def(self, image_shape):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_info': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
'im_shape': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_crowd': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
}
# yapf: enable
return inputs_def
def build_inputs(self,
image_shape=[3, None, None],
fields=[
'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class',
'is_crowd', 'gt_mask'
],
use_dataloader=True,
iterable=False):
inputs_def = self._inputs_def(image_shape)
feed_vars = OrderedDict([(key, fluid.layers.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=64,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
...@@ -17,8 +17,9 @@ from __future__ import division ...@@ -17,8 +17,9 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
from paddle import fluid from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
...@@ -66,8 +67,8 @@ class FaceBoxes(object): ...@@ -66,8 +67,8 @@ class FaceBoxes(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
gt_label = feed_vars['gt_label'] gt_class = feed_vars['gt_class']
body_feats = self.backbone(im) body_feats = self.backbone(im)
locs, confs, box, box_var = self._multi_box_head( locs, confs, box, box_var = self._multi_box_head(
...@@ -77,8 +78,8 @@ class FaceBoxes(object): ...@@ -77,8 +78,8 @@ class FaceBoxes(object):
loss = fluid.layers.ssd_loss( loss = fluid.layers.ssd_loss(
locs, locs,
confs, confs,
gt_box, gt_bbox,
gt_label, gt_class,
box, box,
box_var, box_var,
overlap_threshold=0.35, overlap_threshold=0.35,
...@@ -141,6 +142,38 @@ class FaceBoxes(object): ...@@ -141,6 +142,38 @@ class FaceBoxes(object):
box_vars = fluid.layers.concat(vars) box_vars = fluid.layers.concat(vars)
return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars
def _inputs_def(self, image_shape):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0},
}
# yapf: enable
return inputs_def
def build_inputs(
self,
image_shape=[3, None, None],
fields=['image', 'im_id', 'gt_bbox', 'gt_class'], # for train
use_dataloader=True,
iterable=False):
inputs_def = self._inputs_def(image_shape)
feed_vars = OrderedDict([(key, fluid.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=64,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
...@@ -17,12 +17,15 @@ from __future__ import division ...@@ -17,12 +17,15 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import OrderedDict from collections import OrderedDict
import copy
from paddle import fluid from paddle import fluid
from ppdet.experimental import mixed_precision_global_state from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .input_helper import multiscale_def
__all__ = ['FasterRCNN'] __all__ = ['FasterRCNN']
...@@ -64,7 +67,7 @@ class FasterRCNN(object): ...@@ -64,7 +67,7 @@ class FasterRCNN(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
if mode == 'train': if mode == 'train':
required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info'] required_fields = ['gt_class', 'gt_bbox', 'is_crowd', 'im_info']
else: else:
required_fields = ['im_shape', 'im_info'] required_fields = ['im_shape', 'im_info']
self._input_check(required_fields, feed_vars) self._input_check(required_fields, feed_vars)
...@@ -72,7 +75,7 @@ class FasterRCNN(object): ...@@ -72,7 +75,7 @@ class FasterRCNN(object):
im = feed_vars['image'] im = feed_vars['image']
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
is_crowd = feed_vars['is_crowd'] is_crowd = feed_vars['is_crowd']
else: else:
im_shape = feed_vars['im_shape'] im_shape = feed_vars['im_shape']
...@@ -97,15 +100,15 @@ class FasterRCNN(object): ...@@ -97,15 +100,15 @@ class FasterRCNN(object):
rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)
if mode == 'train': if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
# sampled rpn proposals # sampled rpn proposals
for var in ['gt_label', 'is_crowd', 'gt_box', 'im_info']: for var in ['gt_class', 'is_crowd', 'gt_bbox', 'im_info']:
assert var in feed_vars, "{} has no {}".format(feed_vars, var) assert var in feed_vars, "{} has no {}".format(feed_vars, var)
outs = self.bbox_assigner( outs = self.bbox_assigner(
rpn_rois=rois, rpn_rois=rois,
gt_classes=feed_vars['gt_label'], gt_classes=feed_vars['gt_class'],
is_crowd=feed_vars['is_crowd'], is_crowd=feed_vars['is_crowd'],
gt_boxes=feed_vars['gt_box'], gt_boxes=feed_vars['gt_bbox'],
im_info=feed_vars['im_info']) im_info=feed_vars['im_info'])
rois = outs[0] rois = outs[0]
...@@ -145,21 +148,14 @@ class FasterRCNN(object): ...@@ -145,21 +148,14 @@ class FasterRCNN(object):
def build_multi_scale(self, feed_vars): def build_multi_scale(self, feed_vars):
required_fields = ['image', 'im_info', 'im_shape'] required_fields = ['image', 'im_info', 'im_shape']
self._input_check(required_fields, feed_vars) self._input_check(required_fields, feed_vars)
ims = []
for k in feed_vars.keys():
if 'image' in k:
ims.append(feed_vars[k])
result = {} result = {}
result.update(feed_vars) im_shape = feed_vars['im_shape']
for i, im in enumerate(ims): result['im_shape'] = im_shape
im_info = fluid.layers.slice( for i in range(len(self.im_info_names) // 2):
input=feed_vars['im_info'], im = feed_vars[self.im_info_names[2 * i]]
axes=[1], im_info = feed_vars[self.im_info_names[2 * i + 1]]
starts=[3 * i],
ends=[3 * i + 3])
im_shape = feed_vars['im_shape']
body_feats = self.backbone(im) body_feats = self.backbone(im)
result.update(body_feats)
body_feat_names = list(body_feats.keys()) body_feat_names = list(body_feats.keys())
if self.fpn is not None: if self.fpn is not None:
...@@ -192,6 +188,54 @@ class FasterRCNN(object): ...@@ -192,6 +188,54 @@ class FasterRCNN(object):
assert var in feed_vars, \ assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var) "{} has no {} field".format(feed_vars, var)
def _inputs_def(self, image_shape):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_info': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
'im_shape': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_crowd': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
}
# yapf: enable
return inputs_def
def build_inputs(
self,
image_shape=[3, None, None],
fields=[
'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'
], # for train
multi_scale=False,
num_scales=-1,
use_flip=None,
use_dataloader=True,
iterable=False):
inputs_def = self._inputs_def(image_shape)
fields = copy.deepcopy(fields)
if multi_scale:
ms_def, ms_fields = multiscale_def(image_shape, num_scales,
use_flip)
inputs_def.update(ms_def)
fields += ms_fields
self.im_info_names = ['image', 'im_info'] + ms_fields
feed_vars = OrderedDict([(key, fluid.layers.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=64,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -11,35 +11,34 @@ ...@@ -11,35 +11,34 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#!/usr/bin/python
#-*-coding:utf-8-*-
"""Run all tests
"""
import unittest
import test_loader
import test_operator
import test_roidb_source
import test_iterator_source
import test_transformer
import test_reader
if __name__ == '__main__': def multiscale_def(image_shape, num_scale, use_flip=True):
alltests = unittest.TestSuite([ base_name_list = ['image']
unittest.TestLoader().loadTestsFromTestCase(t) \ multiscale_def = {}
for t in [ ms_def_names = []
test_loader.TestLoader, if use_flip:
test_operator.TestBase, num_scale //= 2
test_roidb_source.TestRoiDbSource, base_name_list.append('image_flip')
test_iterator_source.TestIteratorSource, multiscale_def['im_info_image_flip'] = {
test_transformer.TestTransformer, 'shape': [None, 3],
test_reader.TestReader, 'dtype': 'float32',
] 'lod_level': 0
]) }
for base_name in base_name_list:
was_succ = unittest\ for i in range(0, num_scale - 1):
.TextTestRunner(verbosity=2)\ name = base_name + '_scale_' + str(i)
.run(alltests)\ multiscale_def[name] = {
.wasSuccessful() 'shape': [None] + image_shape,
'dtype': 'float32',
exit(0 if was_succ else 1) 'lod_level': 0
}
im_info_name = 'im_info_' + name
multiscale_def[im_info_name] = {
'shape': [None, 3],
'dtype': 'float32',
'lod_level': 0
}
ms_def_names.append(name)
ms_def_names.append(im_info_name)
return multiscale_def, ms_def_names
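# Hedged sketch: enumerate the extra inputs declared for multiscale test.
defs, names = multiscale_def([3, None, None], num_scale=4, use_flip=True)
# use_flip halves num_scale to 2, so each branch gets one extra scale:
# names == ['image_scale_0', 'im_info_image_scale_0',
#           'image_flip_scale_0', 'im_info_image_flip_scale_0']
# defs also declares 'im_info_image_flip'; every image entry is float32 with
# shape [None, 3, None, None], every im_info entry has shape [None, 3].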
...@@ -17,12 +17,15 @@ from __future__ import division ...@@ -17,12 +17,15 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import OrderedDict from collections import OrderedDict
import copy
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .input_helper import multiscale_def
__all__ = ['MaskRCNN'] __all__ = ['MaskRCNN']
...@@ -71,7 +74,7 @@ class MaskRCNN(object): ...@@ -71,7 +74,7 @@ class MaskRCNN(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
if mode == 'train': if mode == 'train':
required_fields = [ required_fields = [
'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info' 'gt_class', 'gt_bbox', 'gt_mask', 'is_crowd', 'im_info'
] ]
else: else:
required_fields = ['im_shape', 'im_info'] required_fields = ['im_shape', 'im_info']
...@@ -101,14 +104,14 @@ class MaskRCNN(object): ...@@ -101,14 +104,14 @@ class MaskRCNN(object):
rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)
if mode == 'train': if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, feed_vars['gt_box'], rpn_loss = self.rpn_head.get_loss(im_info, feed_vars['gt_bbox'],
feed_vars['is_crowd']) feed_vars['is_crowd'])
outs = self.bbox_assigner( outs = self.bbox_assigner(
rpn_rois=rois, rpn_rois=rois,
gt_classes=feed_vars['gt_label'], gt_classes=feed_vars['gt_class'],
is_crowd=feed_vars['is_crowd'], is_crowd=feed_vars['is_crowd'],
gt_boxes=feed_vars['gt_box'], gt_boxes=feed_vars['gt_bbox'],
im_info=feed_vars['im_info']) im_info=feed_vars['im_info'])
rois = outs[0] rois = outs[0]
labels_int32 = outs[1] labels_int32 = outs[1]
...@@ -124,7 +127,7 @@ class MaskRCNN(object): ...@@ -124,7 +127,7 @@ class MaskRCNN(object):
mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner( mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
rois=rois, rois=rois,
gt_classes=feed_vars['gt_label'], gt_classes=feed_vars['gt_class'],
is_crowd=feed_vars['is_crowd'], is_crowd=feed_vars['is_crowd'],
gt_segms=feed_vars['gt_mask'], gt_segms=feed_vars['gt_mask'],
im_info=feed_vars['im_info'], im_info=feed_vars['im_info'],
...@@ -160,25 +163,16 @@ class MaskRCNN(object): ...@@ -160,25 +163,16 @@ class MaskRCNN(object):
required_fields = ['image', 'im_info'] required_fields = ['image', 'im_info']
self._input_check(required_fields, feed_vars) self._input_check(required_fields, feed_vars)
ims = []
for k in feed_vars.keys():
if 'image' in k:
ims.append(feed_vars[k])
result = {} result = {}
if not mask_branch: if not mask_branch:
assert 'im_shape' in feed_vars, \ assert 'im_shape' in feed_vars, \
"{} has no im_shape field".format(feed_vars) "{} has no im_shape field".format(feed_vars)
result.update(feed_vars) result.update(feed_vars)
for i, im in enumerate(ims): for i in range(len(self.im_info_names) // 2):
im_info = fluid.layers.slice( im = feed_vars[self.im_info_names[2 * i]]
input=feed_vars['im_info'], im_info = feed_vars[self.im_info_names[2 * i + 1]]
axes=[1],
starts=[3 * i],
ends=[3 * i + 3])
body_feats = self.backbone(im) body_feats = self.backbone(im)
result.update(body_feats)
# FPN # FPN
if self.fpn is not None: if self.fpn is not None:
...@@ -205,7 +199,7 @@ class MaskRCNN(object): ...@@ -205,7 +199,7 @@ class MaskRCNN(object):
else: else:
mask_name = 'mask_pred_' + str(i) mask_name = 'mask_pred_' + str(i)
bbox_pred = feed_vars['bbox'] bbox_pred = feed_vars['bbox']
result.update({im.name: im}) #result.update({im.name: im})
if 'flip' in im.name: if 'flip' in im.name:
mask_name += '_flip' mask_name += '_flip'
bbox_pred = feed_vars['bbox_flip'] bbox_pred = feed_vars['bbox_flip']
...@@ -223,12 +217,12 @@ class MaskRCNN(object): ...@@ -223,12 +217,12 @@ class MaskRCNN(object):
im_shape, im_shape,
spatial_scale, spatial_scale,
bbox_pred=None): bbox_pred=None):
if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois)
else:
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
if not bbox_pred: if not bbox_pred:
if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois)
else:
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info, bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info,
im_shape) im_shape)
bbox_pred = bbox_pred['bbox'] bbox_pred = bbox_pred['bbox']
...@@ -258,6 +252,7 @@ class MaskRCNN(object): ...@@ -258,6 +252,7 @@ class MaskRCNN(object):
mask_rois = bbox * im_scale mask_rois = bbox * im_scale
if self.fpn is None: if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
mask_feat = self.roi_extractor(last_feat, mask_rois) mask_feat = self.roi_extractor(last_feat, mask_rois)
mask_feat = self.bbox_head.get_head_feat(mask_feat) mask_feat = self.bbox_head.get_head_feat(mask_feat)
else: else:
...@@ -273,6 +268,65 @@ class MaskRCNN(object): ...@@ -273,6 +268,65 @@ class MaskRCNN(object):
assert var in feed_vars, \ assert var in feed_vars, \
"{} has no {} field".format(feed_vars, var) "{} has no {} field".format(feed_vars, var)
    def _inputs_def(self, image_shape):
        im_shape = [None] + image_shape
        # yapf: disable
        inputs_def = {
            'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
            'im_info': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
            'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
            'im_shape': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
            'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
            'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
            'is_crowd': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
            'gt_mask': {'shape': [None, 2], 'dtype': 'float32', 'lod_level': 3},  # polygon coordinates
            'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
        }
        # yapf: enable
        return inputs_def

    def build_inputs(self,
                     image_shape=[3, None, None],
                     fields=[
                         'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class',
                         'is_crowd', 'gt_mask'
                     ],
                     multi_scale=False,
                     num_scales=-1,
                     use_flip=None,
                     use_dataloader=True,
                     iterable=False,
                     mask_branch=False):
        inputs_def = self._inputs_def(image_shape)
        fields = copy.deepcopy(fields)
        if multi_scale:
            ms_def, ms_fields = multiscale_def(image_shape, num_scales,
                                               use_flip)
            inputs_def.update(ms_def)
            fields += ms_fields
            self.im_info_names = ['image', 'im_info'] + ms_fields
            if mask_branch:
                box_fields = ['bbox', 'bbox_flip'] if use_flip else ['bbox']
                for key in box_fields:
                    inputs_def[key] = {
                        'shape': [6],
                        'dtype': 'float32',
                        'lod_level': 1
                    }
                fields += box_fields
        feed_vars = OrderedDict([(key, fluid.layers.data(
            name=key,
            shape=inputs_def[key]['shape'],
            dtype=inputs_def[key]['dtype'],
            lod_level=inputs_def[key]['lod_level'])) for key in fields])
        use_dataloader = use_dataloader and not mask_branch
        loader = fluid.io.DataLoader.from_generator(
            feed_list=list(feed_vars.values()),
            capacity=64,
            use_double_buffer=True,
            iterable=iterable) if use_dataloader else None
        return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
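A hedged sketch of the MaskRCNN-specific switches above; the argument values are illustrative and loosely mirror the multi-scale test path in tools/eval.py further down:

# Illustrative only: multi_scale registers the extra image/im_info pairs,
# and mask_branch feeds boxes directly instead of using a dataloader.
feed_vars, loader = model.build_inputs(
    image_shape=[3, None, None],
    fields=['image', 'im_info', 'im_id', 'im_shape'],
    multi_scale=True, num_scales=4, use_flip=True,
    mask_branch=True)   # adds 'bbox'/'bbox_flip'; loader comes back as None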
...@@ -50,8 +50,8 @@ class RetinaNet(object): ...@@ -50,8 +50,8 @@ class RetinaNet(object):
im = feed_vars['image'] im = feed_vars['image']
im_info = feed_vars['im_info'] im_info = feed_vars['im_info']
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
gt_label = feed_vars['gt_label'] gt_class = feed_vars['gt_class']
is_crowd = feed_vars['is_crowd'] is_crowd = feed_vars['is_crowd']
mixed_precision_enabled = mixed_precision_global_state() is not None mixed_precision_enabled = mixed_precision_global_state() is not None
...@@ -73,7 +73,7 @@ class RetinaNet(object): ...@@ -73,7 +73,7 @@ class RetinaNet(object):
# retinanet head # retinanet head
if mode == 'train': if mode == 'train':
loss = self.retina_head.get_loss(body_feats, spatial_scale, im_info, loss = self.retina_head.get_loss(body_feats, spatial_scale, im_info,
gt_box, gt_label, is_crowd) gt_bbox, gt_class, is_crowd)
total_loss = fluid.layers.sum(list(loss.values())) total_loss = fluid.layers.sum(list(loss.values()))
loss.update({'loss': total_loss}) loss.update({'loss': total_loss})
return loss return loss
...@@ -82,6 +82,43 @@ class RetinaNet(object): ...@@ -82,6 +82,43 @@ class RetinaNet(object):
im_info) im_info)
return pred return pred
    def _inputs_def(self, image_shape):
        im_shape = [None] + image_shape
        # yapf: disable
        inputs_def = {
            'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
            'im_info': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
            'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
            'im_shape': {'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
            'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
            'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
            'is_crowd': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
            'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
        }
        # yapf: enable
        return inputs_def

    def build_inputs(
            self,
            image_shape=[3, None, None],
            fields=[
                'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'
            ],  # for-train
            use_dataloader=True,
            iterable=False):
        inputs_def = self._inputs_def(image_shape)
        feed_vars = OrderedDict([(key, fluid.layers.data(
            name=key,
            shape=inputs_def[key]['shape'],
            dtype=inputs_def[key]['dtype'],
            lod_level=inputs_def[key]['lod_level'])) for key in fields])
        loader = fluid.io.DataLoader.from_generator(
            feed_list=list(feed_vars.values()),
            capacity=64,
            use_double_buffer=True,
            iterable=iterable) if use_dataloader else None
        return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
...@@ -59,8 +59,8 @@ class SSD(object): ...@@ -59,8 +59,8 @@ class SSD(object):
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
if mode == 'train' or mode == 'eval': if mode == 'train' or mode == 'eval':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
gt_label = feed_vars['gt_label'] gt_class = feed_vars['gt_class']
mixed_precision_enabled = mixed_precision_global_state() is not None mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16 # cast inputs to FP16
...@@ -82,7 +82,7 @@ class SSD(object): ...@@ -82,7 +82,7 @@ class SSD(object):
inputs=body_feats, image=im, num_classes=self.num_classes) inputs=body_feats, image=im, num_classes=self.num_classes)
if mode == 'train': if mode == 'train':
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, loss = fluid.layers.ssd_loss(locs, confs, gt_bbox, gt_class, box,
box_var) box_var)
loss = fluid.layers.reduce_sum(loss) loss = fluid.layers.reduce_sum(loss)
return {'loss': loss} return {'loss': loss}
...@@ -90,6 +90,39 @@ class SSD(object): ...@@ -90,6 +90,39 @@ class SSD(object):
pred = self.output_decoder(locs, confs, box, box_var) pred = self.output_decoder(locs, confs, box, box_var)
return {'bbox': pred} return {'bbox': pred}
    def _inputs_def(self, image_shape):
        im_shape = [None] + image_shape
        # yapf: disable
        inputs_def = {
            'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
            'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
            'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
            'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
            'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0},
            'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
        }
        # yapf: enable
        return inputs_def

    def build_inputs(
            self,
            image_shape=[3, None, None],
            fields=['image', 'im_id', 'gt_bbox', 'gt_class'],  # for train
            use_dataloader=True,
            iterable=False):
        inputs_def = self._inputs_def(image_shape)
        feed_vars = OrderedDict([(key, fluid.data(
            name=key,
            shape=inputs_def[key]['shape'],
            dtype=inputs_def[key]['dtype'],
            lod_level=inputs_def[key]['lod_level'])) for key in fields])
        loader = fluid.io.DataLoader.from_generator(
            feed_list=list(feed_vars.values()),
            capacity=64,
            use_double_buffer=True,
            iterable=iterable) if use_dataloader else None
        return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, 'train') return self.build(feed_vars, 'train')
......
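The gt_bbox/gt_class inputs above are declared with lod_level 1, i.e. a variable number of boxes per image. A toy illustration of such a tensor, with invented shapes and values:

import numpy as np
import paddle.fluid as fluid

# Batch of two images with 2 and 1 ground-truth boxes respectively:
boxes = np.random.rand(3, 4).astype('float32')   # all boxes, stacked
lod = [[2, 1]]                                   # per-image lengths
t = fluid.create_lod_tensor(boxes, lod, fluid.CPUPlace())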
...@@ -64,18 +64,53 @@ class YOLOv3(object): ...@@ -64,18 +64,53 @@ class YOLOv3(object):
body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats] body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]
if mode == 'train': if mode == 'train':
gt_box = feed_vars['gt_box'] gt_bbox = feed_vars['gt_bbox']
gt_label = feed_vars['gt_label'] gt_class = feed_vars['gt_class']
gt_score = feed_vars['gt_score'] gt_score = feed_vars['gt_score']
return { return {
'loss': self.yolo_head.get_loss(body_feats, gt_box, gt_label, 'loss': self.yolo_head.get_loss(body_feats, gt_bbox, gt_class,
gt_score) gt_score)
} }
else: else:
im_size = feed_vars['im_size'] im_size = feed_vars['im_size']
return self.yolo_head.get_prediction(body_feats, im_size) return self.yolo_head.get_prediction(body_feats, im_size)
    def _inputs_def(self, image_shape, num_max_boxes):
        im_shape = [None] + image_shape
        # yapf: disable
        inputs_def = {
            'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
            'im_size': {'shape': [None, 2], 'dtype': 'int32', 'lod_level': 0},
            'im_id': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
            'gt_bbox': {'shape': [None, num_max_boxes, 4], 'dtype': 'float32', 'lod_level': 0},
            'gt_class': {'shape': [None, num_max_boxes], 'dtype': 'int32', 'lod_level': 0},
            'gt_score': {'shape': [None, num_max_boxes], 'dtype': 'float32', 'lod_level': 0},
            'is_difficult': {'shape': [None, num_max_boxes], 'dtype': 'int32', 'lod_level': 0},
        }
        # yapf: enable
        return inputs_def

    def build_inputs(
            self,
            image_shape=[3, None, None],
            fields=['image', 'gt_bbox', 'gt_class', 'gt_score'],  # for train
            num_max_boxes=50,
            use_dataloader=True,
            iterable=False):
        inputs_def = self._inputs_def(image_shape, num_max_boxes)
        feed_vars = OrderedDict([(key, fluid.data(
            name=key,
            shape=inputs_def[key]['shape'],
            dtype=inputs_def[key]['dtype'],
            lod_level=inputs_def[key]['lod_level'])) for key in fields])
        loader = fluid.io.DataLoader.from_generator(
            feed_list=list(feed_vars.values()),
            capacity=64,
            use_double_buffer=True,
            iterable=iterable) if use_dataloader else None
        return feed_vars, loader
def train(self, feed_vars): def train(self, feed_vars):
return self.build(feed_vars, mode='train') return self.build(feed_vars, mode='train')
......
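Unlike the detectors above, YOLOv3 fixes every ground-truth tensor to num_max_boxes with zero padding (lod_level 0), which is what the [None, num_max_boxes, 4] shape encodes. A toy sketch of the padding, with invented values:

import numpy as np

num_max_boxes = 50
gt = np.random.rand(3, 4).astype('float32')           # 3 real boxes
padded = np.zeros((num_max_boxes, 4), dtype='float32')
padded[:len(gt)] = gt                                 # trailing rows stay zero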
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

from collections import OrderedDict

from ppdet.data.transform.operators import *
from paddle import fluid

__all__ = ['create_feed']

# yapf: disable
feed_var_def = [
    {'name': 'im_info', 'shape': [3], 'dtype': 'float32', 'lod_level': 0},
    {'name': 'im_id', 'shape': [1], 'dtype': 'int32', 'lod_level': 0},
    {'name': 'gt_box', 'shape': [4], 'dtype': 'float32', 'lod_level': 1},
    {'name': 'gt_label', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
    {'name': 'is_crowd', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
    {'name': 'gt_mask', 'shape': [2], 'dtype': 'float32', 'lod_level': 3},
    {'name': 'is_difficult', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
    {'name': 'gt_score', 'shape': [1], 'dtype': 'float32', 'lod_level': 0},
    {'name': 'im_shape', 'shape': [3], 'dtype': 'float32', 'lod_level': 0},
    {'name': 'im_size', 'shape': [2], 'dtype': 'int32', 'lod_level': 0},
]
# yapf: enable


def create_feed(feed, iterable=False, sub_prog_feed=False):
    image_shape = feed.image_shape
    feed_var_map = {var['name']: var for var in feed_var_def}
    feed_var_map['image'] = {
        'name': 'image',
        'shape': image_shape,
        'dtype': 'float32',
        'lod_level': 0
    }

    # tensor padding with 0 is used instead of LoD tensor when
    # num_max_boxes is set
    if getattr(feed, 'num_max_boxes', None) is not None:
        feed_var_map['gt_label']['shape'] = [feed.num_max_boxes]
        feed_var_map['gt_score']['shape'] = [feed.num_max_boxes]
        feed_var_map['gt_box']['shape'] = [feed.num_max_boxes, 4]
        feed_var_map['is_difficult']['shape'] = [feed.num_max_boxes]
        feed_var_map['gt_label']['lod_level'] = 0
        feed_var_map['gt_score']['lod_level'] = 0
        feed_var_map['gt_box']['lod_level'] = 0
        feed_var_map['is_difficult']['lod_level'] = 0

    base_name_list = ['image']
    num_scale = getattr(feed, 'num_scale', 1)
    sample_transform = feed.sample_transforms
    multiscale_test = False
    aug_flip = False
    for t in sample_transform:
        if isinstance(t, MultiscaleTestResize):
            multiscale_test = True
            aug_flip = t.use_flip
            assert (len(t.target_size)+1)*(aug_flip+1) == num_scale, \
                "num_scale: {} is not equal to the actual number of scale: {}."\
                .format(num_scale, (len(t.target_size)+1)*(aug_flip+1))
            break

    if aug_flip:
        num_scale //= 2
        base_name_list.insert(0, 'flip_image')
        feed_var_map['flip_image'] = {
            'name': 'flip_image',
            'shape': image_shape,
            'dtype': 'float32',
            'lod_level': 0
        }

    image_name_list = []
    if multiscale_test:
        for base_name in base_name_list:
            for i in range(0, num_scale):
                name = base_name if i == 0 else base_name + '_scale_' + str(i - 1)
                feed_var_map[name] = {
                    'name': name,
                    'shape': image_shape,
                    'dtype': 'float32',
                    'lod_level': 0
                }
                image_name_list.append(name)
        feed_var_map['im_info']['shape'] = [feed.num_scale * 3]
        feed.fields = image_name_list + feed.fields[1:]
    if sub_prog_feed:
        box_names = ['bbox', 'bbox_flip']
        for box_name in box_names:
            sub_prog_feed = {
                'name': box_name,
                'shape': [6],
                'dtype': 'float32',
                'lod_level': 1
            }
            feed.fields = feed.fields + [box_name]
            feed_var_map[box_name] = sub_prog_feed

    feed_vars = OrderedDict([(key, fluid.layers.data(
        name=feed_var_map[key]['name'],
        shape=feed_var_map[key]['shape'],
        dtype=feed_var_map[key]['dtype'],
        lod_level=feed_var_map[key]['lod_level'])) for key in feed.fields])

    loader = fluid.io.DataLoader.from_generator(
        feed_list=list(feed_vars.values()),
        capacity=64,
        use_double_buffer=True,
        iterable=iterable) if not sub_prog_feed else None

    return loader, feed_vars
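Deleting this module shifts every call site from the feed-object API to the model's own input builder; a hedged before/after sketch:

# Before this commit:
#   loader, feed_vars = create_feed(train_feed)
# After this commit:
inputs_def = cfg['TrainReader']['inputs_def']   # comes from the reader yml
feed_vars, loader = model.build_inputs(**inputs_def)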
...@@ -59,9 +59,9 @@ class CascadeBBoxAssigner(object): ...@@ -59,9 +59,9 @@ class CascadeBBoxAssigner(object):
] ]
outs = fluid.layers.generate_proposal_labels( outs = fluid.layers.generate_proposal_labels(
rpn_rois=input_rois, rpn_rois=input_rois,
gt_classes=feed_vars['gt_label'], gt_classes=feed_vars['gt_class'],
is_crowd=feed_vars['is_crowd'], is_crowd=feed_vars['is_crowd'],
gt_boxes=feed_vars['gt_box'], gt_boxes=feed_vars['gt_bbox'],
im_info=feed_vars['im_info'], im_info=feed_vars['im_info'],
batch_size_per_im=self.batch_size_per_im, batch_size_per_im=self.batch_size_per_im,
fg_thresh=self.fg_thresh[curr_stage], fg_thresh=self.fg_thresh[curr_stage],
...@@ -71,5 +71,6 @@ class CascadeBBoxAssigner(object): ...@@ -71,5 +71,6 @@ class CascadeBBoxAssigner(object):
use_random=self.use_random, use_random=self.use_random,
class_nums=self.class_nums if self.class_aware else 2, class_nums=self.class_nums if self.class_aware else 2,
is_cls_agnostic=not self.class_aware, is_cls_agnostic=not self.class_aware,
is_cascade_rcnn=True if curr_stage > 0 and not self.class_aware else False) is_cascade_rcnn=True
if curr_stage > 0 and not self.class_aware else False)
return outs return outs
...@@ -37,16 +37,18 @@ class TestFasterRCNN(unittest.TestCase): ...@@ -37,16 +37,18 @@ class TestFasterRCNN(unittest.TestCase):
@prog_scope() @prog_scope()
def test_train(self): def test_train(self):
train_feed = create(self.cfg['train_feed'])
model = create(self.detector_type) model = create(self.detector_type)
_, feed_vars = create_feed(train_feed) inputs_def = self.cfg['TrainReader']['inputs_def']
inputs_def['image_shape'] = [3, None, None]
feed_vars, _ = model.build_inputs(**inputs_def)
train_fetches = model.train(feed_vars) train_fetches = model.train(feed_vars)
@prog_scope() @prog_scope()
def test_test(self): def test_test(self):
test_feed = create(self.cfg['eval_feed']) inputs_def = self.cfg['EvalReader']['inputs_def']
inputs_def['image_shape'] = [3, None, None]
model = create(self.detector_type) model = create(self.detector_type)
_, feed_vars = create_feed(test_feed) feed_vars, _ = model.build_inputs(**inputs_def)
test_fetches = model.eval(feed_vars) test_fetches = model.eval(feed_vars)
......
...@@ -22,8 +22,6 @@ import sys ...@@ -22,8 +22,6 @@ import sys
import json import json
import cv2 import cv2
import numpy as np import numpy as np
import matplotlib
matplotlib.use('Agg')
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -221,7 +219,8 @@ def bbox2out(results, clsid2catid, is_bbox_normalized=False): ...@@ -221,7 +219,8 @@ def bbox2out(results, clsid2catid, is_bbox_normalized=False):
clip_bbox([xmin, ymin, xmax, ymax]) clip_bbox([xmin, ymin, xmax, ymax])
w = xmax - xmin w = xmax - xmin
h = ymax - ymin h = ymax - ymin
im_height, im_width = t['im_shape'][0][i].tolist() im_shape = t['im_shape'][0][i].tolist()
im_height, im_width = int(im_shape[0]), int(im_shape[1])
xmin *= im_width xmin *= im_width
ymin *= im_height ymin *= im_height
w *= im_width w *= im_width
......
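The old line unpacked every entry of im_shape into two names; with three stored values that raises ValueError, which the explicit slicing above fixes. A minimal illustration (the exact meaning of the third entry is an assumption):

im_shape = [800.0, 1066.0, 1.0]    # assumed layout: height, width, extra
# im_height, im_width = im_shape   # ValueError: too many values to unpack
im_height, im_width = int(im_shape[0]), int(im_shape[1])   # the fix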
...@@ -30,7 +30,10 @@ from .voc_utils import create_list ...@@ -30,7 +30,10 @@ from .voc_utils import create_list
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
__all__ = ['get_weights_path', 'get_dataset_path', 'download_dataset', 'create_voc_list'] __all__ = [
'get_weights_path', 'get_dataset_path', 'download_dataset',
'create_voc_list'
]
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/weights") WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/weights")
DATASET_HOME = osp.expanduser("~/.cache/paddle/dataset") DATASET_HOME = osp.expanduser("~/.cache/paddle/dataset")
...@@ -72,8 +75,9 @@ DATASETS = { ...@@ -72,8 +75,9 @@ DATASETS = {
'a4a898d6193db4b9ef3260a68bad0dc7', ), 'a4a898d6193db4b9ef3260a68bad0dc7', ),
], ["WIDER_train", "WIDER_val", "wider_face_split"]), ], ["WIDER_train", "WIDER_val", "wider_face_split"]),
'fruit': ([( 'fruit': ([(
'https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar', 'https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit.tar',
'ee4a1bf2e321b75b0850cc6e063f79d7', ), ], ["fruit-detection"]), 'baa8806617a54ccf3685fa7153388ae6', ), ],
['Annotations', 'JPEGImages']),
'objects365': (), 'objects365': (),
} }
...@@ -101,17 +105,19 @@ def get_dataset_path(path, annotation, image_dir): ...@@ -101,17 +105,19 @@ def get_dataset_path(path, annotation, image_dir):
"downloading dataset...".format( "downloading dataset...".format(
osp.realpath(path), DATASET_HOME)) osp.realpath(path), DATASET_HOME))
data_name = os.path.split(path.strip().lower())[-1]
for name, dataset in DATASETS.items(): for name, dataset in DATASETS.items():
if os.path.split(path.strip().lower())[-1] == name: if data_name == name:
logger.info("Parse dataset_dir {} as dataset " logger.info("Parse dataset_dir {} as dataset "
"{}".format(path, name)) "{}".format(path, name))
if name == 'objects365': if name == 'objects365':
raise NotImplementedError( raise NotImplementedError(
"Dataset {} is not valid for download automatically. Please apply and download the dataset from https://www.objects365.org/download.html". "Dataset {} is not valid for download automatically. "
format(name)) "Please apply and download the dataset from "
"https://www.objects365.org/download.html".format(name))
data_dir = osp.join(DATASET_HOME, name) data_dir = osp.join(DATASET_HOME, name)
# For voc, only check dir VOCdevkit/VOC2012, VOCdevkit/VOC2007 # For voc, only check dir VOCdevkit/VOC2012, VOCdevkit/VOC2007
if name == 'voc': if name == 'voc' or name == 'fruit':
exists = True exists = True
for sub_dir in dataset[1]: for sub_dir in dataset[1]:
check_dir = osp.join(data_dir, sub_dir) check_dir = osp.join(data_dir, sub_dir)
...@@ -123,7 +129,7 @@ def get_dataset_path(path, annotation, image_dir): ...@@ -123,7 +129,7 @@ def get_dataset_path(path, annotation, image_dir):
return data_dir return data_dir
# voc exist is checked above, voc is not exist here # voc exist is checked above, voc is not exist here
check_exist = name != 'voc' check_exist = name != 'voc' and name != 'fruit'
for url, md5sum in dataset[0]: for url, md5sum in dataset[0]:
get_path(url, data_dir, md5sum, check_exist) get_path(url, data_dir, md5sum, check_exist)
...@@ -147,7 +153,7 @@ def create_voc_list(data_dir, devkit_subdir='VOCdevkit'): ...@@ -147,7 +153,7 @@ def create_voc_list(data_dir, devkit_subdir='VOCdevkit'):
# NOTE: since using auto download VOC # NOTE: since using auto download VOC
# dataset, VOC default label list should be used, # dataset, VOC default label list should be used,
# do not generate label_list.txt here. For default # do not generate label_list.txt here. For default
# label, see ../data/source/voc_loader.py # label, see ../data/source/voc.py
create_list(devkit_dir, years, data_dir) create_list(devkit_dir, years, data_dir)
logger.info("Create voc file list finished") logger.info("Create voc file list finished")
...@@ -345,6 +351,8 @@ def _move_and_merge_tree(src, dst): ...@@ -345,6 +351,8 @@ def _move_and_merge_tree(src, dst):
""" """
if not osp.exists(dst): if not osp.exists(dst):
shutil.move(src, dst) shutil.move(src, dst)
elif osp.isfile(src):
shutil.move(src, dst)
else: else:
for fp in os.listdir(src): for fp in os.listdir(src):
src_fp = osp.join(src, fp) src_fp = osp.join(src, fp)
......
...@@ -23,8 +23,8 @@ import time ...@@ -23,8 +23,8 @@ import time
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.utils.voc_eval import bbox_eval as voc_bbox_eval from .voc_eval import bbox_eval as voc_bbox_eval
from ppdet.utils.post_process import mstest_box_post_process, mstest_mask_post_process, box_flip from .post_process import mstest_box_post_process, mstest_mask_post_process, box_flip
__all__ = ['parse_fetches', 'eval_run', 'eval_results', 'json_eval_results'] __all__ = ['parse_fetches', 'eval_run', 'eval_results', 'json_eval_results']
...@@ -41,7 +41,7 @@ def parse_fetches(fetches, prog=None, extra_keys=None): ...@@ -41,7 +41,7 @@ def parse_fetches(fetches, prog=None, extra_keys=None):
for k, v in fetches.items(): for k, v in fetches.items():
if hasattr(v, 'name'): if hasattr(v, 'name'):
keys.append(k) keys.append(k)
v.persistable = True #v.persistable = True
values.append(v.name) values.append(v.name)
else: else:
cls.append(v) cls.append(v)
...@@ -174,19 +174,19 @@ def eval_run(exe, ...@@ -174,19 +174,19 @@ def eval_run(exe,
def eval_results(results, def eval_results(results,
feed,
metric, metric,
num_classes, num_classes,
resolution=None, resolution=None,
is_bbox_normalized=False, is_bbox_normalized=False,
output_directory=None, output_directory=None,
map_type='11point'): map_type='11point',
dataset=None):
"""Evaluation for evaluation program results""" """Evaluation for evaluation program results"""
box_ap_stats = [] box_ap_stats = []
if metric == 'COCO': if metric == 'COCO':
from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval
anno_file = getattr(feed.dataset, 'annotation', None) anno_file = dataset.get_anno()
with_background = getattr(feed, 'with_background', True) with_background = dataset.with_background
if 'proposal' in results[0]: if 'proposal' in results[0]:
output = 'proposal.json' output = 'proposal.json'
if output_directory: if output_directory:
...@@ -224,13 +224,13 @@ def eval_results(results, ...@@ -224,13 +224,13 @@ def eval_results(results,
return box_ap_stats return box_ap_stats
def json_eval_results(feed, metric, json_directory=None): def json_eval_results(metric, json_directory=None, dataset=None):
""" """
cocoapi eval with already exists proposal.json, bbox.json or mask.json cocoapi eval with already exists proposal.json, bbox.json or mask.json
""" """
assert metric == 'COCO' assert metric == 'COCO'
from ppdet.utils.coco_eval import cocoapi_eval from ppdet.utils.coco_eval import cocoapi_eval
anno_file = getattr(feed.dataset, 'annotation', None) anno_file = dataset.get_anno()
json_file_list = ['proposal.json', 'bbox.json', 'mask.json'] json_file_list = ['proposal.json', 'bbox.json', 'mask.json']
if json_directory: if json_directory:
assert os.path.exists( assert os.path.exists(
......
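eval_results and json_eval_results now take the dataset object instead of the feed; a usage sketch with assumed argument values:

dataset = cfg['EvalReader']['dataset']
box_ap_stats = eval_results(
    results, cfg.metric, cfg.num_classes,
    resolution=None, is_bbox_normalized=False,
    output_directory=None, map_type='11point', dataset=dataset)
json_eval_results(cfg.metric, json_directory=None, dataset=dataset)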
...@@ -21,7 +21,7 @@ import os ...@@ -21,7 +21,7 @@ import os
import sys import sys
import numpy as np import numpy as np
from ..data.source.voc_loader import pascalvoc_label from ..data.source.voc import pascalvoc_label
from .map_utils import DetectionMAP from .map_utils import DetectionMAP
from .coco_eval import bbox2out from .coco_eval import bbox2out
...@@ -69,13 +69,13 @@ def bbox_eval(results, ...@@ -69,13 +69,13 @@ def bbox_eval(results,
if bboxes.shape == (1, 1) or bboxes is None: if bboxes.shape == (1, 1) or bboxes is None:
continue continue
gt_boxes = t['gt_box'][0] gt_boxes = t['gt_bbox'][0]
gt_labels = t['gt_label'][0] gt_labels = t['gt_class'][0]
difficults = t['is_difficult'][0] if not evaluate_difficult \ difficults = t['is_difficult'][0] if not evaluate_difficult \
else None else None
if len(t['gt_box'][1]) == 0: if len(t['gt_bbox'][1]) == 0:
# gt_box, gt_label, difficult read as zero padded Tensor # gt_bbox, gt_class, difficult read as zero padded Tensor
bbox_idx = 0 bbox_idx = 0
for i in range(len(gt_boxes)): for i in range(len(gt_boxes)):
gt_box = gt_boxes[i] gt_box = gt_boxes[i]
...@@ -90,7 +90,7 @@ def bbox_eval(results, ...@@ -90,7 +90,7 @@ def bbox_eval(results,
bbox_idx += bbox_num bbox_idx += bbox_num
else: else:
# gt_box, gt_label, difficult read as LoDTensor # gt_box, gt_label, difficult read as LoDTensor
gt_box_lengths = t['gt_box'][1][0] gt_box_lengths = t['gt_bbox'][1][0]
bbox_idx = 0 bbox_idx = 0
gt_box_idx = 0 gt_box_idx = 0
for i in range(len(bbox_lengths)): for i in range(len(bbox_lengths)):
......
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import os import os
import numpy as np import numpy as np
from ppdet.data.source.widerface_loader import widerface_label from ppdet.data.source.widerface import widerface_label
from ppdet.utils.coco_eval import bbox2out from ppdet.utils.coco_eval import bbox2out
import logging import logging
......
...@@ -3,3 +3,4 @@ docstring_parser @ http://github.com/willthefrog/docstring_parser/tarball/master ...@@ -3,3 +3,4 @@ docstring_parser @ http://github.com/willthefrog/docstring_parser/tarball/master
typeguard ; python_version >= '3.4' typeguard ; python_version >= '3.4'
tb-paddle tb-paddle
tensorboard >= 1.15 tensorboard >= 1.15
pycocotools
...@@ -36,8 +36,9 @@ import paddle.fluid as fluid ...@@ -36,8 +36,9 @@ import paddle.fluid as fluid
from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results
import ppdet.utils.checkpoint as checkpoint import ppdet.utils.checkpoint as checkpoint
from ppdet.utils.check import check_gpu, check_version from ppdet.utils.check import check_gpu, check_version
from ppdet.modeling.model_input import create_feed
from ppdet.data.data_feed import create_reader from ppdet.data.reader import create_reader
from ppdet.core.workspace import load_config, merge_config, create from ppdet.core.workspace import load_config, merge_config, create
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
...@@ -63,11 +64,6 @@ def main(): ...@@ -63,11 +64,6 @@ def main():
# check if paddlepaddle version is satisfied # check if paddlepaddle version is satisfied
check_version() check_version()
if 'eval_feed' not in cfg:
eval_feed = create(main_arch + 'EvalFeed')
else:
eval_feed = create(cfg.eval_feed)
multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) multi_scale_test = getattr(cfg, 'MultiScaleTEST', None)
# define executor # define executor
...@@ -80,13 +76,15 @@ def main(): ...@@ -80,13 +76,15 @@ def main():
eval_prog = fluid.Program() eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog): with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
loader, feed_vars = create_feed(eval_feed) inputs_def = cfg['EvalReader']['inputs_def']
feed_vars, loader = model.build_inputs(**inputs_def)
if multi_scale_test is None: if multi_scale_test is None:
fetches = model.eval(feed_vars) fetches = model.eval(feed_vars)
else: else:
fetches = model.eval(feed_vars, multi_scale_test) fetches = model.eval(feed_vars, multi_scale_test)
eval_prog = eval_prog.clone(True) eval_prog = eval_prog.clone(True)
reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir)
reader = create_reader(cfg.EvalReader)
loader.set_sample_list_generator(reader, place) loader.set_sample_list_generator(reader, place)
# eval already exists json file # eval already exists json file
...@@ -96,30 +94,26 @@ def main(): ...@@ -96,30 +94,26 @@ def main():
"output_eval directly. And proposal.json, bbox.json and mask.json " "output_eval directly. And proposal.json, bbox.json and mask.json "
"will be detected by default.") "will be detected by default.")
json_eval_results( json_eval_results(
eval_feed, cfg.metric, json_directory=FLAGS.output_eval) cfg.metric, json_directory=FLAGS.output_eval, dataset=dataset)
return return
compile_program = fluid.compiler.CompiledProgram( compile_program = fluid.compiler.CompiledProgram(
eval_prog).with_data_parallel() eval_prog).with_data_parallel()
# load model
exe.run(startup_prog)
if 'weights' in cfg:
checkpoint.load_params(exe, eval_prog, cfg.weights)
assert cfg.metric != 'OID', "eval process of OID dataset \ assert cfg.metric != 'OID', "eval process of OID dataset \
is not supported." is not supported."
if cfg.metric == "WIDERFACE": if cfg.metric == "WIDERFACE":
raise ValueError("metric type {} does not support in tools/eval.py, " raise ValueError("metric type {} does not support in tools/eval.py, "
"please use tools/face_eval.py".format(cfg.metric)) "please use tools/face_eval.py".format(cfg.metric))
assert cfg.metric in ['COCO', 'VOC'], \ assert cfg.metric in ['COCO', 'VOC'], \
"unknown metric type {}".format(cfg.metric) "unknown metric type {}".format(cfg.metric)
extra_keys = [] extra_keys = []
if cfg.metric == 'COCO': if cfg.metric == 'COCO':
extra_keys = ['im_info', 'im_id', 'im_shape'] extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg.metric == 'VOC': if cfg.metric == 'VOC':
extra_keys = ['gt_box', 'gt_label', 'is_difficult'] extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys) keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys)
...@@ -129,6 +123,8 @@ def main(): ...@@ -129,6 +123,8 @@ def main():
callable(model.is_bbox_normalized): callable(model.is_bbox_normalized):
is_bbox_normalized = model.is_bbox_normalized() is_bbox_normalized = model.is_bbox_normalized()
dataset = cfg['EvalReader']['dataset']
sub_eval_prog = None sub_eval_prog = None
sub_keys = None sub_keys = None
sub_values = None sub_values = None
...@@ -137,32 +133,44 @@ def main(): ...@@ -137,32 +133,44 @@ def main():
sub_eval_prog = fluid.Program() sub_eval_prog = fluid.Program()
with fluid.program_guard(sub_eval_prog, startup_prog): with fluid.program_guard(sub_eval_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
_, feed_vars = create_feed(eval_feed, False, sub_prog_feed=True) inputs_def = cfg['EvalReader']['inputs_def']
inputs_def['mask_branch'] = True
feed_vars, eval_loader = model.build_inputs(**inputs_def)
sub_fetches = model.eval( sub_fetches = model.eval(
feed_vars, multi_scale_test, mask_branch=True) feed_vars, multi_scale_test, mask_branch=True)
extra_keys = [] assert cfg.metric == 'COCO'
if cfg.metric == 'COCO': extra_keys = ['im_id', 'im_shape']
extra_keys = ['im_id', 'im_shape']
if cfg.metric == 'VOC':
extra_keys = ['gt_box', 'gt_label', 'is_difficult']
sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog, sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog,
extra_keys) extra_keys)
sub_eval_prog = sub_eval_prog.clone(True) sub_eval_prog = sub_eval_prog.clone(True)
if 'weights' in cfg: #if 'weights' in cfg:
checkpoint.load_params(exe, sub_eval_prog, cfg.weights) # checkpoint.load_params(exe, sub_eval_prog, cfg.weights)
# load model
exe.run(startup_prog)
if 'weights' in cfg:
checkpoint.load_params(exe, startup_prog, cfg.weights)
results = eval_run(exe, compile_program, loader, keys, values, cls, cfg, results = eval_run(exe, compile_program, loader, keys, values, cls, cfg,
sub_eval_prog, sub_keys, sub_values) sub_eval_prog, sub_keys, sub_values)
#print(cfg['EvalReader']['dataset'].__dict__)
# evaluation # evaluation
resolution = None resolution = None
if 'mask' in results[0]: if 'mask' in results[0]:
resolution = model.mask_head.resolution resolution = model.mask_head.resolution
# if map_type not set, use default 11point, only use in VOC eval # if map_type not set, use default 11point, only use in VOC eval
map_type = cfg.map_type if 'map_type' in cfg else '11point' map_type = cfg.map_type if 'map_type' in cfg else '11point'
eval_results(results, eval_feed, cfg.metric, cfg.num_classes, resolution, eval_results(
is_bbox_normalized, FLAGS.output_eval, map_type) results,
cfg.metric,
cfg.num_classes,
resolution,
is_bbox_normalized,
FLAGS.output_eval,
map_type,
dataset=dataset)
if __name__ == '__main__': if __name__ == '__main__':
...@@ -172,12 +180,6 @@ if __name__ == '__main__': ...@@ -172,12 +180,6 @@ if __name__ == '__main__':
action='store_true', action='store_true',
default=False, default=False,
help="Whether to re eval with already exists bbox.json or mask.json") help="Whether to re eval with already exists bbox.json or mask.json")
parser.add_argument(
"-d",
"--dataset_dir",
default=None,
type=str,
help="Dataset path, same as DataFeed.dataset.dataset_dir")
parser.add_argument( parser.add_argument(
"-f", "-f",
"--output_eval", "--output_eval",
......
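With the -d/--dataset_dir flag removed, the evaluation dataset is resolved purely from the reader config. A sketch; the yml keys shown are assumptions about the new reader files, not quoted from this diff:

# EvalReader:                     # hypothetical fragment of a *_reader.yml
#   dataset:
#     dataset_dir: dataset/coco
#     anno_path: annotations/instances_val2017.json
dataset = cfg['EvalReader']['dataset']
anno_file = dataset.get_anno()    # resolved annotation path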
...@@ -82,11 +82,6 @@ def main(): ...@@ -82,11 +82,6 @@ def main():
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
if 'test_feed' not in cfg:
test_feed = create(main_arch + 'TestFeed')
else:
test_feed = create(cfg.test_feed)
# Use CPU for exporting inference model instead of GPU # Use CPU for exporting inference model instead of GPU
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -97,7 +92,9 @@ def main(): ...@@ -97,7 +92,9 @@ def main():
infer_prog = fluid.Program() infer_prog = fluid.Program()
with fluid.program_guard(infer_prog, startup_prog): with fluid.program_guard(infer_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
_, feed_vars = create_feed(test_feed, iterable=True) inputs_def = cfg['TestReader']['inputs_def']
inputs_def['use_dataloader'] = False
feed_vars, _ = model.build_inputs(**inputs_def)
test_fetches = model.test(feed_vars) test_fetches = model.test(feed_vars)
infer_prog = infer_prog.clone(True) infer_prog = infer_prog.clone(True)
......
...@@ -29,7 +29,6 @@ from ppdet.utils.check import check_gpu ...@@ -29,7 +29,6 @@ from ppdet.utils.check import check_gpu
from ppdet.utils.widerface_eval_utils import get_shrink, bbox_vote, \ from ppdet.utils.widerface_eval_utils import get_shrink, bbox_vote, \
save_widerface_bboxes, save_fddb_bboxes, to_chw_bgr save_widerface_bboxes, save_fddb_bboxes, to_chw_bgr
from ppdet.core.workspace import load_config, merge_config, create from ppdet.core.workspace import load_config, merge_config, create
from ppdet.modeling.model_input import create_feed
import logging import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s' FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
...@@ -53,7 +52,7 @@ def face_img_process(image, ...@@ -53,7 +52,7 @@ def face_img_process(image,
def face_eval_run(exe, def face_eval_run(exe,
compile_program, compile_program,
fetches, fetches,
img_root_dir, image_dir,
gt_file, gt_file,
pred_dir='output/pred', pred_dir='output/pred',
eval_mode='widerface', eval_mode='widerface',
...@@ -73,9 +72,10 @@ def face_eval_run(exe, ...@@ -73,9 +72,10 @@ def face_eval_run(exe,
dets_dist = OrderedDict() dets_dist = OrderedDict()
for iter_id, im_path in enumerate(imid2path): for iter_id, im_path in enumerate(imid2path):
image_path = os.path.join(img_root_dir, im_path) image_path = os.path.join(image_dir, im_path)
if eval_mode == 'fddb': if eval_mode == 'fddb':
image_path += '.jpg' image_path += '.jpg'
assert os.path.exists(image_path)
image = Image.open(image_path).convert('RGB') image = Image.open(image_path).convert('RGB')
if multi_scale: if multi_scale:
shrink, max_shrink = get_shrink(image.size[1], image.size[0]) shrink, max_shrink = get_shrink(image.size[1], image.size[0])
...@@ -220,11 +220,6 @@ def main(): ...@@ -220,11 +220,6 @@ def main():
# check if set use_gpu=True in paddlepaddle cpu version # check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
if 'eval_feed' not in cfg:
eval_feed = create(main_arch + 'EvalFeed')
else:
eval_feed = create(cfg.eval_feed)
# define executor # define executor
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -235,7 +230,9 @@ def main(): ...@@ -235,7 +230,9 @@ def main():
eval_prog = fluid.Program() eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog): with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
_, feed_vars = create_feed(eval_feed, iterable=True) inputs_def = cfg['EvalReader']['inputs_def']
inputs_def['use_dataloader'] = False
feed_vars, _ = model.build_inputs(**inputs_def)
fetches = model.eval(feed_vars) fetches = model.eval(feed_vars)
eval_prog = eval_prog.clone(True) eval_prog = eval_prog.clone(True)
...@@ -248,21 +245,19 @@ def main(): ...@@ -248,21 +245,19 @@ def main():
assert cfg.metric in ['WIDERFACE'], \ assert cfg.metric in ['WIDERFACE'], \
"unknown metric type {}".format(cfg.metric) "unknown metric type {}".format(cfg.metric)
annotation_file = getattr(eval_feed.dataset, 'annotation', None) dataset = cfg['EvalReader']['dataset']
dataset_dir = FLAGS.dataset_dir if FLAGS.dataset_dir else \
getattr(eval_feed.dataset, 'dataset_dir', None) annotation_file = dataset.get_anno()
img_root_dir = dataset_dir dataset_dir = dataset.dataset_dir
if FLAGS.eval_mode == "widerface": image_dir = dataset.image_dir
image_dir = getattr(eval_feed.dataset, 'image_dir', None)
img_root_dir = os.path.join(dataset_dir, image_dir)
gt_file = os.path.join(dataset_dir, annotation_file)
pred_dir = FLAGS.output_eval if FLAGS.output_eval else 'output/pred' pred_dir = FLAGS.output_eval if FLAGS.output_eval else 'output/pred'
face_eval_run( face_eval_run(
exe, exe,
eval_prog, eval_prog,
fetches, fetches,
img_root_dir, image_dir,
gt_file, annotation_file,
pred_dir=pred_dir, pred_dir=pred_dir,
eval_mode=FLAGS.eval_mode, eval_mode=FLAGS.eval_mode,
multi_scale=FLAGS.multi_scale) multi_scale=FLAGS.multi_scale)
......
...@@ -38,8 +38,6 @@ set_paddle_flags( ...@@ -38,8 +38,6 @@ set_paddle_flags(
from paddle import fluid from paddle import fluid
from ppdet.core.workspace import load_config, merge_config, create from ppdet.core.workspace import load_config, merge_config, create
from ppdet.modeling.model_input import create_feed
from ppdet.data.data_feed import create_reader
from ppdet.utils.eval_utils import parse_fetches from ppdet.utils.eval_utils import parse_fetches
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
...@@ -47,6 +45,8 @@ from ppdet.utils.check import check_gpu, check_version ...@@ -47,6 +45,8 @@ from ppdet.utils.check import check_gpu, check_version
from ppdet.utils.visualizer import visualize_results from ppdet.utils.visualizer import visualize_results
import ppdet.utils.checkpoint as checkpoint import ppdet.utils.checkpoint as checkpoint
from ppdet.data.reader import create_reader
import logging import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s' FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT) logging.basicConfig(level=logging.INFO, format=FORMAT)
...@@ -110,13 +110,10 @@ def main(): ...@@ -110,13 +110,10 @@ def main():
# check if paddlepaddle version is satisfied # check if paddlepaddle version is satisfied
check_version() check_version()
if 'test_feed' not in cfg: dataset = cfg.TestReader['dataset']
test_feed = create(main_arch + 'TestFeed')
else:
test_feed = create(cfg.test_feed)
test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img) test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
test_feed.dataset.add_images(test_images) dataset.set_images(test_images)
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -127,11 +124,13 @@ def main(): ...@@ -127,11 +124,13 @@ def main():
infer_prog = fluid.Program() infer_prog = fluid.Program()
with fluid.program_guard(infer_prog, startup_prog): with fluid.program_guard(infer_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
loader, feed_vars = create_feed(test_feed, iterable=True) inputs_def = cfg['TestReader']['inputs_def']
inputs_def['iterable'] = True
feed_vars, loader = model.build_inputs(**inputs_def)
test_fetches = model.test(feed_vars) test_fetches = model.test(feed_vars)
infer_prog = infer_prog.clone(True) infer_prog = infer_prog.clone(True)
reader = create_reader(test_feed) reader = create_reader(cfg.TestReader)
loader.set_sample_list_generator(reader, place) loader.set_sample_list_generator(reader, place)
exe.run(startup_prog) exe.run(startup_prog)
...@@ -158,9 +157,10 @@ def main(): ...@@ -158,9 +157,10 @@ def main():
if cfg.metric == "WIDERFACE": if cfg.metric == "WIDERFACE":
from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info
anno_file = getattr(test_feed.dataset, 'annotation', None) anno_file = dataset.get_anno()
with_background = getattr(test_feed, 'with_background', True) with_background = dataset.with_background
use_default_label = getattr(test_feed, 'use_default_label', False) use_default_label = dataset.use_default_label
clsid2catid, catid2name = get_category_info(anno_file, with_background, clsid2catid, catid2name = get_category_info(anno_file, with_background,
use_default_label) use_default_label)
...@@ -177,7 +177,7 @@ def main(): ...@@ -177,7 +177,7 @@ def main():
tb_image_step = 0 tb_image_step = 0
tb_image_frame = 0 # each frame can display ten pictures at most. tb_image_frame = 0 # each frame can display ten pictures at most.
imid2path = reader.imid2path imid2path = dataset.get_imid2path()
for iter_id, data in enumerate(loader()): for iter_id, data in enumerate(loader()):
outs = exe.run(infer_prog, outs = exe.run(infer_prog,
feed=data, feed=data,
......
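A sketch of the new inference data path shown above; the image path is a placeholder:

dataset = cfg.TestReader['dataset']
dataset.set_images(['demo/xxx.jpg'])      # hypothetical test image
reader = create_reader(cfg.TestReader)
loader.set_sample_list_generator(reader, place)
imid2path = dataset.get_imid2path()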
...@@ -39,7 +39,7 @@ from paddle import fluid ...@@ -39,7 +39,7 @@ from paddle import fluid
from ppdet.experimental import mixed_precision_context from ppdet.experimental import mixed_precision_context
from ppdet.core.workspace import load_config, merge_config, create from ppdet.core.workspace import load_config, merge_config, create
from ppdet.data.data_feed import create_reader from ppdet.data.reader import create_reader
from ppdet.utils.cli import print_total_cfg from ppdet.utils.cli import print_total_cfg
from ppdet.utils import dist_utils from ppdet.utils import dist_utils
...@@ -48,7 +48,6 @@ from ppdet.utils.stats import TrainingStats ...@@ -48,7 +48,6 @@ from ppdet.utils.stats import TrainingStats
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu, check_version from ppdet.utils.check import check_gpu, check_version
import ppdet.utils.checkpoint as checkpoint import ppdet.utils.checkpoint as checkpoint
from ppdet.modeling.model_input import create_feed
import logging import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s' FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
...@@ -89,17 +88,6 @@ def main(): ...@@ -89,17 +88,6 @@ def main():
else: else:
devices_num = int(os.environ.get('CPU_NUM', 1)) devices_num = int(os.environ.get('CPU_NUM', 1))
if 'train_feed' not in cfg:
train_feed = create(main_arch + 'TrainFeed')
else:
train_feed = create(cfg.train_feed)
if FLAGS.eval:
if 'eval_feed' not in cfg:
eval_feed = create(main_arch + 'EvalFeed')
else:
eval_feed = create(cfg.eval_feed)
if 'FLAGS_selected_gpus' in env: if 'FLAGS_selected_gpus' in env:
device_id = int(env['FLAGS_selected_gpus']) device_id = int(env['FLAGS_selected_gpus'])
else: else:
...@@ -116,8 +104,6 @@ def main(): ...@@ -116,8 +104,6 @@ def main():
with fluid.program_guard(train_prog, startup_prog): with fluid.program_guard(train_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
model = create(main_arch) model = create(main_arch)
train_loader, feed_vars = create_feed(train_feed)
if FLAGS.fp16: if FLAGS.fp16:
assert (getattr(model.backbone, 'norm_type', None) assert (getattr(model.backbone, 'norm_type', None)
!= 'affine_channel'), \ != 'affine_channel'), \
...@@ -125,8 +111,9 @@ def main(): ...@@ -125,8 +111,9 @@ def main():
' please modify backbone settings to use batch norm' ' please modify backbone settings to use batch norm'
with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx: with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx:
inputs_def = cfg['TrainReader']['inputs_def']
feed_vars, train_loader = model.build_inputs(**inputs_def)
train_fetches = model.train(feed_vars) train_fetches = model.train(feed_vars)
loss = train_fetches['loss'] loss = train_fetches['loss']
if FLAGS.fp16: if FLAGS.fp16:
loss *= ctx.get_loss_scale_var() loss *= ctx.get_loss_scale_var()
...@@ -145,11 +132,12 @@ def main(): ...@@ -145,11 +132,12 @@ def main():
with fluid.program_guard(eval_prog, startup_prog): with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
model = create(main_arch) model = create(main_arch)
eval_loader, feed_vars = create_feed(eval_feed) inputs_def = cfg['EvalReader']['inputs_def']
feed_vars, eval_loader = model.build_inputs(**inputs_def)
fetches = model.eval(feed_vars) fetches = model.eval(feed_vars)
eval_prog = eval_prog.clone(True) eval_prog = eval_prog.clone(True)
eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) eval_reader = create_reader(cfg.EvalReader)
eval_loader.set_sample_list_generator(eval_reader, place) eval_loader.set_sample_list_generator(eval_reader, place)
# parse eval fetches # parse eval fetches
...@@ -157,9 +145,9 @@ def main(): ...@@ -157,9 +145,9 @@ def main():
if cfg.metric == 'COCO': if cfg.metric == 'COCO':
extra_keys = ['im_info', 'im_id', 'im_shape'] extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg.metric == 'VOC': if cfg.metric == 'VOC':
extra_keys = ['gt_box', 'gt_label', 'is_difficult'] extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
if cfg.metric == 'WIDERFACE': if cfg.metric == 'WIDERFACE':
extra_keys = ['im_id', 'im_shape', 'gt_box'] extra_keys = ['im_id', 'im_shape', 'gt_bbox']
eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
extra_keys) extra_keys)
...@@ -206,8 +194,8 @@ def main(): ...@@ -206,8 +194,8 @@ def main():
checkpoint.load_params( checkpoint.load_params(
exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params) exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params)
train_reader = create_reader(train_feed, (cfg.max_iters - start_iter) * train_reader = create_reader(cfg.TrainReader,
devices_num, FLAGS.dataset_dir) (cfg.max_iters - start_iter) * devices_num)
train_loader.set_sample_list_generator(train_reader, place) train_loader.set_sample_list_generator(train_reader, place)
# whether output bbox is normalized in model output layer # whether output bbox is normalized in model output layer
...@@ -273,8 +261,9 @@ def main(): ...@@ -273,8 +261,9 @@ def main():
if 'mask' in results[0]: if 'mask' in results[0]:
resolution = model.mask_head.resolution resolution = model.mask_head.resolution
box_ap_stats = eval_results( box_ap_stats = eval_results(
results, eval_feed, cfg.metric, cfg.num_classes, resolution, results, cfg.metric, cfg.num_classes, resolution,
is_bbox_normalized, FLAGS.output_eval, map_type) is_bbox_normalized, FLAGS.output_eval, map_type,
cfg['EvalReader']['dataset'])
# use tb_paddle to log mAP # use tb_paddle to log mAP
if FLAGS.use_tb: if FLAGS.use_tb:
...@@ -320,12 +309,6 @@ if __name__ == '__main__': ...@@ -320,12 +309,6 @@ if __name__ == '__main__':
default=None, default=None,
type=str, type=str,
help="Evaluation directory, default is current directory.") help="Evaluation directory, default is current directory.")
parser.add_argument(
"-d",
"--dataset_dir",
default=None,
type=str,
help="Dataset path, same as DataFeed.dataset.dataset_dir")
parser.add_argument( parser.add_argument(
"--use_tb", "--use_tb",
type=bool, type=bool,
......
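Pulling the training-side hunks together, the new end-to-end wiring reads roughly as follows (names as used in this diff):

inputs_def = cfg['TrainReader']['inputs_def']
feed_vars, train_loader = model.build_inputs(**inputs_def)
train_reader = create_reader(cfg.TrainReader,
                             (cfg.max_iters - start_iter) * devices_num)
train_loader.set_sample_list_generator(train_reader, place)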