Object detection framework based on PaddlePaddle. (#2496)

* Unified object detection framework based on PaddlePaddle.
* Included algorithms: Faster R-CNN, Mask R-CNN, FPN, Cascade R-CNN, RetinaNet, YOLOv3, SSD.

Object detection framework based on PaddlePaddle. (#2496)
* Unified object detection framework based on PaddlePaddle.
* Included algorithms: Faster R-CNN, Mask R-CNN, FPN, Cascade R-CNN, RetinaNet, YOLOv3, SSD.
41d194cc · qingqing01 · GitHub · 120b2bda · 41d194cc · 41d194cc
123 changed files
--- a/PaddleCV/object_detection/.gitignore
+++ b/PaddleCV/object_detection/.gitignore
+# Virtualenv
+/.venv/
+/venv/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+# C extensions
+*.so
+# json file
+*.json
+# Distribution / packaging
+/bin/
+/build/
+/develop-eggs/
+/dist/
+/eggs/
+/lib/
+/lib64/
+/output/
+/parts/
+/sdist/
+/var/
+/*.egg-info/
+/.installed.cfg
+/*.egg
+/.eggs
+# AUTHORS and ChangeLog will be generated while packaging
+/AUTHORS
+/ChangeLog
+# BCloud / BuildSubmitter
+/build_submitter.*
+/logger_client_log
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+.tox/
+.coverage
+.cache
+.pytest_cache
+nosetests.xml
+coverage.xml
+# Translations
+*.mo
+# Sphinx documentation
+/docs/_build/
+*.json
--- a/PaddleCV/object_detection/.style.yapf
+++ b/PaddleCV/object_detection/.style.yapf
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/PaddleCV/object_detection/README.md
+++ b/PaddleCV/object_detection/README.md
-## PaddlePaddle Object Detection
+PaddlePaddle Object Detection
+===
-Thanks for your attention. The object detection framework based on PaddlePaddle will be coming soon.
+The document will be coming soon.
--- a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml
+architecture: CascadeRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 90000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+weights: output/cascade_rcnn_r50_fpn_1x/model_final
+metric: COCO
+CascadeRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: CascadeBBoxHead
+  bbox_assigner: CascadeBBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  variant: b
+FPN:
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_positive_overlap: 0.7
+    rpn_negative_overlap: 0.3
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  min_level: 2
+  max_level: 5
+  box_resolution: 7
+  sampling_ratio: 2
+CascadeBBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [10, 20, 30]
+  bg_thresh_lo: [0.0, 0.0, 0.0]
+  bg_thresh_hi: [0.5, 0.6, 0.7]
+  fg_thresh: [0.5, 0.6, 0.7]
+  fg_fraction: 0.25
+  num_classes: 81
+CascadeBBoxHead:
+  head: FC6FC7Head
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+FC6FC7Head:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [60000, 80000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  batch_size: 2
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+  shuffle: true 
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  drop_last: false
+  num_workers: 2
+  shuffle: false
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+use_gpu: True
+max_iters: 180000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 10000
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
+metric: COCO
+weights: output/faster_rcnn_r101_1x/model_final
+FasterRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  depth: 101
+  feature_maps: [2,3,4]
+  freeze_at: 2
+ResNetC5:
+  norm_type: affine_channel
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+    use_random: true
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+RoIAlign:
+  resolution: 14
+  sampling_ratio: 0
+  spatial_scale: 0.0625
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+# Learning-rate schedule: piecewise decay combined with a linear warmup.
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    # Milestones are iteration counts and must be scaled to max_iters
+    # (180000 in this config); the previous [12000, 16000] dropped a digit
+    # and applied both decays within the first 9% of training.
+    milestones: [120000, 160000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml
+# Global run settings for Faster R-CNN R101-FPN, 1x schedule.
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 180000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+# Download the pretrained backbone over HTTPS so it cannot be altered in transit.
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
+weights: output/faster_rcnn_r101_fpn_1x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml
+# Global run settings for Faster R-CNN R101-FPN, 2x schedule.
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 360000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+# Download the pretrained backbone over HTTPS so it cannot be altered in transit.
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
+weights: output/faster_rcnn_r101_fpn_2x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 180000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
+weights: output/faster_rcnn_r101_vd_fpn_1x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 360000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
+weights: output/faster_rcnn_r101_vd_fpn_2x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+use_gpu: True
+max_iters: 180000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 10000
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+weights: output/faster_rcnn_r50_1x/model_final
+FasterRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  depth: 50
+  feature_maps: [2,3,4]
+  freeze_at: 2
+ResNetC5:
+  norm_type: affine_channel
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+    use_random: true
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+RoIAlign:
+  resolution: 14
+  sampling_ratio: 0
+  spatial_scale: 0.0625
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+# Learning-rate schedule: piecewise decay combined with a linear warmup.
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    # Milestones are iteration counts and must be scaled to max_iters
+    # (180000 in this config); the previous [12000, 16000] dropped a digit
+    # and applied both decays within the first 9% of training.
+    milestones: [120000, 160000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+use_gpu: True
+max_iters: 360000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 10000
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+weights: output/faster_rcnn_r50_2x/model_final
+FasterRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  depth: 50
+  feature_maps: [2,3,4]
+  freeze_at: 2
+ResNetC5:
+  norm_type: affine_channel
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+    use_random: true
+  # Proposal generation settings used while training.
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    # Key was misspelled "pre_nums_top_n", so the intended value of 12000
+    # was not being set under the name the other proposal sections use.
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+RoIAlign:
+  resolution: 14
+  sampling_ratio: 0
+  spatial_scale: 0.0625
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+# Learning-rate schedule: piecewise decay combined with a linear warmup.
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    # Milestones are iteration counts and must be scaled to max_iters
+    # (360000 in this config); the previous [24000, 32000] dropped a digit
+    # and applied both decays within the first 9% of training.
+    milestones: [240000, 320000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml
+# Global run settings for Faster R-CNN R50-FPN, 1x schedule.
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 90000
+use_gpu: True
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+# NOTE(review): dropped the stray "fpn/" path component so the path sits
+# directly under save_dir as output/<config name>/model_final -- confirm
+# against the trainer's checkpoint layout.
+weights: output/faster_rcnn_r50_fpn_1x/model_final
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  norm_decay: true
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+FPN:
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_positive_overlap: 0.7
+    rpn_negative_overlap: 0.3
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  min_level: 2
+  max_level: 5
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_lo: 0.0
+  bg_thresh_hi: 0.5
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [60000, 80000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  batch_size: 2
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+  shuffle: true
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  drop_last: false
+  num_workers: 2
+  shuffle: false
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml
+# Global run settings for Faster R-CNN R50-FPN, 2x schedule.
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+# Must exceed the last LearningRate milestone in this file (160000); the
+# previous value of 90000 ended training before either decay step ran.
+max_iters: 180000
+use_gpu: True
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+weights: output/faster_rcnn_r50_fpn_2x/model_final
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  norm_decay: true
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+FPN:
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  min_level: 2
+  max_level: 6
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_positive_overlap: 0.7
+    rpn_negative_overlap: 0.3
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  min_level: 2
+  max_level: 5
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_lo: 0.0
+  bg_thresh_hi: 0.5
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  batch_size: 2
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+  shuffle: true
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    annotation: coco/annotations/instances_val2017.json
+    image_dir: coco/val2017
+  drop_last: false
+  num_workers: 2
+  shuffle: false
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml
+# Global run settings for Faster R-CNN R50-vd (C4), 1x schedule.
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+use_gpu: True
+max_iters: 180000
+log_smooth_window: 20
+# NOTE(review): was "output/faster-r50-vd-c4-1x", which does not contain the
+# `weights` path below; aligned so the final checkpoint is looked up under
+# the directory it is saved to -- confirm against the trainer's save logic.
+save_dir: output
+snapshot_iter: 10000
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
+metric: COCO
+weights: output/faster_rcnn_r50_vd_1x/model_final
+FasterRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  norm_type: affine_channel
+  depth: 50
+  feature_maps: [2,3,4]
+  freeze_at: 2
+  variant: d
+ResNetC5:
+  norm_type: affine_channel
+  variant: d
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+    use_random: true
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+RoIAlign:
+  resolution: 14
+  sampling_ratio: 0
+  spatial_scale: 0.0625
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+# Learning-rate schedule: piecewise decay combined with a linear warmup.
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    # Milestones sized for this file's max_iters of 180000; the previous
+    # [12000, 16000] was missing a digit.
+    milestones: [120000, 160000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  num_workers: 2
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 180000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
+weights: output/faster_rcnn_r50_vd_fpn_2x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+# Settings for the FPNRoIAlign component (referenced by `roi_extractor`).
+FPNRoIAlign:
+  # NOTE(review): `canconical_level` looks like a misspelling of
+  # `canonical_level`. Every FPNRoIAlign section in these configs uses the
+  # same spelling, so it presumably matches the parameter name in the
+  # implementation -- confirm before renaming.
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 2
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 180000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar
+weights: output/faster_rcnn_se154_1x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: SENet
+  fpn: null
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+SENet:
+  depth: 152
+  feature_maps: [2, 3, 4]
+  freeze_at: 2
+  group_width: 4
+  groups: 64
+  norm_type: affine_channel
+  variant: d
+SENetC5:
+  depth: 152
+  feature_maps: 5
+  freeze_at: 2
+  group_width: 4
+  groups: 64
+  norm_type: affine_channel
+  variant: d
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 12000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 6000
+RoIAlign:
+  resolution: 7
+  sampling_ratio: 0
+  spatial_scale: 0.0625
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: SENetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+# Reader settings used for training (selected by `train_feed`).
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    # Train on the train2017 split; the previous val2017 paths were the
+    # same data FasterRCNNEvalFeed evaluates on.
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+# Reader settings used for inference (selected by `test_feed`).
+FasterRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    # Trailing whitespace after the path removed.
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_fpn_1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 180000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar
+weights: output/faster_rcnn_se154_fpn_1x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: SENet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+SENet:
+  depth: 152
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  group_width: 4
+  groups: 64
+  norm_type: affine_channel
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_fpn_s1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_fpn_s1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 260000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar
+weights: output/faster_rcnn_se154_fpn_s1x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: SENet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+SENet:
+  depth: 152
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  group_width: 4
+  groups: 64
+  norm_type: affine_channel
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [200000, 240000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 180000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar
+weights: output/faster_rcnn_x101_64x4d_fpn_1x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNeXt
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNeXt:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  group_width: 4
+  groups: 64
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml
+# Global run settings for Faster R-CNN with a ResNeXt101 64x4d FPN backbone,
+# 2x schedule.
+architecture: FasterRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+# Must exceed the LearningRate milestones in this file ([240000, 320000]);
+# with the previous 180000 neither decay step was ever reached.
+max_iters: 360000
+snapshot_iter: 10000
+use_gpu: True
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar
+weights: output/faster_rcnn_x101_64x4d_fpn_2x/model_final
+metric: COCO
+FasterRCNN:
+  backbone: ResNeXt
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNeXt:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  group_width: 4
+  groups: 64
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+FasterRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  shuffle: False
--- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml
+# Global run settings for Mask R-CNN with a ResNet101 FPN backbone.
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 180000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
+metric: COCO
+# No trailing slash or whitespace, matching the other configs' weights paths.
+weights: output/mask_rcnn_r101_fpn_1x/model_final
+# Component wiring for the MaskRCNN architecture; each value names another
+# top-level section of this file.
+MaskRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+  # Wire the mask branch explicitly, as the non-FPN Mask R-CNN configs do;
+  # the MaskAssigner and MaskHead sections are defined below.
+  mask_assigner: MaskAssigner
+  mask_head: MaskHead
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  sampling_ratio: 2
+  box_resolution: 7
+  mask_resolution: 14
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  num_convs: 4
+  resolution: 28
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  resolution: 28
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  use_padded_im_info: False
+MaskRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+  use_padded_im_info: True
+MaskRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
+  num_workers: 2
+  use_padded_im_info: True
--- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml
+# Global run settings for Mask R-CNN with a ResNet101 FPN backbone,
+# 2x schedule.
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 360000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
+metric: COCO
+# No trailing slash or whitespace, matching the other configs' weights paths.
+weights: output/mask_rcnn_r101_fpn_2x/model_final
+# Component wiring for the MaskRCNN architecture; each value names another
+# top-level section of this file.
+MaskRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+  # Wire the mask branch explicitly, as the non-FPN Mask R-CNN configs do;
+  # the MaskAssigner and MaskHead sections are defined below.
+  mask_assigner: MaskAssigner
+  mask_head: MaskHead
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  sampling_ratio: 2
+  box_resolution: 7
+  mask_resolution: 14
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  num_convs: 4
+  resolution: 28
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  resolution: 28
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  use_padded_im_info: False
+MaskRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+  use_padded_im_info: True
+MaskRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
+  num_workers: 2
+  use_padded_im_info: True
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 180000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+weights: output/mask_rcnn_r50_1x/model_final
+MaskRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_assigner: BBoxAssigner
+  bbox_head: BBoxHead
+  mask_assigner: MaskAssigner
+  mask_head: MaskHead
+ResNet:
+  norm_type: affine_channel
+  norm_decay: true
+  depth: 50
+  feature_maps: [2,3, 4]
+  freeze_at: 2
+ResNetC5:
+  norm_type: affine_channel
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+RoIAlign:
+  resolution: 14
+  spatial_scale: 0.0625
+  sampling_ratio: 0
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    normalized: false
+    score_threshold: 0.05
+  num_classes: 81
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  resolution: 14
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  num_classes: 81
+  resolution: 14
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  shuffle: true
+MaskRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  shuffle: false
+MaskRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml
+# Global run settings for Mask R-CNN with a ResNet50 backbone (C4 head),
+# 2x schedule.
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 360000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+# No trailing slash or whitespace, matching the other configs' weights paths.
+weights: output/mask_rcnn_r50_2x/model_final
+MaskRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_assigner: BBoxAssigner
+  bbox_head: BBoxHead
+  mask_assigner: MaskAssigner
+  mask_head: MaskHead
+ResNet:
+  norm_type: affine_channel
+  norm_decay: true
+  depth: 50
+  feature_maps: [2,3, 4]
+  freeze_at: 2
+ResNetC5:
+  norm_type: affine_channel
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+RoIAlign:
+  resolution: 14
+  spatial_scale: 0.0625
+  sampling_ratio: 0
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    normalized: false
+    score_threshold: 0.05
+  num_classes: 81
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  resolution: 14
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  num_classes: 81
+  resolution: 14
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+  #start the warm up from base_lr * start_factor
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  shuffle: true
+MaskRCNNEvalFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  shuffle: false
+MaskRCNNTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 180000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+weights: output/mask_rcnn_r50_fpn_1x/model_final/ 
+MaskRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  sampling_ratio: 2
+  box_resolution: 7
+  mask_resolution: 14
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  num_convs: 4
+  resolution: 28
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  resolution: 28
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [120000, 160000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  use_padded_im_info: False
+MaskRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+  use_padded_im_info: True
+MaskRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
+  num_workers: 2
+  use_padded_im_info: True
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 360000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+metric: COCO
+weights: output/mask_rcnn_r50_fpn_2x/model_final/ 
+MaskRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  sampling_ratio: 2
+  box_resolution: 7
+  mask_resolution: 14
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  num_convs: 4
+  resolution: 28
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  resolution: 28
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 2
+  use_padded_im_info: False
+MaskRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+  use_padded_im_info: True
+MaskRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  samples: 5
+  num_workers: 2
+  use_padded_im_info: True
--- a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
+++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml
+architecture: MaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+use_gpu: True
+max_iters: 360000
+snapshot_iter: 10000
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
+metric: COCO
+weights: output/mask_rcnn_r50_vd_fpn_2x/model_final/ 
+MaskRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: affine_channel
+  variant: d 
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+  mask_resolution: 14
+MaskHead:
+  dilation: 1
+  num_chan_reduced: 256
+  num_classes: 81
+  num_convs: 4
+  resolution: 28
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+  num_classes: 81
+MaskAssigner:
+  resolution: 28
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+  num_classes: 81
+TwoFCHead:
+  num_chan: 1024
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [240000, 320000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+MaskRCNNTrainFeed:
+  # batch size per device
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    image_dir: train2017
+    annotation: annotations/instances_train2017.json
+  num_workers: 2
+  shuffle: True
+  use_padded_im_info: False
+MaskRCNNEvalFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  num_workers: 2
+  use_padded_im_info: True
+MaskRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 32
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  num_workers: 2
+  use_padded_im_info: True
--- a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
+++ b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml
+# Top-level run settings for the RetinaNet config; the data feeds reuse the
+# FasterRCNN*Feed sections defined further down in this file.
+architecture: RetinaNet
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 90000
+# Canonical boolean literal: `yes` is only a boolean under YAML 1.1 rules.
+use_gpu: true
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
+weights: output/retinanet_r50_fpn_1x/model_final
+log_smooth_window: 20
+snapshot_iter: 10000
+metric: COCO
+save_dir: output
+RetinaNet:
+  backbone: ResNet
+  fpn: FPN
+  retina_head: RetinaHead
+ResNet:
+  norm_type: affine_channel
+  freeze_norm: true
+  norm_decay: 0.0001
+  depth: 50
+  feature_maps: [3, 4, 5]
+  freeze_at: 2
+  variant: b
+FPN:
+  max_level: 7
+  min_level: 3
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125]
+  has_extra_convs: true
+RetinaHead:
+  num_convs_per_octave: 4
+  num_chan: 256
+  max_level: 7
+  min_level: 3
+  prior_prob: 0.01
+  base_scale: 4
+  num_scales_per_octave: 3
+  num_classes: 81
+  anchor_generator:
+    aspect_ratios: [1.0, 2.0, 0.5]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  target_assign:
+    positive_overlap: 0.5
+    negative_overlap: 0.4
+  gamma: 2.0
+  alpha: 0.25
+  sigma: 3.0151134457776365
+  output_decoder:
+    score_thresh: 0.05
+    nms_thresh: 0.5
+    pre_nms_top_n: 1000
+    detections_per_im: 100
+    nms_eta: 1.0
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [60000, 80000]
+    values: null
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+FasterRCNNTrainFeed:
+  batch_size: 2
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 128
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  drop_last: false
+  image_shape: [3, 1333, 800]
+  num_workers: 2
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !RandomFlipImage
+    is_mask_flip: false
+    is_normalized: false
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  shuffle: true
+# Evaluation feed on the val2017 split. The sample transforms match the test
+# feed (no RandomFlipImage, unlike the train feed).
+FasterRCNNEvalFeed:
+  batch_size: 2
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 128
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  image_shape: [3, 1333, 800]
+  num_workers: 2
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  # A mapping key may appear only once, so `shuffle` is declared a single time.
+  # NOTE(review): the train/test feeds set `drop_last: false` right after
+  # `dataset`; confirm whether `drop_last` should also be set for this feed.
+  shuffle: false
+FasterRCNNTestFeed:
+  batch_size: 1
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 128
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: val2017.txt
+  drop_last: false
+  image_shape: [3, 1333, 800]
+  num_workers: 2
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  shuffle: false
--- a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
+++ b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml
+# Top-level run settings for the SSD config (VOC metric).
+architecture: SSD
+max_iters: 28000
+train_feed: SSDTrainFeed
+eval_feed: SSDEvalFeed
+test_feed: SSDTestFeed
+# NOTE(review): this is a relative local path, whereas the other configs shown
+# alongside this one point pretrain_weights at a download URL -- confirm that
+# ./ssd3/ is intended and available to users.
+pretrain_weights: ./ssd3/
+use_gpu: true
+snapshot_iter: 2000
+log_smooth_window: 1
+metric: VOC
+save_dir: output
+weights: output/ssd_mobilenet_v1_voc/model_final/
+SSD:
+  backbone: MobileNet
+  multi_box_head: MultiBoxHead
+  num_classes: 21
+  metric: 
+    ap_version: 11point
+    evaluate_difficult: false
+    overlap_threshold: 0.5
+  output_decoder:
+    background_label: 0
+    keep_top_k: 200
+    nms_eta: 1.0
+    nms_threshold: 0.45
+    nms_top_k: 400
+    score_threshold: 0.01
+MobileNet:
+  norm_decay: 0.
+  conv_group_scale: 1
+  extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
+  with_extra_blocks: true
+MultiBoxHead:
+  aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
+  base_size: 300
+  flip: true
+  max_ratio: 90
+  max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
+  min_ratio: 20
+  min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
+  offset: 0.5
+LearningRate:
+  schedulers:
+  - !PiecewiseDecay
+    milestones: [10000, 15000, 20000, 25000]
+    values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001]
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.0
+    type: RMSPropOptimizer
+  regularizer:
+    factor: 0.00005
+    type: L2
+SSDTrainFeed:
+  batch_size: 32
+  use_process: true
+  dataset:
+    dataset_dir: data/voc
+    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    image_dir: VOCdevkit/VOC_all/JPEGImages
+SSDEvalFeed:
+  batch_size: 64
+  use_process: true
+  dataset:
+    dataset_dir: data/voc
+    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    image_dir: VOCdevkit/VOC_all/JPEGImages
+    use_default_label: false
+  drop_last: false
+SSDTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/voc
+    annotation: VOCdevkit/VOC_all/ImageSets/Main/test.txt
+    image_dir: VOCdevkit/VOC_all/JPEGImages
+    use_default_label: false
+  drop_last: false
+  test_file: data/voc/VOCdevkit/VOC_all/ImageSets/Main/test.txt
--- a/PaddleCV/object_detection/configs/yolov3_darknet.yml
+++ b/PaddleCV/object_detection/configs/yolov3_darknet.yml
+# Top-level run settings for the YOLOv3 config.
+architecture: YOLOv3
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+# Canonical boolean literal: `yes` is only a boolean under YAML 1.1 rules.
+use_gpu: true
+max_iters: 500200
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 2000
+metric: COCO
+pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.tar.gz
+weights: https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz
+YOLOv3:
+  backbone: DarkNet
+  yolo_head: YOLOv3Head
+DarkNet:
+  norm_type: sync_bn
+  norm_decay: 0.
+  depth: 53
+YOLOv3Head:
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  norm_decay: 0.
+  ignore_thresh: 0.7
+  label_smooth: true
+  nms:
+    background_label: -1
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    normalized: false
+    score_threshold: 0.01
+  num_classes: 80
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 400000
+    - 450000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 4000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+YoloTrainFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 8
+  bufsize: 128
+  use_process: true
+YoloEvalFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+YoloTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: ../val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
+++ b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml
+# Top-level run settings for the YOLOv3 config.
+architecture: YOLOv3
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+# Canonical boolean literal: `yes` is only a boolean under YAML 1.1 rules.
+use_gpu: true
+max_iters: 500200
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 2000
+metric: COCO
+# Download the pretrained weights over HTTPS, like the other configs that use
+# this host, instead of plaintext HTTP.
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar
+weights: https://paddlemodels.bj.bcebos.com/yolo/yolo_mobilenet1.0.tar.gz
+YOLOv3:
+  backbone: MobileNet
+  yolo_head: YOLOv3Head
+MobileNet:
+  norm_type: sync_bn
+  norm_decay: 0.
+  conv_group_scale: 1
+  with_extra_blocks: false
+YOLOv3Head:
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  norm_decay: 0.
+  ignore_thresh: 0.7
+  label_smooth: true
+  nms:
+    background_label: -1
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    normalized: false
+    score_threshold: 0.01
+  num_classes: 80
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 400000
+    - 450000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 4000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+YoloTrainFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 8
+  bufsize: 128
+  use_process: true
+YoloEvalFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+YoloTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: ../val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/configs/yolov3_r34.yml
+++ b/PaddleCV/object_detection/configs/yolov3_r34.yml
+# Top-level run settings for the YOLOv3 config.
+architecture: YOLOv3
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+# Canonical boolean literal: `yes` is only a boolean under YAML 1.1 rules.
+use_gpu: true
+max_iters: 500200
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 2000
+metric: COCO
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar
+weights: https://paddlemodels.bj.bcebos.com/yolo/yolo_resnet34.tar.gz
+YOLOv3:
+  backbone: ResNet
+  yolo_head: YOLOv3Head
+ResNet:
+  norm_type: sync_bn
+  norm_decay: 0.
+  depth: 34
+  feature_maps: [3, 4, 5]
+YOLOv3Head:
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  norm_decay: 0.
+  ignore_thresh: 0.7
+  label_smooth: true
+  nms:
+    background_label: -1
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    normalized: false
+    score_threshold: 0.01
+  num_classes: 80
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 400000
+    - 450000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 4000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+YoloTrainFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_train2017.json
+    image_dir: train2017
+  num_workers: 8
+  bufsize: 128
+  use_process: true
+YoloEvalFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+YoloTestFeed:
+  batch_size: 1
+  dataset:
+    dataset_dir: data/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  test_file: ../val2017.txt
+  samples: 5
--- a/PaddleCV/object_detection/ppdet/__init__.py
+++ b/PaddleCV/object_detection/ppdet/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/PaddleCV/object_detection/ppdet/core/__init__.py
+++ b/PaddleCV/object_detection/ppdet/core/__init__.py
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import ppdet.modeling
+import ppdet.optimizer
+import ppdet.data.data_feed
--- a/PaddleCV/object_detection/ppdet/core/config/__init__.py
+++ b/PaddleCV/object_detection/ppdet/core/config/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/PaddleCV/object_detection/ppdet/core/config/schema.py
+++ b/PaddleCV/object_detection/ppdet/core/config/schema.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+import inspect
+import importlib
+import re
+# docstring_parser is optional. When importing it fails, fall back to a stub
+# that prints a one-time warning and returns None for every call.
+try:
+    from docstring_parser import parse as doc_parse
+except Exception:
+    def doc_parse(*args):
+        # Warn on the first call only; the flag is stored on the function
+        # object itself (initialised right after the definition).
+        if not doc_parse.__warning_sent__:
+            from ppdet.utils.cli import ColorTTY
+            color_tty = ColorTTY()
+            message = "docstring_parser is not installed, " \
+                + "argument description is not available"
+            print(color_tty.yellow(message))
+            doc_parse.__warning_sent__ = True
+    doc_parse.__warning_sent__ = False
+# typeguard is optional. When importing it fails, fall back to a stub that
+# prints a one-time warning and performs no checking (it never raises).
+try:
+    from typeguard import check_type
+except Exception:
+    def check_type(*args):
+        # Warn on the first call only; the flag is stored on the function
+        # object itself (initialised right after the definition).
+        if not check_type.__warning_sent__:
+            from ppdet.utils.cli import ColorTTY
+            color_tty = ColorTTY()
+            message = "typeguard is not installed, type checking is not available"
+            print(color_tty.yellow(message))
+            check_type.__warning_sent__ = True
+    check_type.__warning_sent__ = False
+__all__ = ['SchemaValue', 'SchemaDict', 'extract_schema']
+class SchemaValue(object):
+    """
+    Description of a single config entry: its name, doc text and optional type.
+    A default is attached separately through `set_default`; the `default`
+    attribute does not exist until then.
+    """
+    def __init__(self, name, doc='', type=None):
+        super(SchemaValue, self).__init__()
+        self.name = name
+        self.doc = doc
+        self.type = type
+    def set_default(self, value):
+        # Creates the `default` attribute; its presence is what
+        # `has_default` tests, so a default of None is still a default.
+        self.default = value
+    def has_default(self):
+        return hasattr(self, 'default')
+class SchemaDict(dict):
+    """
+    Dict of config values paired with a per-key schema (`self.schema`).
+    Lookups of keys that are not stored fall back to the schema, see
+    `__missing__`.
+    """
+    def __init__(self, **kwargs):
+        super(SchemaDict, self).__init__()
+        # key -> SchemaValue describing that key (enforced by `set_schema`)
+        self.schema = {}
+        # when True, `validate` rejects keys that are not in the schema
+        self.strict = False
+        self.doc = ""
+        self.update(kwargs)
+    def __setitem__(self, key, value):
+        # Assigning a plain dict over an existing SchemaDict entry merges into
+        # that entry instead of replacing it; anything else is a normal set.
+        # XXX also update regular dict to SchemaDict??
+        if isinstance(value, dict) and key in self and isinstance(self[key],
+                                                                  SchemaDict):
+            self[key].update(value)
+        else:
+            super(SchemaDict, self).__setitem__(key, value)
+    def __missing__(self, key):
+        # Called by dict lookup when `key` is not stored. Returns the schema
+        # default if there is one; otherwise, for a key known to the schema,
+        # returns the SchemaValue object itself; unknown keys raise KeyError.
+        if self.has_default(key):
+            return self.schema[key].default
+        elif key in self.schema:
+            return self.schema[key]
+        else:
+            raise KeyError(key)
+    def copy(self):
+        # Shallow copy: instance attributes (including the `schema` dict) are
+        # copied by reference, then the stored items are copied over.
+        newone = SchemaDict()
+        newone.__dict__.update(self.__dict__)
+        newone.update(self)
+        return newone
+    def set_schema(self, key, value):
+        assert isinstance(value, SchemaValue)
+        self.schema[key] = value
+    def set_strict(self, strict):
+        self.strict = strict
+    def has_default(self, key):
+        return key in self.schema and self.schema[key].has_default()
+    def is_default(self, key):
+        # False when the schema has no default for `key`. Otherwise True when
+        # the current value has a `__dict__` attribute (treated as default
+        # without comparing), when the key is not stored, or when the stored
+        # value equals the schema default.
+        if not self.has_default(key):
+            return False
+        if hasattr(self[key], '__dict__'):
+            return True
+        else:
+            return key not in self or self[key] == self.schema[key].default
+    def find_default_keys(self):
+        # Scans stored keys followed by schema keys, so a key present in both
+        # can appear twice in the result.
+        return [
+            k for k in list(self.keys()) + list(self.schema.keys())
+            if self.is_default(k)
+        ]
+    def mandatory(self):
+        # True if at least one schema key has no default.
+        return any([k for k in self.schema.keys() if not self.has_default(k)])
+    def find_missing_keys(self):
+        # Schema keys that are neither stored nor defaulted ...
+        missing = [
+            k for k in self.schema.keys()
+            if k not in self and not self.has_default(k)
+        ]
+        # ... plus stored keys whose value is still a placeholder string.
+        placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
+        return missing + placeholders
+    def find_extra_keys(self):
+        # Stored keys that the schema does not know about (unordered).
+        return list(set(self.keys()) - set(self.schema.keys()))
+    def find_mismatch_keys(self):
+        # Runs `check_type` on every schema entry that declares a type; any
+        # exception raised for an entry marks that entry as mismatched.
+        # Relies on `self.name`, which is not set in `__init__`.
+        mismatch_keys = []
+        for arg in self.schema.values():
+            if arg.type is not None:
+                try:
+                    check_type("{}.{}".format(self.name, arg.name),
+                               self[arg.name], arg.type)
+                except Exception:
+                    mismatch_keys.append(arg.name)
+        return mismatch_keys
+    def validate(self):
+        # Raises ValueError for missing keys, ValueError for extra keys (only
+        # in strict mode) and TypeError for type mismatches, in that order.
+        # The messages use `self.name`, which is not set in `__init__`.
+        missing_keys = self.find_missing_keys()
+        if missing_keys:
+            raise ValueError("Missing param for class<{}>: {}".format(
+                self.name, ", ".join(missing_keys)))
+        extra_keys = self.find_extra_keys()
+        if extra_keys and self.strict:
+            raise ValueError("Extraneous param for class<{}>: {}".format(
+                self.name, ", ".join(extra_keys)))
+        mismatch_keys = self.find_mismatch_keys()
+        if mismatch_keys:
+            raise TypeError("Wrong param type for class<{}>: {}".format(
+                self.name, ", ".join(mismatch_keys)))
+def extract_schema(cls):
+    """
+    Extract schema from a given class
+    Args:
+        cls (type): Class from which to extract.
+    Returns:
+        schema (SchemaDict): Extracted schema.
+    """
+    ctor = cls.__init__
+    # python 2 compatibility
+    if hasattr(inspect, 'getfullargspec'):
+        argspec = inspect.getfullargspec(ctor)
+        annotations = argspec.annotations
+        has_kwargs = argspec.varkw is not None
+    else:
+        argspec = inspect.getargspec(ctor)
+        # python 2 type hinting workaround, see pep-3107
+        # however, since `typeguard` does not support python 2, type checking
+        # is still python 3 only for now
+        annotations = getattr(ctor, '__annotations__', {})
+        has_kwargs = argspec.keywords is not None
+    names = [arg for arg in argspec.args if arg != 'self']
+    defaults = argspec.defaults
+    num_defaults = argspec.defaults is not None and len(argspec.defaults) or 0
+    num_required = len(names) - num_defaults
+    docs = cls.__doc__
+    if docs is None and getattr(cls, '__category__', None) == 'op':
+        docs = cls.__call__.__doc__
+    docstring = doc_parse(docs)
+    if docstring is None:
+        comments = {}
+    else:
+        comments = {}
+        for p in docstring.params:
+            match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
+            if match_obj is not None:
+                comments[match_obj.group(1)] = p.description
+    schema = SchemaDict()
+    schema.name = cls.__name__
+    schema.doc = ""
+    if docs is not None:
+        start_pos = docs[0] == '\n' and 1 or 0
+        schema.doc = docs[start_pos:].split("\n")[0].strip()
+    # XXX handle paddle's weird doc convention
+    if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
+        schema.doc = schema.doc[2:-2].strip()
+    schema.category = hasattr(cls, '__category__') and getattr(
+        cls, '__category__') or 'module'
+    schema.strict = not has_kwargs
+    schema.pymodule = importlib.import_module(cls.__module__)
+    schema.inject = getattr(cls, '__inject__', [])
+    for idx, name in enumerate(names):
+        comment = name in comments and comments[name] or name
+        if name in schema.inject:
+            type_ = None
+        else:
+            type_ = name in annotations and annotations[name] or None
+        value_schema = SchemaValue(name, comment, type_)
+        if idx >= num_required:
+            value_schema.set_default(defaults[idx - num_required])
+        schema.set_schema(name, value_schema)
+    return schema
--- a/PaddleCV/object_detection/ppdet/core/config/yaml_helpers.py
+++ b/PaddleCV/object_detection/ppdet/core/config/yaml_helpers.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import importlib
+import inspect
+import yaml
+__all__ = ['serializable', 'Callable']
+def _make_python_constructor(cls):
+    """
+    Build a yaml constructor callback which instantiates `cls`.
+    Args:
+        cls: class to be created from a tagged yaml node
+    Returns: function taking `(loader, node)` and returning a `cls` instance
+    """
+    def python_constructor(loader, node):
+        # a yaml sequence supplies positional arguments
+        if isinstance(node, yaml.SequenceNode):
+            positional = loader.construct_sequence(node, deep=True)
+            return cls(*positional)
+        # any other node is read as a mapping of keyword arguments
+        named = loader.construct_mapping(node, deep=True)
+        try:
+            instance = cls(**named)
+        except Exception as ex:
+            # report which class failed, then hand the error to the caller
+            message = "Error when construct {} instance from yaml config"
+            print(message.format(cls.__name__))
+            raise ex
+        return instance
+    return python_constructor
+def _make_python_representer(cls):
+    """
+    Build a yaml representer callback which dumps instances of `cls`.
+    The constructor argument names of `cls` decide which attributes are
+    written; when the constructor takes no arguments the whole instance
+    `__dict__` is written instead. An `_id` entry is never dumped.
+    Args:
+        cls: class whose instances should be dumped
+    Returns: function taking `(dumper, obj)` and returning a mapping node
+    """
+    # python 2 compatibility
+    if hasattr(inspect, 'getfullargspec'):
+        argspec = inspect.getfullargspec(cls)
+    else:
+        argspec = inspect.getargspec(cls.__init__)
+    argnames = [arg for arg in argspec.args if arg != 'self']
+    def python_representer(dumper, obj):
+        if argnames:
+            data = {name: getattr(obj, name) for name in argnames}
+        else:
+            # work on a copy: removing '_id' below must not delete the
+            # attribute from the live object that is being dumped
+            data = dict(obj.__dict__)
+        data.pop('_id', None)
+        return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
+    return python_representer
+def serializable(cls):
+    """
+    Class decorator registering a yaml constructor and representer for `cls`
+    under the tag `!<class name>`; the class must be "trivially serializable"
+    Args:
+        cls: class to be serialized
+    Returns: cls
+    """
+    tag = u'!{}'.format(cls.__name__)
+    # loading: tagged node -> instance
+    constructor = _make_python_constructor(cls)
+    yaml.add_constructor(tag, constructor)
+    # dumping: instance -> tagged mapping
+    representer = _make_python_representer(cls)
+    yaml.add_representer(cls, representer)
+    return cls
+@serializable
+class Callable(object):
+    """
+    Helper to be used in Yaml for creating arbitrary class objects
+    Args:
+        full_type (str): the full module path to target function, a name
+            without any dot is looked up among the python builtins
+        args (list): positional arguments passed to the target
+        kwargs (dict): keyword arguments passed to the target
+    """
+    def __init__(self, full_type, args=[], kwargs={}):
+        super(Callable, self).__init__()
+        self.full_type = full_type
+        # the default `[]` / `{}` objects are shared by every call of this
+        # constructor, so keep private copies: changing one instance's
+        # arguments must not leak into other instances
+        self.args = list(args or [])
+        self.kwargs = dict(kwargs or {})
+    def __call__(self):
+        """Import the target named by `full_type` and call it with the
+        stored arguments, returning its result"""
+        if '.' in self.full_type:
+            # split "package.module.name" at the last dot
+            idx = self.full_type.rfind('.')
+            module = importlib.import_module(self.full_type[:idx])
+            func_name = self.full_type[idx + 1:]
+        else:
+            # bare name: python 3 calls the module `builtins`, python 2
+            # calls it `__builtin__`
+            try:
+                module = importlib.import_module('builtins')
+            except Exception:
+                module = importlib.import_module('__builtin__')
+            func_name = self.full_type
+        func = getattr(module, func_name)
+        return func(*self.args, **self.kwargs)
--- a/PaddleCV/object_detection/ppdet/core/workspace.py
+++ b/PaddleCV/object_detection/ppdet/core/workspace.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+import importlib
+import os
+import sys
+import yaml
+from .config.schema import SchemaDict, extract_schema
+from .config.yaml_helpers import serializable
+__all__ = [
+    'global_config', 'load_config', 'merge_config', 'get_registered_modules',
+    'create', 'register', 'serializable'
+]
+class AttrDict(dict):
+    """Dict whose keys can also be read as attributes (one level only, nested
+    dicts are NOT converted)"""
+    def __init__(self, **kwargs):
+        super(AttrDict, self).__init__(**kwargs)
+    def __getattr__(self, key):
+        # only reached when normal attribute lookup fails
+        if key not in self:
+            raise AttributeError("object has no attribute '{}'".format(key))
+        return self[key]
+global_config = AttrDict()
+def load_config(file_path):
+    """
+    Load config from file and merge it into the global config.
+    Args:
+        file_path (str): Path of the config file to be loaded, the extension
+            must be `.yml` or `.yaml`.
+    Returns: global config
+    """
+    _, ext = os.path.splitext(file_path)
+    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
+    # NOTE(review): `yaml.Loader` can build arbitrary python objects from
+    # tagged nodes (presumably needed for the `!ClassName` tags) -- only
+    # load config files from trusted sources
+    # the `with` block closes the file handle, even if parsing fails
+    with open(file_path) as f:
+        merge_config(yaml.load(f, Loader=yaml.Loader))
+    return global_config
+def merge_config(config):
+    """
+    Merge config into global config.
+    A dict value whose key already exists in the global config is merged
+    into the existing entry with `update`; every other value replaces (or
+    creates) the entry.
+    Args:
+        config (dict): Config to be merged.
+    Returns: global config
+    """
+    for key, value in config.items():
+        if isinstance(value, dict) and key in global_config:
+            global_config[key].update(value)
+        else:
+            global_config[key] = value
+    # the documented contract is to hand back the global config
+    return global_config
+def get_registered_modules():
+    """Return a new dict holding the global config entries whose value is a
+    `SchemaDict`, keyed by their name in the global config"""
+    registered = {}
+    for name, entry in global_config.items():
+        if isinstance(entry, SchemaDict):
+            registered[name] = entry
+    return registered
+def make_partial(cls):
+    """
+    Prepare a class carrying an `__op__` attribute to act as a wrapper of
+    that op: the op is looked up again by its module and name,
+    `cls.__category__` falls back to 'op', and (when `__append_doc__` is
+    truthy, which is the default) `__call__` is replaced by a function
+    forwarding to the op together with the op's documentation.
+    Args:
+        cls (type): class with an `__op__` attribute.
+    Returns: cls
+    """
+    op_module = importlib.import_module(cls.__op__.__module__)
+    op = getattr(op_module, cls.__op__.__name__)
+    cls.__category__ = getattr(cls, '__category__', None) or 'op'
+    def partial_apply(self, *args, **kwargs):
+        # instance attributes act as preset keyword arguments, keyword
+        # arguments given at call time take precedence
+        kwargs_ = self.__dict__.copy()
+        kwargs_.update(kwargs)
+        return op(*args, **kwargs_)
+    # NOTE(review): `__call__` is only installed inside this branch, so a
+    # class with a falsy `__append_doc__` keeps whatever `__call__` it
+    # already has -- confirm this is intended
+    if getattr(cls, '__append_doc__', True):  # XXX should default to True?
+        if sys.version_info[0] > 2:
+            cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
+            cls.__init__.__doc__ = op.__doc__
+            cls.__call__ = partial_apply
+            cls.__call__.__doc__ = op.__doc__
+        else:
+            # XXX work around for python 2
+            # here the doc is set on the plain function before it is
+            # attached to the class
+            partial_apply.__doc__ = op.__doc__
+            cls.__call__ = partial_apply
+    return cls
+def register(cls):
+    """
+    Class decorator which stores the schema extracted from `cls` in the
+    global config under the class name; a class with an `__op__` attribute
+    is passed through `make_partial` first.
+    Args:
+        cls (type): Module class to be registered.
+    Returns: cls
+    Raises:
+        ValueError: if the class name is already present in global config.
+    """
+    name = cls.__name__
+    if name in global_config:
+        raise ValueError("Module class already registered: {}".format(name))
+    wraps_op = hasattr(cls, '__op__')
+    if wraps_op:
+        cls = make_partial(cls)
+    schema = extract_schema(cls)
+    global_config[cls.__name__] = schema
+    return cls
+def create(cls_or_name, **kwargs):
+    """
+    Create an instance of given module class.
+    Args:
+        cls_or_name (type or str): Class of which to create instance.
+        kwargs: overrides written into the registered config of the module
+            before it is validated.
+    Returns: instance of type `cls_or_name`
+    Raises:
+        ValueError: if an injected dependency is named but missing from the
+            global config, or has an unsupported type.
+    """
+    # isinstance also accepts classes built with a metaclass and subclasses
+    # of str, which an exact `type(...)` comparison rejects
+    assert isinstance(cls_or_name, (type, str)), \
+        "should be a class or name of a class"
+    if isinstance(cls_or_name, str):
+        name = cls_or_name
+    else:
+        name = cls_or_name.__name__
+    assert name in global_config and isinstance(global_config[name], SchemaDict), \
+        "the module {} is not registered".format(name)
+    config = global_config[name]
+    # the overrides are stored in the registered config itself, so they are
+    # still there for later `create` calls of the same module
+    config.update(kwargs)
+    config.validate()
+    cls = getattr(config.pymodule, name)
+    init_args = {}
+    init_args.update(config)
+    for k in getattr(config, 'inject', None) or []:
+        target_key = config[k]
+        # optional dependency
+        if target_key is None:
+            continue
+        # also accept dictionaries and serialized objects, passed as they are
+        if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
+            continue
+        if not isinstance(target_key, str):
+            raise ValueError("Unsupported injection type: {}".format(
+                target_key))
+        # a string names another entry of the global config
+        if target_key not in global_config:
+            raise ValueError("Missing injection config: {}".format(
+                target_key))
+        target = global_config[target_key]
+        if isinstance(target, SchemaDict):
+            # registered module: build it recursively
+            init_args[k] = create(target_key)
+        elif hasattr(target, '__dict__'):  # serialized object
+            init_args[k] = target
+    return cls(**init_args)
--- a/PaddleCV/object_detection/ppdet/data/__init__.py
+++ b/PaddleCV/object_detection/ppdet/data/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# function:
+#    module to prepare data for detection model training
+#
+# implementation notes:
+# - Dataset
+#    basic interface to accessing data samples in stream mode
+#
+# - xxxSource (RoiDbSource)
+#    * subclass of 'Dataset'
+#    * load data from local files and other source data
+#
+# - xxxOperator (DecodeImage)
+#    * subclass of 'BaseOperator'
+#    * each op can transform a sample, eg: decode/resize/crop image
+#    * each op must obey basic rules defined in transform.operator.base
+#
+# - transformer
+#    * subclass of 'Dataset'
+#    * 'MappedDataset' accept a 'xxxSource' and a list of 'xxxOperator'
+#       to build a transformed 'Dataset'
+from .dataset import Dataset
+from .reader import Reader
+from .data_feed import create_reader
+__all__ = ['Dataset', 'Reader', 'create_reader']
--- a/PaddleCV/object_detection/ppdet/data/data_feed.py
+++ b/PaddleCV/object_detection/ppdet/data/data_feed.py
--- a/PaddleCV/object_detection/ppdet/data/dataset.py
+++ b/PaddleCV/object_detection/ppdet/data/dataset.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# function:
+#    interface for accessing data samples in stream
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+class Dataset(object):
+    """Abstract iterator interface over a stream of data samples; subclasses
+    provide `next`, `reset`, `size`, `drained` and `epoch_id`"""
+    def __init__(self):
+        self._epoch = -1
+    def __next__(self):
+        # python 3 iterator protocol, forwards to the python 2 style `next`
+        return self.next()
+    def __iter__(self):
+        return self
+    def __str__(self):
+        # `_fname` and `_pos` are not set by this base class
+        template = "{}(fname:{}, epoch:{:d}, size:{:d}, pos:{:d})"
+        return template.format(
+            type(self).__name__, self._fname, self._epoch,
+            self.size(), self._pos)
+    def next(self):
+        """return the next sample"""
+        owner = self.__class__.__name__
+        raise NotImplementedError('%s.next not available' % (owner))
+    def reset(self):
+        """go back to the initial state and begin a new epoch"""
+        owner = self.__class__.__name__
+        raise NotImplementedError('%s.reset not available' % (owner))
+    def size(self):
+        """return the number of samples in this dataset"""
+        owner = self.__class__.__name__
+        raise NotImplementedError('%s.size not available' % (owner))
+    def drained(self):
+        """whether all samples have been read out for this epoch"""
+        owner = self.__class__.__name__
+        raise NotImplementedError('%s.drained not available' % (owner))
+    def epoch_id(self):
+        """return the epoch id of the latest sample"""
+        owner = self.__class__.__name__
+        raise NotImplementedError('%s.epoch_id not available' % (owner))
--- a/PaddleCV/object_detection/ppdet/data/reader.py
+++ b/PaddleCV/object_detection/ppdet/data/reader.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# function:
+#    Interface to build readers for detection data like COCO or VOC
+#
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+from numbers import Integral
+import logging
+from .source import build_source
+from .transform import build_mapper, map, batch, batch_map
+logger = logging.getLogger(__name__)
+class Reader(object):
+    """
+    Interface to make readers for training or evaluation
+    Args:
+        data_cf (dict): data source config per mode, indexed by 'TRAIN',
+            'VAL' or 'TEST'
+        trans_conf (dict): transform config per mode, indexed the same way;
+            each entry provides 'OPS' and 'BATCH_SIZE' and may provide
+            'DROP_LAST' and 'WORKER_CONF'
+        maxiter (int): number of batches a reader yields; a value <= 0
+            means one single pass over the data
+    """
+    def __init__(self, data_cf, trans_conf, maxiter=-1):
+        self._data_cf = data_cf
+        self._trans_conf = trans_conf
+        self._maxiter = maxiter
+        # filled from the data source when a 'TRAIN' reader is built
+        self._cname2cid = None
+        assert isinstance(self._maxiter, Integral), "maxiter should be int"
+    def _make_reader(self, mode):
+        """
+        Build reader for training or validation
+        Args:
+            mode (str): key into both configs: 'TRAIN', 'VAL' or 'TEST'
+        Returns: generator function yielding batches; it carries an
+            `imid2path` attribute when the source has `get_imid2path`
+        """
+        file_conf = self._data_cf[mode]
+        # 1, Build data source
+        sc_conf = {'data_cf': file_conf, 'cname2cid': self._cname2cid}
+        sc = build_source(sc_conf)
+        # 2, Build a transformed dataset
+        ops = self._trans_conf[mode]['OPS']
+        batchsize = self._trans_conf[mode]['BATCH_SIZE']
+        drop_last = False if 'DROP_LAST' not in \
+            self._trans_conf[mode] else self._trans_conf[mode]['DROP_LAST']
+        mapper = build_mapper(ops, {'is_train': mode == 'TRAIN'})
+        worker_args = None
+        if 'WORKER_CONF' in self._trans_conf[mode]:
+            worker_args = self._trans_conf[mode]['WORKER_CONF']
+            worker_args = {k.lower(): v for k, v in worker_args.items()}
+        mapped_ds = map(sc, mapper, worker_args)
+        batched_ds = batch(mapped_ds, batchsize, drop_last)
+        # only these (lower-cased) options are handed to the batch mapper
+        trans_conf = {k.lower(): v for k, v in self._trans_conf[mode].items()}
+        need_keys = {
+            'is_padding',
+            'coarsest_stride',
+            'random_shapes',
+            'multi_scales',
+            'use_padded_im_info',
+        }
+        bm_config = {
+            key: value
+            for key, value in trans_conf.items() if key in need_keys
+        }
+        batched_ds = batch_map(batched_ds, bm_config)
+        batched_ds.reset()
+        if mode.lower() == 'train':
+            if self._cname2cid is not None:
+                # `Logger.warn` is a deprecated alias and is removed in
+                # recent python versions, `warning` works everywhere
+                logger.warning('cname2cid already set, it will be overridden')
+            # later readers of this instance reuse the training label map
+            self._cname2cid = sc.cname2cid
+        # 3, Build a reader
+        maxit = -1 if self._maxiter <= 0 else self._maxiter
+        def _reader():
+            n = 0
+            while True:
+                for _batch in batched_ds:
+                    yield _batch
+                    n += 1
+                    # iteration budget used up
+                    if maxit > 0 and n == maxit:
+                        return
+                batched_ds.reset()
+                # without a budget stop after one pass, otherwise start
+                # another pass over the data
+                if maxit <= 0:
+                    return
+        if hasattr(sc, 'get_imid2path'):
+            _reader.imid2path = sc.get_imid2path()
+        return _reader
+    def train(self):
+        """Build reader for training"""
+        return self._make_reader('TRAIN')
+    def val(self):
+        """Build reader for validation"""
+        return self._make_reader('VAL')
+    def test(self):
+        """Build reader for inference"""
+        return self._make_reader('TEST')
--- a/PaddleCV/object_detection/ppdet/data/source/__init__.py
+++ b/PaddleCV/object_detection/ppdet/data/source/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import copy
+from .roidb_source import RoiDbSource
+from .simple_source import SimpleSource
+def build_source(config):
+    """
+    Create a data source object from `config`, 'RoiDbSource' being the
+    default source type
+    Args:
+        config (dict): either the source arguments themselves, or a dict of
+        the following structure:
+        {
+            data_cf (dict): keys are lower-cased before use
+                anno_file (str): label file or image list file path
+                image_dir (str): root directory for images
+                samples (int): number of samples to load, -1 means all
+                is_shuffle (bool): should samples be shuffled
+                load_img (bool): should images be loaded
+                mixup_epoch (int): parse mixup in first n epoch
+                with_background (bool): whether load background as a class
+            cname2cid (dict): the label name to id dictionary
+        }
+    Returns: a `RoiDbSource` or `SimpleSource` instance
+    """
+    if 'data_cf' not in config:
+        data_cf = config
+    else:
+        data_cf = {}
+        for key, value in config['data_cf'].items():
+            data_cf[key.lower()] = value
+        data_cf['cname2cid'] = config['cname2cid']
+    # constructor arguments are a deep copy, the caller's config is untouched
+    args = copy.deepcopy(data_cf)
+    # type names which are all served by 'RoiDbSource'
+    roidb_aliases = ('VOCSource', 'COCOSource', 'RoiDbSource')
+    # default type is 'RoiDbSource'
+    source_type = 'RoiDbSource'
+    if 'type' in data_cf:
+        requested = data_cf['type']
+        if requested not in roidb_aliases:
+            source_type = requested
+        # 'type' only selects the class, it is not a constructor argument
+        del args['type']
+    if source_type == 'RoiDbSource':
+        return RoiDbSource(**args)
+    if source_type == 'SimpleSource':
+        return SimpleSource(**args)
+    raise ValueError('source type not supported: ' + source_type)
--- a/PaddleCV/object_detection/ppdet/data/source/coco_loader.py
+++ b/PaddleCV/object_detection/ppdet/data/source/coco_loader.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from pycocotools.coco import COCO
+import logging
+logger = logging.getLogger(__name__)
+def load(anno_path, sample_num=-1, with_background=True):
+    """
+    Read the COCO annotation json 'anno_path' into a list of records
+    Args:
+        anno_path (str): json file path
+        sample_num (int): number of samples to load, -1 means all
+        with_background (bool): whether load background as a class.
+                                if True, class ids start at 1 and id 0
+                                is left for background. default True
+    Returns:
+        (records, cname2cid)
+        'records' is list of dict whose structure is:
+        {
+            'im_file': im_fname, # image file name
+            'im_id': img_id, # image id, as array of one element
+            'h': im_h, # height of image
+            'w': im_w, # width
+            'is_crowd': is_crowd,
+            'gt_score': gt_score,
+            'gt_class': gt_class,
+            'gt_bbox': gt_bbox, # rows of [x1, y1, x2, y2]
+            'gt_poly': gt_poly,
+            'difficult': difficult,
+        }
+        'cname2cid' is a dict used to map category name to class id
+    """
+    assert anno_path.endswith('.json'), 'invalid coco annotation file: ' \
+        + anno_path
+    coco = COCO(anno_path)
+    img_ids = coco.getImgIds()
+    cat_ids = coco.getCatIds()
+    # when with_background = True, mapping category to classid, like:
+    #   background:0, first_class:1, second_class:2, ...
+    first_clsid = int(with_background)
+    catid2clsid = {
+        catid: position + first_clsid
+        for position, catid in enumerate(cat_ids)
+    }
+    cname2cid = {
+        coco.loadCats(catid)[0]['name']: clsid
+        for catid, clsid in catid2clsid.items()
+    }
+    records = []
+    for img_id in img_ids:
+        img_anno = coco.loadImgs(img_id)[0]
+        im_fname = img_anno['file_name']
+        im_w = img_anno['width']
+        im_h = img_anno['height']
+        # only annotations with iscrowd=False are requested
+        ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
+        valid = []
+        for inst in coco.loadAnns(ann_ids):
+            left, top, box_w, box_h = inst['bbox']
+            # [x, y, w, h] -> corner form, clipped to the image
+            x1 = max(0, left)
+            y1 = max(0, top)
+            x2 = min(im_w - 1, x1 + max(0, box_w - 1))
+            y2 = min(im_h - 1, y1 + max(0, box_h - 1))
+            if not (inst['area'] > 0 and x2 >= x1 and y2 >= y1):
+                # annotation without a usable box is left out
+                continue
+            inst['clean_bbox'] = [x1, y1, x2, y2]
+            valid.append(inst)
+        num_bbox = len(valid)
+        gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+        gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+        gt_score = np.ones((num_bbox, 1), dtype=np.float32)
+        is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
+        difficult = np.zeros((num_bbox, 1), dtype=np.int32)
+        gt_poly = []
+        for row, box in enumerate(valid):
+            gt_class[row, 0] = catid2clsid[box['category_id']]
+            gt_bbox[row, :] = box['clean_bbox']
+            is_crowd[row, 0] = box['iscrowd']
+            gt_poly.append(box['segmentation'])
+        records.append({
+            'im_file': im_fname,
+            'im_id': np.array([img_id]),
+            'h': im_h,
+            'w': im_w,
+            'is_crowd': is_crowd,
+            'gt_class': gt_class,
+            'gt_bbox': gt_bbox,
+            'gt_score': gt_score,
+            'gt_poly': gt_poly,
+            'difficult': difficult
+        })
+        logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
+            im_fname, img_id, im_h, im_w))
+        # stop early once the requested number of samples is reached
+        if sample_num > 0 and len(records) >= sample_num:
+            break
+    assert len(records) > 0, 'not found any coco record in %s' % (anno_path)
+    logger.info('{} samples in file {}'.format(len(records), anno_path))
+    return records, cname2cid
--- a/PaddleCV/object_detection/ppdet/data/source/loader.py
+++ b/PaddleCV/object_detection/ppdet/data/source/loader.py
--- a/PaddleCV/object_detection/ppdet/data/source/roidb_source.py
+++ b/PaddleCV/object_detection/ppdet/data/source/roidb_source.py
--- a/PaddleCV/object_detection/ppdet/data/source/simple_source.py
+++ b/PaddleCV/object_detection/ppdet/data/source/simple_source.py
--- a/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
+++ b/PaddleCV/object_detection/ppdet/data/source/voc_loader.py
--- a/PaddleCV/object_detection/ppdet/data/tests/000012.jpg
+++ b/PaddleCV/object_detection/ppdet/data/tests/000012.jpg
--- a/PaddleCV/object_detection/ppdet/data/tests/coco.yml
+++ b/PaddleCV/object_detection/ppdet/data/tests/coco.yml
+DATA:
+    TRAIN:
+        ANNO_FILE: data/coco.test/train2017.roidb
+        IMAGE_DIR: data/coco.test/train2017
+        SAMPLES: 10
+        TYPE: RoiDbSource
+    VAL: 
+        ANNO_FILE: data/coco.test/val2017.roidb
+        IMAGE_DIR: data/coco.test/val2017
+        SAMPLES: 10
+        TYPE: RoiDbSource
+TRANSFORM:
+    TRAIN:
+        OPS:
+            - OP: DecodeImage
+              TO_RGB: False
+            - OP: RandomFlipImage
+              PROB: 0.5
+            - OP: NormalizeImage
+              MEAN: [102.9801, 115.9465, 122.7717]
+              IS_SCALE: False
+              IS_CHANNEL_FIRST: False
+            - OP: ResizeImage
+              TARGET_SIZE: 800
+              MAX_SIZE: 1333
+            - OP: Rgb2Bgr
+              TO_BGR: False
+            - OP: ArrangeRCNN
+        BATCH_SIZE: 1
+        IS_PADDING: True
+        DROP_LAST: False
+    VAL:
+        OPS:
+            - OP: DecodeImage
+              TO_RGB: True
+            - OP: ResizeImage
+              TARGET_SIZE: 224
+            - OP: ArrangeSSD
+        BATCH_SIZE: 1
+    # NOTE(review): Reader._make_reader looks for 'WORKER_CONF' inside the
+    # per-mode entry (TRANSFORM.TRAIN / TRANSFORM.VAL); at this level it is
+    # a sibling of TRAIN and VAL -- confirm the tests read it from here
+    WORKER_CONF:
+        BUFSIZE: 200
+        WORKER_NUM: 8
+        USE_PROCESS: False
--- a/PaddleCV/object_detection/ppdet/data/tests/data/prepare_data.sh
+++ b/PaddleCV/object_detection/ppdet/data/tests/data/prepare_data.sh
+#!/bin/bash
+#function:
+#   prepare coco data for testing
+#usage:
+#   prepare_data.sh [python2]
+#   the python3 archive is used unless the first argument is "python2"
+# directory of this script; the array index belongs inside the braces
+root=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
+cwd=$(pwd)
+if [[ "$cwd" != "$root" ]];then
+    # work next to this script; give up if the directory can not be entered
+    pushd "$root" >/dev/null || exit 1
+fi
+test_coco_python2_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python2.zip/20190603095315/download"
+test_coco_python3_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python3.zip/20190603095447/download"
+if [[ $1 = "python2" ]];then
+    test_coco_data_url=${test_coco_python2_url}
+    coco_zip_file="coco.test.python2.zip"
+else
+    test_coco_data_url=${test_coco_python3_url}
+    coco_zip_file="coco.test.python3.zip"
+fi
+echo "download testing coco from url[${test_coco_data_url}]"
+# archive name without ".zip" is the directory it unpacks to
+coco_root_dir=${coco_zip_file/.zip/}
+# clear already exist file or directory
+rm -rf "${coco_root_dir}" "${coco_zip_file}"
+# "wget -O" creates the output file even when the download fails, so check
+# the exit status and that the file is not empty instead of its existence
+if wget "${test_coco_data_url}" -O "${coco_zip_file}" && [ -s "${coco_zip_file}" ];then
+    echo "succeed to download ${coco_zip_file}, so unzip it"
+    unzip "${coco_zip_file}" >/dev/null 2>&1
+else
+    echo "failed to download ${coco_zip_file}"
+    rm -f "${coco_zip_file}"
+fi
+if [ -e "${coco_root_dir}" ];then
+    # the tests refer to the data through the fixed name "coco.test"
+    rm -rf coco.test
+    ln -s "${coco_root_dir}" coco.test
+    echo "succeed to generate coco data in[${coco_root_dir}] for testing"
+    exit 0
+else
+    echo "failed to generate coco data"
+    exit 1
+fi
--- a/PaddleCV/object_detection/ppdet/data/tests/rcnn_dataset.yml
+++ b/PaddleCV/object_detection/ppdet/data/tests/rcnn_dataset.yml
--- a/PaddleCV/object_detection/ppdet/data/tests/run_all_tests.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/run_all_tests.py
--- a/PaddleCV/object_detection/ppdet/data/tests/set_env.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/set_env.py
--- a/PaddleCV/object_detection/ppdet/data/tests/test_loader.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/test_loader.py
--- a/PaddleCV/object_detection/ppdet/data/tests/test_operator.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/test_operator.py
--- a/PaddleCV/object_detection/ppdet/data/tests/test_reader.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/test_reader.py
--- a/PaddleCV/object_detection/ppdet/data/tests/test_roidb_source.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/test_roidb_source.py
--- a/PaddleCV/object_detection/ppdet/data/tests/test_transformer.py
+++ b/PaddleCV/object_detection/ppdet/data/tests/test_transformer.py
--- a/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
+++ b/PaddleCV/object_detection/ppdet/data/tools/generate_data_for_training.py
--- a/PaddleCV/object_detection/ppdet/data/transform/__init__.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/__init__.py
--- a/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/arrange_sample.py
--- a/PaddleCV/object_detection/ppdet/data/transform/op_helper.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/op_helper.py
--- a/PaddleCV/object_detection/ppdet/data/transform/operators.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/operators.py
--- a/PaddleCV/object_detection/ppdet/data/transform/parallel_map.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/parallel_map.py
--- a/PaddleCV/object_detection/ppdet/data/transform/post_map.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/post_map.py
--- a/PaddleCV/object_detection/ppdet/data/transform/shared_queue/__init__.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/shared_queue/__init__.py
--- a/PaddleCV/object_detection/ppdet/data/transform/shared_queue/queue.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/shared_queue/queue.py
--- a/PaddleCV/object_detection/ppdet/data/transform/shared_queue/sharedmemory.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/shared_queue/sharedmemory.py
--- a/PaddleCV/object_detection/ppdet/data/transform/transformer.py
+++ b/PaddleCV/object_detection/ppdet/data/transform/transformer.py
--- a/PaddleCV/object_detection/ppdet/modeling/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/__init__.py
--- a/PaddleCV/object_detection/ppdet/modeling/anchor_heads/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/anchor_heads/__init__.py
--- a/PaddleCV/object_detection/ppdet/modeling/anchor_heads/retina_head.py
+++ b/PaddleCV/object_detection/ppdet/modeling/anchor_heads/retina_head.py
--- a/PaddleCV/object_detection/ppdet/modeling/anchor_heads/rpn_head.py
+++ b/PaddleCV/object_detection/ppdet/modeling/anchor_heads/rpn_head.py
--- a/PaddleCV/object_detection/ppdet/modeling/anchor_heads/yolo_head.py
+++ b/PaddleCV/object_detection/ppdet/modeling/anchor_heads/yolo_head.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/__init__.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/cascade_rcnn.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/faster_rcnn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/faster_rcnn.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/mask_rcnn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/mask_rcnn.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/retinanet.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/retinanet.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/ssd.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/ssd.py
--- a/PaddleCV/object_detection/ppdet/modeling/architectures/yolov3.py
+++ b/PaddleCV/object_detection/ppdet/modeling/architectures/yolov3.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/__init__.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/darknet.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/darknet.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/fpn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/fpn.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/mobilenet.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/mobilenet.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/name_adapter.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/name_adapter.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/resnet.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/resnet.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/resnext.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/resnext.py
--- a/PaddleCV/object_detection/ppdet/modeling/backbones/senet.py
+++ b/PaddleCV/object_detection/ppdet/modeling/backbones/senet.py
--- a/PaddleCV/object_detection/ppdet/modeling/model_input.py
+++ b/PaddleCV/object_detection/ppdet/modeling/model_input.py
--- a/PaddleCV/object_detection/ppdet/modeling/ops.py
+++ b/PaddleCV/object_detection/ppdet/modeling/ops.py
--- a/PaddleCV/object_detection/ppdet/modeling/roi_extractors/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/roi_extractors/__init__.py
--- a/PaddleCV/object_detection/ppdet/modeling/roi_extractors/roi_extractor.py
+++ b/PaddleCV/object_detection/ppdet/modeling/roi_extractors/roi_extractor.py
--- a/PaddleCV/object_detection/ppdet/modeling/roi_heads/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/roi_heads/__init__.py
--- a/PaddleCV/object_detection/ppdet/modeling/roi_heads/bbox_head.py
+++ b/PaddleCV/object_detection/ppdet/modeling/roi_heads/bbox_head.py
--- a/PaddleCV/object_detection/ppdet/modeling/roi_heads/cascade_head.py
+++ b/PaddleCV/object_detection/ppdet/modeling/roi_heads/cascade_head.py
--- a/PaddleCV/object_detection/ppdet/modeling/roi_heads/mask_head.py
+++ b/PaddleCV/object_detection/ppdet/modeling/roi_heads/mask_head.py
--- a/PaddleCV/object_detection/ppdet/modeling/target_assigners.py
+++ b/PaddleCV/object_detection/ppdet/modeling/target_assigners.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/__init__.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/__init__.py
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/PaddleCV/object_detection/ppdet/modeling/tests/decorator_helper.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/decorator_helper.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_cascade_rcnn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_cascade_rcnn.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_faster_rcnn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_faster_rcnn.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_mask_rcnn.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_mask_rcnn.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_retinanet.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_retinanet.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_ssd.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_ssd.py
--- a/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_yolov3.py
+++ b/PaddleCV/object_detection/ppdet/modeling/tests/test_detector_yolov3.py
--- a/PaddleCV/object_detection/ppdet/optimizer.py
+++ b/PaddleCV/object_detection/ppdet/optimizer.py
--- a/PaddleCV/object_detection/ppdet/utils/__init__.py
+++ b/PaddleCV/object_detection/ppdet/utils/__init__.py
--- a/PaddleCV/object_detection/ppdet/utils/checkpoint.py
+++ b/PaddleCV/object_detection/ppdet/utils/checkpoint.py
--- a/PaddleCV/object_detection/ppdet/utils/cli.py
+++ b/PaddleCV/object_detection/ppdet/utils/cli.py
--- a/PaddleCV/object_detection/ppdet/utils/coco_eval.py
+++ b/PaddleCV/object_detection/ppdet/utils/coco_eval.py
--- a/PaddleCV/object_detection/ppdet/utils/colormap.py
+++ b/PaddleCV/object_detection/ppdet/utils/colormap.py
--- a/PaddleCV/object_detection/ppdet/utils/download.py
+++ b/PaddleCV/object_detection/ppdet/utils/download.py
--- a/PaddleCV/object_detection/ppdet/utils/eval_utils.py
+++ b/PaddleCV/object_detection/ppdet/utils/eval_utils.py
--- a/PaddleCV/object_detection/ppdet/utils/stats.py
+++ b/PaddleCV/object_detection/ppdet/utils/stats.py
--- a/PaddleCV/object_detection/ppdet/utils/visualizer.py
+++ b/PaddleCV/object_detection/ppdet/utils/visualizer.py
--- a/PaddleCV/object_detection/requirements.txt
+++ b/PaddleCV/object_detection/requirements.txt
--- a/PaddleCV/object_detection/setup.py
+++ b/PaddleCV/object_detection/setup.py
--- a/PaddleCV/object_detection/tools/configure.py
+++ b/PaddleCV/object_detection/tools/configure.py
--- a/PaddleCV/object_detection/tools/eval.py
+++ b/PaddleCV/object_detection/tools/eval.py
--- a/PaddleCV/object_detection/tools/infer.py
+++ b/PaddleCV/object_detection/tools/infer.py
--- a/PaddleCV/object_detection/tools/train.py
+++ b/PaddleCV/object_detection/tools/train.py