diff --git a/PaddleCV/PaddleDetection/.gitignore b/PaddleCV/PaddleDetection/.gitignore
index a89c71153e9cc197dc4541475f2ccbfd8b1cd129..43369eea55459685633f4a95743b60834dad2d74 100644
--- a/PaddleCV/PaddleDetection/.gitignore
+++ b/PaddleCV/PaddleDetection/.gitignore
@@ -56,3 +56,9 @@ coverage.xml
/docs/_build/
*.json
+
+
+dataset/coco/annotations
+dataset/coco/train2017
+dataset/coco/val2017
+dataset/voc/VOCdevkit
diff --git a/PaddleCV/PaddleDetection/README.md b/PaddleCV/PaddleDetection/README.md
index 0ad79d226a0e3670230706742f6070157602c592..5fc9f92e0cc448f9bee22e9d7add3ba04572f202 100644
--- a/PaddleCV/PaddleDetection/README.md
+++ b/PaddleCV/PaddleDetection/README.md
@@ -32,97 +32,102 @@ changes.
- Performance Optimized:
With the help of the underlying PaddlePaddle framework, faster training and
-reduced GPU memory footprint is achieved. Notably, Yolo V3 training is
+reduced GPU memory footprint are achieved. Notably, YOLOv3 training is
much faster compared to other frameworks. Another example is Mask-RCNN
(ResNet50): we managed to fit up to 4 images per GPU (Tesla V100 16GB) during
multi-GPU training.
Supported Architectures:
-| | ResNet | ResNet-vd [1](#vd) | ResNeXt-vd | SENet | MobileNet | DarkNet | VGG |
-|--------------------|:------:|------------------------------:|:----------:|:-----:|:---------:|:-------:|:---:|
-| Faster R-CNN | ✓ | ✓ | x | ✓ | ✗ | ✗ | ✗ |
-| Faster R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |
-| Mask R-CNN | ✓ | ✓ | x | ✓ | ✗ | ✗ | ✗ |
-| Mask R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |
-| Cascade R-CNN | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
-| RetinaNet | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
-| Yolov3 | ✓ | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ |
-| SSD | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ |
+| | ResNet | ResNet-vd [1](#vd) | ResNeXt-vd | SENet | MobileNet | DarkNet | VGG |
+| ------------------- | :----: | ----------------------------: | :--------: | :---: | :-------: | :-----: | :--: |
+| Faster R-CNN | ✓ | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ |
+| Faster R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |
+| Mask R-CNN | ✓ | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ |
+| Mask R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |
+| Cascade Faster-RCNN | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | ✗ |
+| Cascade Mask-RCNN | ✓ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ |
+| RetinaNet | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
+| YOLOv3 | ✓ | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ |
+| SSD | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ |
[1] [ResNet-vd](https://arxiv.org/pdf/1812.01187) models offer much improved accuracy with negligible performance cost.
Advanced Features:
-- [x] **Synchronized Batch Norm**: currently used by Yolo V3.
-- [x] **Group Norm**: pretrained models to be released.
-- [x] **Modulated Deformable Convolution**: pretrained models to be released.
-- [x] **Deformable PSRoI Pooling**: pretrained models to be released.
+- [x] **Synchronized Batch Norm**: currently used by YOLOv3.
+- [x] **Group Norm**
+- [x] **Modulated Deformable Convolution**
+- [x] **Deformable PSRoI Pooling**
**NOTE:** Synchronized batch normalization can only be used on multiple GPU devices; it cannot be used on CPU devices or a single GPU device.
+## Get Started
-## Model zoo
-
-Pretrained models are available in the PaddlePaddle [PaddleDetection model zoo](docs/MODEL_ZOO.md).
-
+- [Installation guide](docs/INSTALL.md)
+- [Quick start on a small dataset](docs/QUICK_STARTED.md)
+- [Guide to training, evaluation and argument descriptions](docs/GETTING_STARTED.md)
+- [Guide to the preprocessing pipeline and custom datasets](docs/DATA.md)
+- [Introduction to the configuration workflow](docs/CONFIG.md)
+- [Detailed configuration examples with explanations](docs/config_example/)
+- [IPython Notebook demo](demo/mask_rcnn_demo.ipynb)
+- [Transfer learning tutorial](docs/TRANSFER_LEARNING.md)
-## Installation
+## Model Zoo
-Please follow the [installation guide](docs/INSTALL.md).
+- Pretrained models are available in the [PaddleDetection model zoo](docs/MODEL_ZOO.md).
+- [Face detection models](configs/face_detection/README.md)
+- [Pretrained models for pedestrian and vehicle detection](contrib/README.md)
+## Model Compression
-## Get Started
+- [Quantization-aware training example](slim/quantization)
+- [Pruning compression example](slim/prune)
-For inference, simply run the following command and the visualized result will
-be saved in `output`.
+## Deploy
-```bash
-export PYTHONPATH=`pwd`:$PYTHONPATH
-python tools/infer.py -c configs/mask_rcnn_r50_1x.yml \
- -o weights=https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_1x.tar \
- --infer_img=demo/000000570688.jpg
-```
+- [Export model for inference deployment](docs/EXPORT_MODEL.md)
+- [C++ inference deployment](inference/README.md)
-For detailed training and evaluation workflow, please refer to [GETTING_STARTED.md](docs/GETTING_STARTED.md).
+## Benchmark
-For detailed configuration and parameter description, please refer to [Complete config files](docs/config_example/)
+- [Inference benchmark](docs/BENCHMARK_INFER_cn.md)
-We also recommend users to take a look at the [IPython Notebook demo](demo/mask_rcnn_demo.ipynb)
-Further information can be found in these documentations:
+## Updates
-- [Introduction to the configuration workflow.](docs/CONFIG.md)
-- [Guide to custom dataset and preprocess pipeline.](docs/DATA.md)
+#### 10/2019
+- Add face detection models BlazeFace and FaceBoxes.
+- Enrich COCO models; box mAP reaches 51.9%.
+- Add CACascade-RCNN, one of the best single models from the champion solution of the Objects365 2019 Challenge Full Track.
+- Add pretrained models for pedestrian and vehicle detection.
+- Support mixed-precision training.
+- Add C++ inference deployment.
+- Add model compression examples.
-## Todo List
+#### 2/9/2019
-Please note this is a work in progress, substantial changes may come in the
-near future.
-Some of the planned features include:
+- Add pretrained models with GroupNorm.
-- [ ] Mixed precision training.
-- [ ] Distributed training.
-- [ ] Inference in 8-bit mode.
-- [ ] User defined operations.
-- [ ] Larger model zoo.
+- Add Cascade-Mask-RCNN+FPN.
+#### 5/8/2019
-## Updates
+- Add a series of models related to Modulated Deformable Convolution.
#### 7/29/2019
- Update Chinese docs for PaddleDetection
- Fix bug in R-CNN models when train and test at the same time
- Add ResNext101-vd + Mask R-CNN + FPN models
-- Add Yolo v3 on VOC models
+- Add YOLOv3 on VOC models
#### 7/3/2019
- Initial release of PaddleDetection and detection model zoo
- Models included: Faster R-CNN, Mask R-CNN, Faster R-CNN+FPN, Mask
- R-CNN+FPN, Cascade-Faster-RCNN+FPN, RetinaNet, Yolo v3, and SSD.
+ R-CNN+FPN, Cascade-Faster-RCNN+FPN, RetinaNet, YOLOv3, and SSD.
## Contributing
diff --git a/PaddleCV/PaddleDetection/README_cn.md b/PaddleCV/PaddleDetection/README_cn.md
index 4864557f4fe5fd6e64cb7a18f27c0a054060cbce..19ac0a693c81dd930bd97f352cb5380f4a709ecb 100644
--- a/PaddleCV/PaddleDetection/README_cn.md
+++ b/PaddleCV/PaddleDetection/README_cn.md
@@ -2,7 +2,7 @@
# PaddleDetection
-The goal of PaddleDetection is to provide industry and academia with a large number of easy-to-use object detection models. PaddleDetection is not only complete in performance and easy to deploy, but also able to flexibly meet the needs of algorithm development.
+The goal of PaddleDetection is to provide industry and academia with rich, easy-to-use object detection models. It not only delivers strong performance and easy deployment, but also flexibly meets the needs of algorithm research.
**All models in this detection library currently require PaddlePaddle 1.6 or higher, or an appropriate develop version.**
@@ -17,15 +17,15 @@ PaddleDetection的目的是为工业界和学术界提供大量易使用的目
- Easy deployment:
- The main operators used in PaddleDetection models are implemented in C++ and CUDA, which, together with PaddlePaddle's high-performance inference engine, makes them easy to deploy in server environments.
+ The core operators used in PaddleDetection models are implemented in C++ or CUDA, and with PaddlePaddle's high-performance inference engine they can be conveniently deployed on a variety of hardware platforms.
- High flexibility:
- Each component of PaddleDetection is a functional unit. For example, model structures and data preprocessing pipelines can easily be customized by modifying configuration files.
+ PaddleDetection decouples its components through modular design, so various detection models can easily be built from configuration files.
- High performance:
- With the help of the underlying PaddlePaddle framework, faster model training and lower GPU memory usage are achieved. Notably, Yolo v3 training is much faster than in other frameworks. In addition, Mask-RCNN (ResNet50) supports multi-GPU training on Tesla V100 16GB with 4 images per GPU.
+ Built on PaddlePaddle's high-performance core, it has advantages in training speed and GPU memory usage. For example, YOLOv3 trains faster than in other frameworks, and on Tesla V100 16GB, Mask-RCNN (ResNet50) can reach a single-card batch size of 4 (or even 5).
Supported architectures:
@@ -35,75 +35,89 @@ PaddleDetection的目的是为工业界和学术界提供大量易使用的目
| Faster R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |
| Mask R-CNN | ✓ | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ |
| Mask R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ |
-| Cascade R-CNN | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
-| RetinaNet | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
-| Yolov3 | ✓ | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ |
+| Cascade Faster-RCNN | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | ✗ |
+| Cascade Mask-RCNN | ✓ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ |
+| RetinaNet | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ |
+| YOLOv3 | ✓ | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ |
| SSD | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ |
[1] [ResNet-vd](https://arxiv.org/pdf/1812.01187) models provide a large accuracy gain with little performance cost.
Advanced features:
-- [x] **Synchronized Batch Norm**: currently used by Yolo v3.
-- [x] **Group Norm**: pretrained models to be released.
-- [x] **Modulated Deformable Convolution**: pretrained models to be released.
-- [x] **Deformable PSRoI Pooling**: pretrained models to be released.
+- [x] **Synchronized Batch Norm**: currently used by YOLOv3.
+- [x] **Group Norm**
+- [x] **Modulated Deformable Convolution**
+- [x] **Deformable PSRoI Pooling**
**NOTE:** Synchronized batch normalization can only be used with multiple GPUs; it cannot be used on CPU or with a single GPU.
-## Model Zoo
-
-Object detection models trained with PaddlePaddle can be found in the [PaddleDetection model zoo](docs/MODEL_ZOO_cn.md).
+## Tutorials
-## Installation
+- [Installation guide](docs/INSTALL_cn.md)
+- [Quick start](docs/QUICK_STARTED_cn.md)
+- [Training, evaluation and argument descriptions](docs/GETTING_STARTED_cn.md)
+- [Data preprocessing and custom datasets](docs/DATA_cn.md)
+- [Configuration module design and introduction](docs/CONFIG_cn.md)
+- [Detailed configuration examples with explanations](docs/config_example/)
+- [IPython Notebook demo](demo/mask_rcnn_demo.ipynb)
+- [Transfer learning tutorial](docs/TRANSFER_LEARNING_cn.md)
-Please refer to the [installation guide](docs/INSTALL_cn.md).
+## Model Zoo
+- [Model zoo](docs/MODEL_ZOO_cn.md)
+- [Face detection models](configs/face_detection/README.md)
+- [Pretrained models for pedestrian and vehicle detection](contrib/README_cn.md)
-## Get Started
-## Quick Start
+## Model Compression
+- [Quantization-aware training compression example](slim/quantization)
+- [Pruning compression example](slim/prune)
-PaddleDetection provides a quick-start demo so that users can get started quickly; see [QUICK_STARTED_cn.md](docs/QUICK_STARTED_cn.md)
+## Inference Deployment
-For more on the training and evaluation workflow, please refer to [GETTING_STARTED_cn.md](docs/GETTING_STARTED_cn.md).
+- [Model export tutorial](docs/EXPORT_MODEL.md)
+- [C++ inference deployment](inference/README.md)
-For detailed configuration and parameter descriptions, please refer to the [example config files](docs/config_example/).
+## Benchmark
-We also recommend users take a look at the [IPython Notebook demo](demo/mask_rcnn_demo.ipynb)
+- [Inference benchmark](docs/BENCHMARK_INFER_cn.md)
-Further information can be found in the following documents:
-- [Introduction to the configuration workflow](docs/CONFIG_cn.md)
-- [Custom datasets and the preprocessing pipeline](docs/DATA_cn.md)
+## Updates
-## Roadmap
+#### 10/2019
-PaddleDetection is being continuously updated, and a series of updates will follow, including the following features:
+- Add face detection models BlazeFace and FaceBoxes.
+- Enrich COCO-based models; box mAP reaches 51.9%.
+- Add CACascade-RCNN, one of the best single models from the champion solution of the Objects365 2019 Challenge.
+- Add pretrained models for pedestrian and vehicle detection.
+- Support FP16 training.
+- Add a cross-platform C++ inference deployment solution.
+- Add model compression examples.
-- [ ] Mixed-precision training
-- [ ] Distributed training
-- [ ] Int8 inference
-- [ ] User-defined operators
-- [ ] Larger model zoo
+#### 2/9/2019
+- Add GroupNorm models.
+- Add the Cascade-Mask-RCNN+FPN model.
-## Updates
+#### 5/8/2019
+- Add a series of Modulated Deformable Convolution models.
#### 7/22/2019
- Add Chinese docs for the detection library
- Fix bug in R-CNN models when training and evaluating at the same time
- Add ResNext101-vd + Mask R-CNN + FPN models
-- Add Yolo v3 models on the VOC dataset
+- Add YOLOv3 models on the VOC dataset
#### 7/3/2019
- Initial release of PaddleDetection and the detection model zoo
- Models included: Faster R-CNN, Mask R-CNN, Faster R-CNN+FPN, Mask
- R-CNN+FPN, Cascade-Faster-RCNN+FPN, RetinaNet, Yolo v3, and SSD.
+ R-CNN+FPN, Cascade-Faster-RCNN+FPN, RetinaNet, YOLOv3, and SSD.
## Contributing
diff --git a/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml b/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c345aeedbe6f65fbe19aaf87ddbadf5ed567c38d
--- /dev/null
+++ b/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml
@@ -0,0 +1,177 @@
+architecture: CascadeRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 90000
+snapshot_iter: 10000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
+weights: output/cascade_rcnn_r50_fpn_1x/model_final
+metric: COCO
+num_classes: 81
+
+CascadeRCNN:
+ backbone: ResNet
+ fpn: FPN
+ rpn_head: FPNRPNHead
+ roi_extractor: FPNRoIAlign
+ bbox_head: CascadeBBoxHead
+ bbox_assigner: CascadeBBoxAssigner
+
+ResNet:
+ norm_type: affine_channel
+ depth: 50
+ feature_maps: [2, 3, 4, 5]
+ freeze_at: 2
+ variant: b
+
+FPN:
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+
+FPNRPNHead:
+ anchor_generator:
+ anchor_sizes: [32, 64, 128, 256, 512]
+ aspect_ratios: [0.5, 1.0, 2.0]
+ stride: [16.0, 16.0]
+ variance: [1.0, 1.0, 1.0, 1.0]
+ anchor_start_size: 32
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ rpn_target_assign:
+ rpn_batch_size_per_im: 256
+ rpn_fg_fraction: 0.5
+ rpn_positive_overlap: 0.7
+ rpn_negative_overlap: 0.3
+ rpn_straddle_thresh: 0.0
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+FPNRoIAlign:
+ canconical_level: 4
+ canonical_size: 224
+ min_level: 2
+ max_level: 5
+ box_resolution: 7
+ sampling_ratio: 2
+
+CascadeBBoxAssigner:
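+  # the list-valued fields below have one entry per cascade stage; the fg IoU threshold rises 0.5 -> 0.6 -> 0.7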
+ batch_size_per_im: 512
+ bbox_reg_weights: [10, 20, 30]
+ bg_thresh_lo: [0.0, 0.0, 0.0]
+ bg_thresh_hi: [0.5, 0.6, 0.7]
+ fg_thresh: [0.5, 0.6, 0.7]
+ fg_fraction: 0.25
+
+CascadeBBoxHead:
+ head: CascadeTwoFCHead
+ nms:
+ keep_top_k: 100
+ nms_threshold: 0.5
+ score_threshold: 0.05
+
+CascadeTwoFCHead:
+ mlp_dim: 1024
+
+MultiScaleTEST:
+ score_thresh: 0.05
+ nms_thresh: 0.5
+ detections_per_im: 100
+ enable_voting: true
+ vote_thresh: 0.9
+
+LearningRate:
+ base_lr: 0.02
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [60000, 80000]
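+  # LinearWarmup ramps the LR linearly from base_lr * start_factor up to base_lr over `steps` iterations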
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+FasterRCNNTrainFeed:
+ batch_size: 2
+ dataset:
+ dataset_dir: dataset/coco
+ annotation: annotations/instances_train2017.json
+ image_dir: train2017
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
+
+FasterRCNNEvalFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/coco
+ annotation: annotations/instances_val2017.json
+ image_dir: val2017
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: true
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: true
+ mean:
+ - 0.485
+ - 0.456
+ - 0.406
+ std:
+ - 0.229
+ - 0.224
+ - 0.225
+ - !MultiscaleTestResize
+ origin_target_size: 800
+ origin_max_size: 1333
+ target_size:
+ - 400
+ - 500
+ - 600
+ - 700
+ - 900
+ - 1000
+ - 1100
+ - 1200
+ max_size: 2000
+ use_flip: true
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadMSTest
+ pad_to_stride: 32
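+      # num_scale = (len(target_size) + 1) * (1 + use_flip) = (8 + 1) * 2 = 18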
+ num_scale: 18
+ num_workers: 2
+
+FasterRCNNTestFeed:
+ batch_size: 1
+ dataset:
+ annotation: dataset/coco/annotations/instances_val2017.json
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml
new file mode 100755
index 0000000000000000000000000000000000000000..afe456967e789c822c36db4e384d9b49f049b4f5
--- /dev/null
+++ b/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml
@@ -0,0 +1,257 @@
+architecture: CascadeMaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+max_iters: 300000
+snapshot_iter: 10000
+use_gpu: true
+log_iter: 20
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
+weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
+metric: COCO
+num_classes: 81
+
+CascadeMaskRCNN:
+ backbone: SENet
+ fpn: FPN
+ rpn_head: FPNRPNHead
+ roi_extractor: FPNRoIAlign
+ bbox_head: CascadeBBoxHead
+ bbox_assigner: CascadeBBoxAssigner
+ mask_assigner: MaskAssigner
+ mask_head: MaskHead
+
+SENet:
+ depth: 152
+ feature_maps: [2, 3, 4, 5]
+ freeze_at: 2
+ group_width: 4
+ groups: 64
+ norm_type: bn
+ freeze_norm: True
+ variant: d
+ dcn_v2_stages: [3, 4, 5]
+ std_senet: True
+
+FPN:
+ max_level: 6
+ min_level: 2
+ num_chan: 256
+ spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+ freeze_norm: False
+ norm_type: gn
+
+FPNRPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ variance: [1.0, 1.0, 1.0, 1.0]
+ anchor_start_size: 32
+ max_level: 6
+ min_level: 2
+ num_chan: 256
+ rpn_target_assign:
+ rpn_batch_size_per_im: 256
+ rpn_fg_fraction: 0.5
+ rpn_negative_overlap: 0.3
+ rpn_positive_overlap: 0.7
+ rpn_straddle_thresh: 0.0
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+FPNRoIAlign:
+ canconical_level: 4
+ canonical_size: 224
+ max_level: 5
+ min_level: 2
+ box_resolution: 7
+ sampling_ratio: 2
+ mask_resolution: 14
+
+MaskHead:
+ dilation: 1
+ conv_dim: 256
+ num_convs: 4
+ resolution: 28
+ norm_type: gn
+
+CascadeBBoxAssigner:
+ batch_size_per_im: 512
+ bbox_reg_weights: [10, 20, 30]
+ bg_thresh_hi: [0.5, 0.6, 0.7]
+ bg_thresh_lo: [0.0, 0.0, 0.0]
+ fg_fraction: 0.25
+ fg_thresh: [0.5, 0.6, 0.7]
+
+MaskAssigner:
+ resolution: 28
+
+CascadeBBoxHead:
+ head: CascadeXConvNormHead
+ nms:
+ keep_top_k: 100
+ nms_threshold: 0.5
+ score_threshold: 0.05
+
+CascadeXConvNormHead:
+ norm_type: gn
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [240000, 280000]
+ - !LinearWarmup
+ start_factor: 0.01
+ steps: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+MaskRCNNTrainFeed:
+ # batch size per device
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/coco
+ image_dir: train2017
+ annotation: annotations/instances_train2017.json
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !RandomFlipImage
+ is_mask_flip: true
+ is_normalized: false
+ prob: 0.5
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !ResizeImage
+ interp: 1
+ target_size:
+ - 416
+ - 448
+ - 480
+ - 512
+ - 544
+ - 576
+ - 608
+ - 640
+ - 672
+ - 704
+ - 736
+ - 768
+ - 800
+ - 832
+ - 864
+ - 896
+ - 928
+ - 960
+ - 992
+ - 1024
+ - 1056
+ - 1088
+ - 1120
+ - 1152
+ - 1184
+ - 1216
+ - 1248
+ - 1280
+ - 1312
+ - 1344
+ - 1376
+ - 1408
+ max_size: 1600
+ use_cv2: true
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ num_workers: 8
+
+MaskRCNNEvalFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/coco
+ annotation: annotations/instances_val2017.json
+ image_dir: val2017
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !ResizeImage
+ interp: 1
+ target_size:
+ - 800
+ max_size: 1333
+ use_cv2: true
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ num_workers: 2
+
+MaskRCNNTestFeed:
+ batch_size: 1
+ dataset:
+ annotation: dataset/coco/annotations/instances_val2017.json
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ num_workers: 2
diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
new file mode 100644
index 0000000000000000000000000000000000000000..78839f9b065acb50d81785a24a3c56efeb7bde4b
--- /dev/null
+++ b/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml
@@ -0,0 +1,272 @@
+architecture: CascadeMaskRCNN
+train_feed: MaskRCNNTrainFeed
+eval_feed: MaskRCNNEvalFeed
+test_feed: MaskRCNNTestFeed
+max_iters: 300000
+snapshot_iter: 10000
+use_gpu: true
+log_iter: 20
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar
+weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/
+metric: COCO
+num_classes: 81
+
+CascadeMaskRCNN:
+ backbone: SENet
+ fpn: FPN
+ rpn_head: FPNRPNHead
+ roi_extractor: FPNRoIAlign
+ bbox_head: CascadeBBoxHead
+ bbox_assigner: CascadeBBoxAssigner
+ mask_assigner: MaskAssigner
+ mask_head: MaskHead
+
+SENet:
+ depth: 152
+ feature_maps: [2, 3, 4, 5]
+ freeze_at: 2
+ group_width: 4
+ groups: 64
+ norm_type: bn
+ freeze_norm: True
+ variant: d
+ dcn_v2_stages: [3, 4, 5]
+ std_senet: True
+
+FPN:
+ max_level: 6
+ min_level: 2
+ num_chan: 256
+ spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+ freeze_norm: False
+ norm_type: gn
+
+FPNRPNHead:
+ anchor_generator:
+ aspect_ratios: [0.5, 1.0, 2.0]
+ variance: [1.0, 1.0, 1.0, 1.0]
+ anchor_start_size: 32
+ max_level: 6
+ min_level: 2
+ num_chan: 256
+ rpn_target_assign:
+ rpn_batch_size_per_im: 256
+ rpn_fg_fraction: 0.5
+ rpn_negative_overlap: 0.3
+ rpn_positive_overlap: 0.7
+ rpn_straddle_thresh: 0.0
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+FPNRoIAlign:
+ canconical_level: 4
+ canonical_size: 224
+ max_level: 5
+ min_level: 2
+ box_resolution: 7
+ sampling_ratio: 2
+ mask_resolution: 14
+
+MaskHead:
+ dilation: 1
+ conv_dim: 256
+ num_convs: 4
+ resolution: 28
+ norm_type: gn
+
+CascadeBBoxAssigner:
+ batch_size_per_im: 512
+ bbox_reg_weights: [10, 20, 30]
+ bg_thresh_hi: [0.5, 0.6, 0.7]
+ bg_thresh_lo: [0.0, 0.0, 0.0]
+ fg_fraction: 0.25
+ fg_thresh: [0.5, 0.6, 0.7]
+
+MaskAssigner:
+ resolution: 28
+
+CascadeBBoxHead:
+ head: CascadeXConvNormHead
+ nms:
+ keep_top_k: 100
+ nms_threshold: 0.5
+ score_threshold: 0.05
+
+CascadeXConvNormHead:
+ norm_type: gn
+
+MultiScaleTEST:
+ score_thresh: 0.05
+ nms_thresh: 0.5
+ detections_per_im: 100
+ enable_voting: true
+ vote_thresh: 0.9
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [240000, 280000]
+ - !LinearWarmup
+ start_factor: 0.01
+ steps: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+MaskRCNNTrainFeed:
+ # batch size per device
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/coco
+ image_dir: train2017
+ annotation: annotations/instances_train2017.json
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !RandomFlipImage
+ is_mask_flip: true
+ is_normalized: false
+ prob: 0.5
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !ResizeImage
+ interp: 1
+ target_size:
+ - 416
+ - 448
+ - 480
+ - 512
+ - 544
+ - 576
+ - 608
+ - 640
+ - 672
+ - 704
+ - 736
+ - 768
+ - 800
+ - 832
+ - 864
+ - 896
+ - 928
+ - 960
+ - 992
+ - 1024
+ - 1056
+ - 1088
+ - 1120
+ - 1152
+ - 1184
+ - 1216
+ - 1248
+ - 1280
+ - 1312
+ - 1344
+ - 1376
+ - 1408
+ max_size: 1600
+ use_cv2: true
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ num_workers: 8
+
+MaskRCNNEvalFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/coco
+    annotation: annotations/instances_val2017.json
+ image_dir: val2017
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !MultiscaleTestResize
+ origin_target_size: 800
+ origin_max_size: 1333
+ target_size:
+ - 400
+ - 500
+ - 600
+ - 700
+ - 900
+ - 1000
+ - 1100
+ - 1200
+ max_size: 2000
+ use_flip: true
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadMSTest
+ pad_to_stride: 32
+ # num_scale = (len(target_size) + 1) * (1 + use_flip)
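+      # here: (8 target sizes + 1 original) * (1 + flip) = 18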
+      num_scale: 18
+ num_workers: 2
+
+MaskRCNNTestFeed:
+ batch_size: 1
+ dataset:
+ annotation: dataset/coco/annotations/instances_val2017.json
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ num_workers: 2
diff --git a/PaddleCV/PaddleDetection/configs/face_detection/README.md b/PaddleCV/PaddleDetection/configs/face_detection/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..69bd25f9fb5edc02522896448bef0a66f378fa1b
--- /dev/null
+++ b/PaddleCV/PaddleDetection/configs/face_detection/README.md
@@ -0,0 +1,261 @@
+English | [简体中文](README_cn.md)
+
+# FaceDetection
+The goal of FaceDetection is to provide efficient and high-speed face detection solutions,
+including cutting-edge and classic models.
+
+
+
+
+![](../../demo/output/12_Group_Group_12_Group_Group_12_935.jpg)
+
+## Data Pipeline
+We use the [WIDER FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training
+and testing; the official website gives a detailed introduction to the data.
+- WIDER FACE data source:
+A `wider_face`-type dataset is loaded from a directory structure like this:
+
+ ```
+ dataset/wider_face/
+ ├── wider_face_split
+ │ ├── wider_face_train_bbx_gt.txt
+ │ ├── wider_face_val_bbx_gt.txt
+ ├── WIDER_train
+ │ ├── images
+ │ │ ├── 0--Parade
+ │ │ │ ├── 0_Parade_marchingband_1_100.jpg
+ │ │ │ ├── 0_Parade_marchingband_1_381.jpg
+ │ │ │ │ ...
+ │ │ ├── 10--People_Marching
+ │ │ │ ...
+ ├── WIDER_val
+ │ ├── images
+ │ │ ├── 0--Parade
+ │ │ │ ├── 0_Parade_marchingband_1_1004.jpg
+ │ │ │ ├── 0_Parade_marchingband_1_1045.jpg
+ │ │ │ │ ...
+ │ │ ├── 10--People_Marching
+ │ │ │ ...
+ ```
+
+- Download dataset manually:
+To download the WIDER FACE dataset, run the following commands:
+```
+cd dataset/wider_face && ./download.sh
+```
+
+- Download dataset automatically:
+If a training session is started but the dataset is not set up properly
+(e.g., not found in dataset/wider_face), PaddleDetection will automatically
+download it from the [WIDER FACE dataset](http://shuoyang1213.me/WIDERFACE/) site;
+the decompressed dataset will be cached in ~/.cache/paddle/dataset/ and will be
+discovered automatically afterwards.
+
+### Data Augmentation
+
+- **Data-anchor-sampling:** Randomly transform the scale of the image to a certain range of scales,
+greatly enhancing the scale change of the face. The specific operation is to obtain $v=\sqrt{width * height}$
+according to the randomly selected face height and width, and judge which interval of
+`[16, 32, 64, 128]` the value of `v` falls in; for example, `v=45` falls in the `32<v<64`
+interval. A target anchor scale near the matched interval is then sampled and the image is
+resized so that the face matches that scale (see the sketch below).
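+
+A minimal Python sketch of this selection, for illustration only (the helper below is not
+the PaddleDetection implementation):
+
+```
+import math
+import random
+
+ANCHOR_SCALES = [16, 32, 64, 128]
+
+def sample_resize_ratio(face_w, face_h):
+    v = math.sqrt(face_w * face_h)      # e.g. 45.0 for a 45x45 face
+    # index of the interval that v falls in: 32 < 45 < 64 -> index 1
+    idx = sum(1 for s in ANCHOR_SCALES if v >= s) - 1
+    # sample a target scale from the intervals up to one above the matched one,
+    # i.e. uniformly from [16, 32, 64] when v = 45
+    target = ANCHOR_SCALES[random.randint(0, min(idx + 1, len(ANCHOR_SCALES) - 1))]
+    return target / v                   # ratio by which to resize the whole image
+```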
+
+## Benchmark and Model Zoo
+
+### Supported Architectures
+
+| Architecture | Original | Lite [1](#lite) | NAS [2](#nas) |
+|:------------------------:|:--------:|:--------------------------:|:------------------------:|
+| [BlazeFace](#BlazeFace) | ✓ | ✓ | ✓ |
+| [FaceBoxes](#FaceBoxes) | ✓ | ✓ | x |
+
+[1] The `Lite` edition reduces the number of network layers and channels.
+[2] The `NAS` edition uses a `Neural Architecture Search` algorithm to
+optimize the network structure.
+
+**Todo List:**
+- [ ] HamBox
+- [ ] PyramidBox
+
+### Model Zoo
+
+#### mAP in WIDER FACE
+
+| Architecture | Type | Size | Img/GPU | Lr schd | Easy Set | Medium Set | Hard Set | Download |
+|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
+| BlazeFace | Original | 640 | 8 | 320k | **0.915** | **0.892** | **0.797** | [model](https://paddlemodels.bj.bcebos.com/object_detection/blazeface_original.tar) |
+| BlazeFace | Lite | 640 | 8 | 320k | 0.909 | 0.885 | 0.781 | [model](https://paddlemodels.bj.bcebos.com/object_detection/blazeface_lite.tar) |
+| BlazeFace | NAS | 640 | 8 | 320k | 0.837 | 0.807 | 0.658 | [model](https://paddlemodels.bj.bcebos.com/object_detection/blazeface_nas.tar) |
+| FaceBoxes | Original | 640 | 8 | 320k | 0.875 | 0.848 | 0.568 | [model](https://paddlemodels.bj.bcebos.com/object_detection/faceboxes_original.tar) |
+| FaceBoxes | Lite | 640 | 8 | 320k | 0.898 | 0.872 | 0.752 | [model](https://paddlemodels.bj.bcebos.com/object_detection/faceboxes_lite.tar) |
+
+**NOTES:**
+- mAP on the `Easy/Medium/Hard Set` is obtained by multi-scale evaluation with `tools/face_eval.py`.
+For details, refer to [Evaluation](#Evaluate-on-the-WIDER-FACE).
+- BlazeFace-Lite training and testing use the [blazeface.yml](../../configs/face_detection/blazeface.yml)
+config file with `lite_edition: true` set.
+
+#### mAP in FDDB
+
+| Architecture | Type | Size | DistROC | ContROC |
+|:------------:|:--------:|:----:|:-------:|:-------:|
+| BlazeFace | Original | 640 | **0.992** | **0.762** |
+| BlazeFace | Lite | 640 | 0.990 | 0.756 |
+| BlazeFace | NAS | 640 | 0.981 | 0.741 |
+| FaceBoxes | Original | 640 | 0.985 | 0.731 |
+| FaceBoxes | Lite | 640 | 0.987 | 0.741 |
+
+**NOTES:**
+- mAP is obtained by multi-scale evaluation on the FDDB dataset.
+For details, refer to [Evaluation](#Evaluate-on-the-FDDB).
+
+#### Inference Time and Model Size Comparison
+
+| Architecture | Type | Size | P4 (ms) | CPU (ms) | ARM (ms) | File size (MB) | Flops |
+|:------------:|:--------:|:----:|:---------:|:--------:|:----------:|:--------------:|:---------:|
+| BlazeFace | Original | 128 | - | - | - | - | - |
+| BlazeFace | Lite | 128 | - | - | - | - | - |
+| BlazeFace | NAS | 128 | - | - | - | - | - |
+| FaceBoxes | Original | 128 | - | - | - | - | - |
+| FaceBoxes | Lite | 128 | - | - | - | - | - |
+| BlazeFace | Original | 320 | - | - | - | - | - |
+| BlazeFace | Lite | 320 | - | - | - | - | - |
+| BlazeFace | NAS | 320 | - | - | - | - | - |
+| FaceBoxes | Original | 320 | - | - | - | - | - |
+| FaceBoxes | Lite | 320 | - | - | - | - | - |
+| BlazeFace | Original | 640 | - | - | - | - | - |
+| BlazeFace | Lite | 640 | - | - | - | - | - |
+| BlazeFace | NAS | 640 | - | - | - | - | - |
+| FaceBoxes | Original | 640 | - | - | - | - | - |
+| FaceBoxes | Lite | 640 | - | - | - | - | - |
+
+
+**NOTES:**
+- CPU: i5-7360U @ 2.30GHz. Single core and single thread.
+
+
+
+## Get Started
+For `Training` and `Inference`, please refer to [GETTING_STARTED.md](../../docs/GETTING_STARTED.md)
+- **NOTES:**
+- `BlazeFace` and `FaceBoxes` are trained on 4 GPUs with `batch_size=8` per GPU (total batch size 32)
+for 320,000 iterations. (If your GPU count is not 4, please refer to the rule for adjusting training
+parameters in the table of [calculation rules](../../docs/GETTING_STARTED.md#faq).)
+- Currently we do not support evaluation during training.
+
+### Evaluation
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python tools/face_eval.py -c configs/face_detection/blazeface.yml
+```
+- Optional arguments
+- `-d` or `--dataset_dir`: Dataset path, the same as `dataset_dir` in the configs, e.g. `-d dataset/wider_face`.
+- `-f` or `--output_eval`: Evaluation output directory, default is `output/pred`.
+- `-e` or `--eval_mode`: Evaluation mode, one of `widerface` and `fddb`, default is `widerface`.
+- `--multi_scale`: If this flag is added to the command, multi-scale evaluation is used;
+by default (`False`), single-scale evaluation is used.
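+
+For example, a multi-scale WIDER FACE evaluation with all paths spelled out (using only the
+arguments listed above) might look like:
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python tools/face_eval.py -c configs/face_detection/blazeface.yml \
+                          -d dataset/wider_face \
+                          -f output/pred \
+                          -e widerface \
+                          --multi_scale
+```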
+
+After the evaluation is completed, test results in txt format are generated in `output/pred`,
+and mAP is then calculated according to the dataset. If you set `--eval_mode=widerface`,
+it will [Evaluate on the WIDER FACE](#Evaluate-on-the-WIDER-FACE). If you set `--eval_mode=fddb`,
+it will [Evaluate on the FDDB](#Evaluate-on-the-FDDB).
+
+#### Evaluate on the WIDER FACE
+- Download the official evaluation script to evaluate the AP metrics:
+```
+wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
+unzip eval_tools.zip && rm -f eval_tools.zip
+```
+- Modify the result path and the name of the curve to be drawn in `eval_tools/wider_eval.m`:
+```
+% Modify the folder name where the results are stored.
+pred_dir = './pred';
+% Modify the name of the curve to be drawn.
+legend_name = 'Fluid-BlazeFace';
+```
+- `wider_eval.m` is the main execution program of the evaluation module. The run command is as follows:
+```
+matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
+```
+
+#### Evaluate on the FDDB
+For details on the [FDDB dataset](http://vis-www.cs.umass.edu/fddb/), refer to FDDB's official website.
+- Download the official dataset and evaluation script to evaluate the ROC metrics:
+```
+#external link to the Faces in the Wild data set
+wget http://tamaraberg.com/faceDataset/originalPics.tar.gz
+#The annotations are split into ten folds. See README for details.
+wget http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz
+#information on directory structure and file formats
+wget http://vis-www.cs.umass.edu/fddb/README.txt
+```
+- Install OpenCV: requires the [OpenCV library](http://sourceforge.net/projects/opencvlibrary/).
+If the utility `pkg-config` is not available for your operating system,
+edit the Makefile to manually specify the OpenCV flags as follows:
+```
+INCS = -I/usr/local/include/opencv
+LIBS = -L/usr/local/lib -lcxcore -lcv -lhighgui -lcvaux -lml
+```
+
+- Compile FDDB evaluation code: execute `make` in evaluation folder.
+
+- Generate the full image path list and ground truth in FDDB-folds. The run commands are as follows:
+```
+cat `ls | grep -v "ellipse"` > filePath.txt
+cat *ellipse* > fddb_annotFile.txt
+```
+- Evaluation:
+Finally, the evaluation command is:
+```
+./evaluate -a ./FDDB/FDDB-folds/fddb_annotFile.txt \
+ -d DETECTION_RESULT.txt -f 0 \
+ -i ./FDDB -l ./FDDB/FDDB-folds/filePath.txt \
+ -r ./OUTPUT_DIR -z .jpg
+```
+**NOTES:** Run `./evaluate --help` for an explanation of the arguments.
+
+## Algorithm Description
+
+### BlazeFace
+**Introduction:**
+[BlazeFace](https://arxiv.org/abs/1907.05047) is a face detection model published by Google Research.
+It is lightweight yet accurate, and tailored for mobile GPU inference, running at a speed
+of 200-1000+ FPS on flagship devices.
+
+**Particularity:**
+- The anchor scheme stops at 8x8 (for a 128x128 input), with 6 anchors per pixel at that resolution.
+- 5 single and 6 double BlazeBlocks built from 5x5 depthwise convolutions: the same accuracy with fewer layers.
+- Non-maximum suppression is replaced with a blending strategy that estimates the
+regression parameters of a bounding box as a weighted mean of the overlapping predictions (sketched below).
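+
+A simplified sketch of this blending step (an illustrative stand-in, not BlazeFace's exact
+procedure):
+
+```
+def iou(a, b):
+    # boxes given as [x1, y1, x2, y2]
+    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
+    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
+    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
+    area_a = (a[2] - a[0]) * (a[3] - a[1])
+    area_b = (b[2] - b[0]) * (b[3] - b[1])
+    return inter / (area_a + area_b - inter + 1e-9)
+
+def blend_boxes(boxes, scores, overlap_thresh=0.3):
+    # visit boxes in descending score order; each output box is the
+    # score-weighted mean of the not-yet-used boxes overlapping the seed
+    order = sorted(range(len(boxes)), key=lambda i: -scores[i])
+    used, blended = set(), []
+    for i in order:
+        if i in used:
+            continue
+        group = [j for j in order
+                 if j not in used and iou(boxes[i], boxes[j]) >= overlap_thresh]
+        total = sum(scores[j] for j in group)
+        box = [sum(boxes[j][k] * scores[j] for j in group) / total for k in range(4)]
+        used.update(group)
+        blended.append((box, scores[i]))
+    return blended
+```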
+
+**Edition information:**
+- Original: a reproduction of the original paper.
+- Lite: replaces 5x5 convolutions with 3x3 convolutions, using fewer network layers and convolution channels.
+- NAS: uses a `Neural Architecture Search` algorithm to optimize the network structure,
+with fewer network layers and convolution channels than `Lite`.
+
+### FaceBoxes
+**Introduction:**
+[FaceBoxes](https://arxiv.org/abs/1708.05234), titled "FaceBoxes: A CPU Real-time Face Detector
+with High Accuracy", is a face detector proposed by Shifeng Zhang et al., with high performance in
+both speed and accuracy. The paper was published at IJCB 2017.
+
+**Particularity:**
+- The anchor scheme stops at 20x20, 10x10, and 5x5 (for a 640x640 network input),
+with 3, 1, and 1 anchors per pixel at each resolution respectively. The corresponding densities
+are 1, 2, 4 (20x20), 4 (10x10), and 4 (5x5).
+- 2 convs with CReLU, 2 poolings, 3 inception blocks, and 2 convs with ReLU.
+- Uses density prior boxes to improve detection accuracy.
+
+**Edition information:**
+- Original: a reproduction of the original paper.
+- Lite: 2 convs with CReLU, 1 pooling, 2 convs with ReLU, 3 inception blocks, and 2 convs with ReLU.
+The anchor scheme stops at 80x80 and 40x40, with 3 and 1 anchors per pixel at each resolution.
+The corresponding densities are 1, 2, 4 (80x80), and 4 (40x40), with fewer convolution channels.
+
+
+## Contributing
+Contributions are highly welcomed and we would really appreciate your feedback!!
diff --git a/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml b/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml
index 8b27eae70ec3895da635a6a35a9eed94531aeba0..692f14a7cc8091bc8df1f5edbfbca2a9c59b0073 100644
--- a/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml
+++ b/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml
@@ -89,7 +89,7 @@ SSDEvalFeed:
fields: ['image', 'im_id', 'gt_box']
dataset:
dataset_dir: dataset/wider_face
- annotation: annotFile.txt #wider_face_split/wider_face_val_bbx_gt.txt
+ annotation: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
drop_last: false
image_shape: [3, 640, 640]
diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml
index 8ddc6f6c98630bb92f2af9d366bf297a801a0ef3..c719106104f1424008db3a079e2e1ac7a3d742b9 100644
--- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml
@@ -21,7 +21,7 @@ FasterRCNN:
bbox_assigner: BBoxAssigner
ResNet:
- norm_type: affine_channel
+ norm_type: bn
norm_decay: 0.
depth: 50
feature_maps: [2, 3, 4, 5]
diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml
index bcca99233c831fcb6260b2f0e0eb759f7c6be5c2..f39a144a431f5998a8178c41c10ade796d270cb6 100644
--- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml
+++ b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml
@@ -82,8 +82,8 @@ LearningRate:
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
- start_factor: 0.3333333333333333
- steps: 500
+ start_factor: 0.1
+ steps: 1000
OptimizerBuilder:
optimizer:
diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml
index 3a76395ee586f9e36d78494010162784be1a7534..a889ea283f445974e11d38479162b388d69ec3ad 100644
--- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml
+++ b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml
@@ -24,7 +24,7 @@ ResNet:
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
- norm_type: affine_channel
+ norm_type: bn
FPN:
max_level: 6
diff --git a/PaddleCV/PaddleDetection/configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml b/PaddleCV/PaddleDetection/configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml
new file mode 100644
index 0000000000000000000000000000000000000000..be80042520367c1bb2d63ac2998651d789c9e298
--- /dev/null
+++ b/PaddleCV/PaddleDetection/configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml
@@ -0,0 +1,247 @@
+architecture: CascadeRCNN
+train_feed: FasterRCNNTrainFeed
+eval_feed: FasterRCNNEvalFeed
+test_feed: FasterRCNNTestFeed
+max_iters: 500000
+snapshot_iter: 10000
+use_gpu: true
+log_iter: 20
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_coco_pretrained.tar
+weights: output/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas/model_final
+metric: COCO
+num_classes: 366
+
+CascadeRCNN:
+  backbone: SENet
+ fpn: FPN
+ rpn_head: FPNRPNHead
+ roi_extractor: FPNRoIAlign
+ bbox_head: CascadeBBoxHead
+ bbox_assigner: CascadeBBoxAssigner
+
+SENet:
+ depth: 152
+ feature_maps: [2, 3, 4, 5]
+ freeze_at: 2
+ group_width: 4
+ groups: 64
+ norm_type: bn
+ freeze_norm: True
+ variant: d
+ dcn_v2_stages: [3, 4, 5]
+ std_senet: True
+
+FPN:
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+ freeze_norm: False
+ norm_type: gn
+
+FPNRPNHead:
+ anchor_generator:
+ anchor_sizes: [32, 64, 128, 256, 512]
+ aspect_ratios: [0.5, 1.0, 2.0]
+ stride: [16.0, 16.0]
+ variance: [1.0, 1.0, 1.0, 1.0]
+ anchor_start_size: 32
+ min_level: 2
+ max_level: 6
+ num_chan: 256
+ rpn_target_assign:
+ rpn_batch_size_per_im: 256
+ rpn_fg_fraction: 0.5
+ rpn_positive_overlap: 0.7
+ rpn_negative_overlap: 0.3
+ rpn_straddle_thresh: 0.0
+ train_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 2000
+ post_nms_top_n: 2000
+ test_proposal:
+ min_size: 0.0
+ nms_thresh: 0.7
+ pre_nms_top_n: 1000
+ post_nms_top_n: 1000
+
+FPNRoIAlign:
+ canconical_level: 4
+ canonical_size: 224
+ min_level: 2
+ max_level: 5
+ box_resolution: 7
+ sampling_ratio: 2
+
+CascadeBBoxAssigner:
+ batch_size_per_im: 1024
+ bbox_reg_weights: [10, 20, 30]
+ bg_thresh_lo: [0.0, 0.0, 0.0]
+ bg_thresh_hi: [0.5, 0.6, 0.7]
+ fg_thresh: [0.5, 0.6, 0.7]
+ fg_fraction: 0.25
+
+CascadeBBoxHead:
+ head: CascadeXConvNormHead
+ nms:
+ keep_top_k: 100
+ nms_threshold: 0.5
+ score_threshold: 0.05
+
+CascadeXConvNormHead:
+ norm_type: gn
+
+CascadeTwoFCHead:
+ mlp_dim: 1024
+
+LearningRate:
+ base_lr: 0.01
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [400000, 460000]
+ - !LinearWarmup
+ start_factor: 0.01
+ steps: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+FasterRCNNTrainFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/objects365
+ annotation: annotations/train.json
+ image_dir: train
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !RandomFlipImage
+ is_mask_flip: true
+ is_normalized: false
+ prob: 0.5
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !ResizeImage
+ interp: 1
+ target_size:
+ - 416
+ - 448
+ - 480
+ - 512
+ - 544
+ - 576
+ - 608
+ - 640
+ - 672
+ - 704
+ - 736
+ - 768
+ - 800
+ - 832
+ - 864
+ - 896
+ - 928
+ - 960
+ - 992
+ - 1024
+ - 1056
+ - 1088
+ - 1120
+ - 1152
+ - 1184
+ - 1216
+ - 1248
+ - 1280
+ - 1312
+ - 1344
+ - 1376
+ - 1408
+ max_size: 1600
+ use_cv2: true
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ num_workers: 4
+ class_aware_sampling: true
+
+FasterRCNNEvalFeed:
+ batch_size: 1
+ dataset:
+ dataset_dir: dataset/objects365
+ annotation: annotations/val.json
+ image_dir: val
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !ResizeImage
+ target_size: 800
+ max_size: 1333
+ interp: 1
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+
+FasterRCNNTestFeed:
+ batch_size: 1
+ dataset:
+    annotation: dataset/objects365/annotations/val.json
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: False
+ with_mixup: False
+ - !NormalizeImage
+ is_channel_first: false
+ is_scale: False
+ mean:
+ - 102.9801
+ - 115.9465
+ - 122.7717
+ std:
+ - 1.0
+ - 1.0
+ - 1.0
+ - !Permute
+ channel_first: true
+ to_bgr: false
+ batch_transforms:
+ - !PadBatch
+ pad_to_stride: 32
+ drop_last: false
+ num_workers: 2
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/001.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/001.png
new file mode 100644
index 0000000000000000000000000000000000000000..63ae9167fd03e8a95756fe5f6195fc8d741b9cfa
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/001.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/002.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/002.png
new file mode 100644
index 0000000000000000000000000000000000000000..0de905cf55e6b02487ee1b8220810df8eaa24c2c
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/002.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/003.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/003.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9026e099df42d4267be07a71401eb5426b47745
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/003.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/004.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/004.png
new file mode 100644
index 0000000000000000000000000000000000000000..d8118ec3e0ef63bc74e825b5e7638a1886580604
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/004.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/001.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/001.png
new file mode 100644
index 0000000000000000000000000000000000000000..5194d6ff891b9507fedfc53f36de4f00219c7f30
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/001.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/004.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/004.png
new file mode 100644
index 0000000000000000000000000000000000000000..7c62be5051f9a47c5f5e98ccd9f45c3fa5f30257
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/004.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml
new file mode 100644
index 0000000000000000000000000000000000000000..adc9109aa356e109afc81bea13b856ce0f4be448
--- /dev/null
+++ b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml
@@ -0,0 +1,82 @@
+architecture: YOLOv3
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+use_gpu: true
+max_iters: 200000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 5000
+metric: COCO
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
+weights: https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar
+num_classes: 1
+
+YOLOv3:
+ backbone: DarkNet
+ yolo_head: YOLOv3Head
+
+DarkNet:
+ norm_type: sync_bn
+ norm_decay: 0.
+ depth: 53
+
+YOLOv3Head:
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ norm_decay: 0.
+ ignore_thresh: 0.7
+ label_smooth: true
+ nms:
+ background_label: -1
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ normalized: false
+ score_threshold: 0.01
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 150000
+ - 180000
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
+
+YoloTrainFeed:
+ batch_size: 8
+ dataset:
+ dataset_dir: dataset/pedestrian
+ annotation: annotations/instances_train2017.json
+ image_dir: train2017
+ num_workers: 8
+ bufsize: 128
+ use_process: true
+
+YoloEvalFeed:
+ batch_size: 8
+ image_shape: [3, 608, 608]
+ dataset:
+ dataset_dir: dataset/pedestrian
+ annotation: annotations/instances_val2017.json
+ image_dir: val2017
+
+YoloTestFeed:
+ batch_size: 1
+ image_shape: [3, 608, 608]
+ dataset:
+ annotation: contrib/PedestrianDetection/pedestrian.json
diff --git a/PaddleCV/PaddleDetection/contrib/README.md b/PaddleCV/PaddleDetection/contrib/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..11f93b85b2ddf81176fd7b4c655dbeb28d1e2dc5
--- /dev/null
+++ b/PaddleCV/PaddleDetection/contrib/README.md
@@ -0,0 +1,104 @@
+# PaddleDetection applied for specific scenarios
+
+We provide some models implemented by PaddlePaddle to detect objects in specific scenarios; users can download the models and use them in these scenarios.
+
+| Task | Algorithm | Box AP | Download |
+|:---------------------|:---------:|:------:| :-------------------------------------------------------------------------------------: |
+| Vehicle Detection | YOLOv3 | 54.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar) |
+| Pedestrian Detection | YOLOv3 | 51.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar) |
+
+## Vehicle Detection
+
+One of the major applications of vehicle detection is traffic monitoring. In this scenario, the vehicles to be detected are mostly captured by cameras mounted on top of traffic light columns.
+
+### 1. Network
+
+The network for detecting vehicles is YOLOv3, the backbone of which is DarkNet53.
+
+### 2. Configuration for training
+
+PaddleDetection provides users with a configuration file [yolov3_darknet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml) for training YOLOv3 on the COCO dataset. Compared with that file, we modify the following parameters for vehicle detection (a command-line sketch follows this list):
+
+* max_iters: 120000
+* num_classes: 6
+* anchors: [[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], [253, 224]]
+* label_smooth: false
+* nms/nms_top_k: 400
+* nms/score_threshold: 0.005
+* milestones: [60000, 80000]
+* dataset_dir: dataset/vehicle
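+
+Assuming the standard `tools/train.py` entry point, the scalar options among these can also
+be overridden on the command line via `-o` instead of editing the config file (a sketch; the
+list-valued options such as `anchors` and `milestones` stay in the config):
+
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/train.py -c configs/yolov3_darknet.yml \
+                         -o max_iters=120000 num_classes=6
+```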
+
+### 3. Accuracy
+
+The accuracy of the model trained and evaluated on our private data is as follows:
+
+AP at IoU=.50:.05:.95 is 0.545.
+
+AP at IoU=.50 is 0.764.
+
+### 4. Inference
+
+Users can employ the model to conduct inference:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/infer.py -c contrib/VehicleDetection/vehicle_yolov3_darknet.yml \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar \
+ --infer_dir contrib/VehicleDetection/demo \
+ --draw_threshold 0.2 \
+ --output_dir contrib/VehicleDetection/demo/output
+
+```
+
+Some inference results are visualized below:
+
+![](VehicleDetection/demo/output/001.jpeg)
+![](VehicleDetection/demo/output/005.png)
+
+## Pedestrian Detection
+
+The main applications of pedestrian detection include intelligent monitoring. In this scenario, photos of pedestrians are taken by surveillance cameras in public areas, and pedestrian detection is then conducted on these photos.
+
+### 1. Network
+
+The network for detecting pedestrians is YOLOv3, the backbone of which is DarkNet53.
+
+### 2. Configuration for training
+
+PaddleDetection provides users with a configuration file [yolov3_darknet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml) for training YOLOv3 on the COCO dataset. Compared with that file, we modify the following parameters for pedestrian detection:
+
+* max_iters: 200000
+* num_classes: 1
+* snapshot_iter: 5000
+* milestones: [150000, 180000]
+* dataset_dir: dataset/pedestrian
+
+### 3. Accuracy
+
+The accuracy of the model trained and evaluated on our private data is as follows:
+
+AP at IoU=.50:.05:.95 is 0.518.
+
+AP at IoU=.50 is 0.792.
+
+### 4. Inference
+
+Users can employ the model to conduct inference:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/infer.py -c contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar \
+ --infer_dir contrib/PedestrianDetection/demo \
+ --draw_threshold 0.3 \
+ --output_dir contrib/PedestrianDetection/demo/output
+```
+
+Some inference results are visualized below:
+
+![](PedestrianDetection/demo/output/001.png)
+![](PedestrianDetection/demo/output/004.png)
diff --git a/PaddleCV/PaddleDetection/contrib/README_cn.md b/PaddleCV/PaddleDetection/contrib/README_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..ca2a0fda33ef9dba514d07cf9c808ec1cd2878e1
--- /dev/null
+++ b/PaddleCV/PaddleDetection/contrib/README_cn.md
@@ -0,0 +1,106 @@
+# PaddleDetection models for specific scenarios
+
+We provide PaddlePaddle-based detection models for different scenarios; users can download and use them directly.
+
+| Task | Algorithm | Box AP | Download |
+|:---------------------|:---------:|:------:| :---------------------------------------------------------------------------------: |
+| Vehicle Detection | YOLOv3 | 54.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar) |
+| Pedestrian Detection | YOLOv3 | 51.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar) |
+
+
+## Vehicle Detection
+
+One of the major applications of vehicle detection is traffic monitoring. In such monitoring scenarios, the vehicles to be detected are mostly captured by cameras mounted on traffic light columns.
+
+### 1. Network
+
+YOLOv3 with a DarkNet53 backbone.
+
+### 2. Configuration for training
+
+PaddleDetection provides a configuration file [yolov3_darknet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml) for training YOLOv3 on the COCO dataset. Compared with that file, we modified the following parameters for vehicle detection:
+
+* max_iters: 120000
+* num_classes: 6
+* anchors: [[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], [253, 224]]
+* label_smooth: false
+* nms/nms_top_k: 400
+* nms/score_threshold: 0.005
+* milestones: [60000, 80000]
+* dataset_dir: dataset/vehicle
+
+### 3. Accuracy
+
+The accuracy of the model on our internal data is as follows:
+
+AP at IoU=.50:.05:.95 is 0.545.
+
+AP at IoU=.50 is 0.764.
+
+### 4. Inference
+
+Users can run vehicle detection with our trained model:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/infer.py -c contrib/VehicleDetection/vehicle_yolov3_darknet.yml \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar \
+ --infer_dir contrib/VehicleDetection/demo \
+ --draw_threshold 0.2 \
+ --output_dir contrib/VehicleDetection/demo/output
+
+```
+
+Example inference results:
+
+![](VehicleDetection/demo/output/001.jpeg)
+![](VehicleDetection/demo/output/005.png)
+
+## Pedestrian Detection
+
+The main application of pedestrian detection is intelligent monitoring. In such scenarios, pedestrians are mostly captured from the viewpoint of surveillance cameras in public areas, and detection is then performed on the captured images.
+
+### 1. Network
+
+YOLOv3 with a DarkNet53 backbone.
+
+
+### 2. Configuration for training
+
+PaddleDetection provides a configuration file [yolov3_darknet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml) for training YOLOv3 on the COCO dataset. Compared with that file, we modified the following parameters for pedestrian detection:
+
+* max_iters: 200000
+* num_classes: 1
+* snapshot_iter: 5000
+* milestones: [150000, 180000]
+* dataset_dir: dataset/pedestrian
+
+### 3. Accuracy
+
+The accuracy of the model on our internal data for monitoring scenarios is as follows:
+
+AP at IoU=.50 is 0.792.
+
+AP at IoU=.50:.05:.95 is 0.518.
+
+### 4. Inference
+
+Users can run pedestrian detection with our trained model:
+
+```
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/infer.py -c contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar \
+ --infer_dir contrib/PedestrianDetection/demo \
+ --draw_threshold 0.3 \
+ --output_dir contrib/PedestrianDetection/demo/output
+```
+
+Example inference results:
+
+![](PedestrianDetection/demo/output/001.png)
+![](PedestrianDetection/demo/output/004.png)
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/001.jpeg b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/001.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..8786db5eb6773931c363358bb39462b33db55369
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/001.jpeg differ
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/003.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/003.png
new file mode 100644
index 0000000000000000000000000000000000000000..c01ab4ce769fb3b1c8863093a35d27da0ab10efd
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/003.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/004.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/004.png
new file mode 100644
index 0000000000000000000000000000000000000000..8907eb8d4d9b82e08ca214509c9fb41ca889db2a
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/004.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/005.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/005.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf17712809c2fe6fa8e7d4f093ec4ac94523537c
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/005.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/001.jpeg b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/001.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..aa2b679d4d2a73487edd5f9c67323ab18df93893
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/001.jpeg differ
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/005.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/005.png
new file mode 100644
index 0000000000000000000000000000000000000000..57f918a30fcc5bf7bda284c1a1a0304e8822d325
Binary files /dev/null and b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/005.png differ
diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle_yolov3_darknet.yml b/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle_yolov3_darknet.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6a923a0c109a30ed2e247fc7204e81ef0a82eef4
--- /dev/null
+++ b/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle_yolov3_darknet.yml
@@ -0,0 +1,82 @@
+architecture: YOLOv3
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+use_gpu: true
+max_iters: 120000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 2000
+metric: COCO
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
+weights: https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar
+num_classes: 6
+
+YOLOv3:
+ backbone: DarkNet
+ yolo_head: YOLOv3Head
+
+DarkNet:
+ norm_type: sync_bn
+ norm_decay: 0.
+ depth: 53
+
+YOLOv3Head:
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ anchors: [[8, 9], [10, 23], [19, 15],
+ [23, 33], [40, 25], [54, 50],
+ [101, 80], [139, 145], [253, 224]]
+ norm_decay: 0.
+ ignore_thresh: 0.7
+ label_smooth: false
+ nms:
+ background_label: -1
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 400
+ normalized: false
+ score_threshold: 0.005
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 60000
+ - 80000
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
+
+YoloTrainFeed:
+ batch_size: 8
+ dataset:
+ dataset_dir: dataset/vehicle
+ annotation: annotations/instances_train2017.json
+ image_dir: train2017
+ num_workers: 8
+ bufsize: 128
+ use_process: true
+
+YoloEvalFeed:
+ batch_size: 8
+ image_shape: [3, 608, 608]
+ dataset:
+ dataset_dir: dataset/vehicle
+ annotation: annotations/instances_val2017.json
+ image_dir: val2017
+
+YoloTestFeed:
+ batch_size: 1
+ image_shape: [3, 608, 608]
+ dataset:
+ annotation: contrib/VehicleDetection/vehicle.json
diff --git a/PaddleCV/PaddleDetection/dataset/wider_face/download.sh b/PaddleCV/PaddleDetection/dataset/wider_face/download.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6c86a22c6826d88846a16fbd43f8b556d8610b8f
--- /dev/null
+++ b/PaddleCV/PaddleDetection/dataset/wider_face/download.sh
@@ -0,0 +1,21 @@
+# All rights reserved by `PaddleDetection`
+# References:
+# @inproceedings{yang2016wider,
+# Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
+# Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+# Title = {WIDER FACE: A Face Detection Benchmark},
+# Year = {2016}}
+
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR"
+
+# Download the data.
+echo "Downloading..."
+wget https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip
+wget https://dataset.bj.bcebos.com/wider_face/WIDER_val.zip
+wget https://dataset.bj.bcebos.com/wider_face/wider_face_split.zip
+# Extract the data.
+echo "Extracting..."
+unzip WIDER_train.zip
+unzip WIDER_val.zip
+unzip wider_face_split.zip
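+# Usage note (a sketch): the script can be run from any directory, since it
+# first resolves and enters its own location via the DIR variable above, e.g.:
+#   bash dataset/wider_face/download.sh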
diff --git a/PaddleCV/PaddleDetection/demo/000000014439_640x640.jpg b/PaddleCV/PaddleDetection/demo/000000014439_640x640.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..58e9d3e228af43c9b55d8d0cb385ce82ebb8b996
Binary files /dev/null and b/PaddleCV/PaddleDetection/demo/000000014439_640x640.jpg differ
diff --git a/PaddleCV/PaddleDetection/demo/cas.png b/PaddleCV/PaddleDetection/demo/cas.png
new file mode 100644
index 0000000000000000000000000000000000000000..a60303c99ce0d1ba52e6f89414df8bd5c90fae62
Binary files /dev/null and b/PaddleCV/PaddleDetection/demo/cas.png differ
diff --git a/PaddleCV/PaddleDetection/demo/obj365_gt.png b/PaddleCV/PaddleDetection/demo/obj365_gt.png
new file mode 100644
index 0000000000000000000000000000000000000000..eb69077f6ba1676d9fc0ba3e4cada645c5ab7245
Binary files /dev/null and b/PaddleCV/PaddleDetection/demo/obj365_gt.png differ
diff --git a/PaddleCV/PaddleDetection/demo/obj365_pred.png b/PaddleCV/PaddleDetection/demo/obj365_pred.png
new file mode 100644
index 0000000000000000000000000000000000000000..d99fb1fd198f1f8b6e003741375e9aa5524f117a
Binary files /dev/null and b/PaddleCV/PaddleDetection/demo/obj365_pred.png differ
diff --git a/PaddleCV/PaddleDetection/demo/output/12_Group_Group_12_Group_Group_12_935.jpg b/PaddleCV/PaddleDetection/demo/output/12_Group_Group_12_Group_Group_12_935.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2a563361ae03fbe079dba017374eee51ccbd17dd
Binary files /dev/null and b/PaddleCV/PaddleDetection/demo/output/12_Group_Group_12_Group_Group_12_935.jpg differ
diff --git a/PaddleCV/PaddleDetection/docs/BENCHMARK_INFER_cn.md b/PaddleCV/PaddleDetection/docs/BENCHMARK_INFER_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..bc4c25fee1555eb124c25edfcd066af0f6c7bfcf
--- /dev/null
+++ b/PaddleCV/PaddleDetection/docs/BENCHMARK_INFER_cn.md
@@ -0,0 +1,89 @@
+
+
+# Inference Benchmark
+
+
+
+- Test environment:
+  - CUDA 9.0
+  - CUDNN 7.5
+  - TensorRT-5.1.2.2
+  - PaddlePaddle v1.6
+  - GPUs: Tesla V100 and Tesla P4
+- Test method:
+  - To make the inference speed of different models easy to compare, all inputs are images of the same size, 3x640x640, using `demo/000000014439_640x640.jpg`.
+  - Batch Size = 1
+  - Excluding the first 10 warmup iterations, we report the average time over 100 iterations in ms/image, including the time to copy input data to the GPU, the computation time, and the time to copy results back to the CPU.
+  - The Fluid C++ inference engine is used, covering both plain Fluid C++ inference and Fluid-TensorRT inference; both Float32 (FP32) and Float16 (FP16) inference speeds are measured below.
+  - FLAGS_cudnn_exhaustive_search=True was enabled during testing to select convolution algorithms by exhaustive search (see the sketch after this list).
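+
+For reference, a minimal sketch of the environment for a single timed run, using the repository's generic inference entry point (this document does not ship a dedicated benchmark script):
+
+```bash
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+# enable exhaustive search for convolution algorithms, as in the benchmark setup above
+export FLAGS_cudnn_exhaustive_search=True
+python -u tools/infer.py -c configs/yolov3_darknet.yml \
+    --infer_img=demo/000000014439_640x640.jpg
+```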
+
+### Inference Speed
+
+
+
+
+
+| Model | Tesla V100 Fluid (ms/image) | Tesla V100 Fluid-TensorRT-FP32 (ms/image) | Tesla V100 Fluid-TensorRT-FP16 (ms/image) | Tesla P4 Fluid (ms/image) | Tesla P4 Fluid-TensorRT-FP32 (ms/image) |
+| ------------------------------------- | ----------------------------- | ------------------------------------------- | ------------------------------------------- | --------------------------- | ----------------------------------------- |
+| faster_rcnn_r50_1x | 147.488 | 146.124 | 142.416 | 471.547 | 471.631 |
+| faster_rcnn_r50_2x | 147.636 | 147.73 | 141.664 | 471.548 | 472.86 |
+| faster_rcnn_r50_vd_1x | 146.588 | 144.767 | 141.208 | 459.357 | 457.852 |
+| faster_rcnn_r50_fpn_1x | 25.11 | 24.758 | 20.744 | 59.411 | 57.585 |
+| faster_rcnn_r50_fpn_2x | 25.351 | 24.505 | 20.509 | 59.594 | 57.591 |
+| faster_rcnn_r50_vd_fpn_2x | 25.514 | 25.292 | 21.097 | 61.026 | 58.377 |
+| faster_rcnn_r50_fpn_gn_2x | 36.959 | 36.173 | 32.356 | 101.339 | 101.212 |
+| faster_rcnn_dcn_r50_fpn_1x | 28.707 | 28.162 | 27.503 | 68.154 | 67.443 |
+| faster_rcnn_dcn_r50_vd_fpn_2x | 28.576 | 28.271 | 27.512 | 68.959 | 68.448 |
+| faster_rcnn_r101_1x | 153.267 | 150.985 | 144.849 | 490.104 | 486.836 |
+| faster_rcnn_r101_fpn_1x | 30.949 | 30.331 | 24.021 | 73.591 | 69.736 |
+| faster_rcnn_r101_fpn_2x | 30.918 | 29.126 | 23.677 | 73.563 | 70.32 |
+| faster_rcnn_r101_vd_fpn_1x | 31.144 | 30.202 | 23.57 | 74.767 | 70.773 |
+| faster_rcnn_r101_vd_fpn_2x | 30.678 | 29.969 | 23.327 | 74.882 | 70.842 |
+| faster_rcnn_x101_vd_64x4d_fpn_1x | 60.36 | 58.461 | 45.172 | 132.178 | 131.734 |
+| faster_rcnn_x101_vd_64x4d_fpn_2x | 59.003 | 59.163 | 46.065 | 131.422 | 132.186 |
+| faster_rcnn_dcn_r101_vd_fpn_1x | 36.862 | 37.205 | 36.539 | 93.273 | 92.616 |
+| faster_rcnn_dcn_x101_vd_64x4d_fpn_1x | 78.476 | 78.335 | 77.559 | 185.976 | 185.996 |
+| faster_rcnn_se154_vd_fpn_s1x | 166.282 | 90.508 | 80.738 | 304.653 | 193.234 |
+| mask_rcnn_r50_1x | 160.185 | 160.4 | 160.322 | - | - |
+| mask_rcnn_r50_2x | 159.821 | 159.527 | 160.41 | - | - |
+| mask_rcnn_r50_fpn_1x | 95.72 | 95.719 | 92.455 | 259.8 | 258.04 |
+| mask_rcnn_r50_fpn_2x | 84.545 | 83.567 | 79.269 | 227.284 | 222.975 |
+| mask_rcnn_r50_vd_fpn_2x | 82.07 | 82.442 | 77.187 | 223.75 | 221.683 |
+| mask_rcnn_r50_fpn_gn_2x | 94.936 | 94.611 | 91.42 | 265.468 | 263.76 |
+| mask_rcnn_dcn_r50_fpn_1x | 97.828 | 97.433 | 93.76 | 256.295 | 258.056 |
+| mask_rcnn_dcn_r50_vd_fpn_2x | 77.831 | 79.453 | 76.983 | 205.469 | 204.499 |
+| mask_rcnn_r101_fpn_1x | 95.543 | 97.929 | 90.314 | 252.997 | 250.782 |
+| mask_rcnn_r101_vd_fpn_1x | 98.046 | 97.647 | 90.272 | 261.286 | 262.108 |
+| mask_rcnn_x101_vd_64x4d_fpn_1x | 115.461 | 115.756 | 102.04 | 296.066 | 293.62 |
+| mask_rcnn_x101_vd_64x4d_fpn_2x | 107.144 | 107.29 | 97.275 | 267.636 | 267.577 |
+| mask_rcnn_dcn_r101_vd_fpn_1x | 85.504 | 84.875 | 84.907 | 225.202 | 226.585 |
+| mask_rcnn_dcn_x101_vd_64x4d_fpn_1x | 129.937 | 129.934 | 127.804 | 326.786 | 326.161 |
+| mask_rcnn_se154_vd_fpn_s1x | 214.188 | 139.807 | 121.516 | 440.391 | 439.727 |
+| cascade_rcnn_r50_fpn_1x | 36.866 | 36.949 | 36.637 | 101.851 | 101.912 |
+| cascade_mask_rcnn_r50_fpn_1x | 110.344 | 106.412 | 100.367 | 301.703 | 297.739 |
+| cascade_rcnn_dcn_r50_fpn_1x | 40.412 | 39.58 | 39.853 | 110.346 | 110.077 |
+| cascade_mask_rcnn_r50_fpn_gn_2x | 170.092 | 168.758 | 163.298 | 527.998 | 529.59 |
+| cascade_rcnn_dcn_r101_vd_fpn_1x | 48.414 | 48.849 | 48.701 | 134.9 | 134.846 |
+| cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x | 90.062 | 90.218 | 90.009 | 228.67 | 228.396 |
+| retinanet_r101_fpn_1x | 55.59 | 54.636 | 48.489 | 90.394 | 83.951 |
+| retinanet_r50_fpn_1x | 50.048 | 47.932 | 44.385 | 73.819 | 70.282 |
+| retinanet_x101_vd_64x4d_fpn_1x | 83.329 | 83.446 | 70.76 | 145.936 | 146.168 |
+| yolov3_darknet | 21.427 | 20.252 | 13.856 | 55.173 | 55.692 |
+| yolov3_darknet_voc | 17.58 | 16.241 | 9.473 | 51.049 | 51.249 |
+| yolov3_mobilenet_v1 | 12.869 | 11.834 | 9.408 | 24.887 | 21.352 |
+| yolov3_mobilenet_v1_voc | 9.118 | 8.146 | 5.575 | 20.787 | 17.169 |
+| yolov3_r34 | 14.914 | 14.125 | 11.176 | 20.798 | 20.822 |
+| yolov3_r34_voc | 11.288 | 10.73 | 7.7 | 25.874 | 22.399 |
+| ssd_mobilenet_v1_voc | 5.763 | 5.854 | 4.589 | 11.75 | 9.485 |
+| ssd_vgg16_300 | 28.722 | 29.644 | 20.399 | 73.707 | 74.531 |
+| ssd_vgg16_300_voc | 18.425 | 19.288 | 11.298 | 56.297 | 56.201 |
+| ssd_vgg16_512 | 27.471 | 28.328 | 19.328 | 68.685 | 69.808 |
+| ssd_vgg16_512_voc | 18.721 | 19.636 | 12.004 | 54.688 | 56.174 |
+
+1. For the R-CNN family, Fluid-TensorRT shows no speed advantage over plain Fluid inference. The reasons are: TensorRT only supports fixed-size inputs, and in the current ResNet-based R-CNN models only the backbone runs as a TensorRT subgraph, while the time-consuming stage-5 is not computed with TensorRT; in addition, Fluid itself applies a series of fusion optimizations to CNN models. The numbers will be updated after future TensorRT upgrades or further optimizations.
+2. For the YOLOv3 family, Fluid-TensorRT is roughly 5% - 10% faster than plain Fluid inference.
+3. For the SSD and YOLOv3 families, TensorRT-FP16 inference shows a clear advantage, with speedups of roughly 20% - 40%, as shown in the figure below.
+
+
+

+
diff --git a/PaddleCV/PaddleDetection/docs/CACascadeRCNN.md b/PaddleCV/PaddleDetection/docs/CACascadeRCNN.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e72b9af30183abf6c028ec2f4ee2ec695a29a6c
--- /dev/null
+++ b/PaddleCV/PaddleDetection/docs/CACascadeRCNN.md
@@ -0,0 +1,51 @@
+# CACascade RCNN
+## Introduction
+CACascade RCNN is one of the best single models with which Baidu's vision team won the Objects365 2019 Challenge. Objects365 is a brand-new dataset for generic object detection, designed to advance research on detecting diverse objects in natural scenes. It annotates 365 object classes on 630k images, with more than 10 million bounding boxes in the training set. Released here is one of the best single models for the Full Track task.
+
+
+

+
+
+## Method
+
+To address the characteristics of large-scale object detection, we propose a sampling strategy based on the number of object categories each image contains (Class Aware Sampling). Training with this strategy lets the model converge to better results in less time.
+
+
+

+
+
+The best single model released here is a two-stage detector based on Cascade RCNN, with the backbone replaced by the stronger SENet154, Deformable Conv modules added, and a more complex second-stage network. Group Normalization is applied to cope with the small batch size, and multi-scale training is used, which together yield very strong results. The pretrained model was trained first on ImageNet and then on COCO; a Mask branch was added during COCO training while the rest of the structure is identical to CACascade RCNN. It is downloaded automatically when training starts.
+
+## Usage
+
+1. Prepare the data
+
+Request and download the data from the [Objects365 official website](https://www.objects365.org/download.html), then place it in the `dataset` directory.
+```
+${THIS REPO ROOT}
+ \--dataset
+ \-- objects365
+ \-- annotations
+ |-- train.json
+ |-- val.json
+ \-- train
+ \-- val
+```
+
+2. Start training
+
+```bash
+python tools/train.py -c configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn.yml
+```
+
+3. Results
+
+| Model | Validation mAP | Download |
+| :-----------------: | :--------: | :----------------------------------------------------------: |
+| CACascadeRCNN SE154 | 31.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas_obj365.tar) |
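+
+As a sketch, following the evaluation interface described in [GETTING_STARTED_cn.md](GETTING_STARTED_cn.md), the released weights can be evaluated on the Objects365 validation set roughly like this:
+
+```bash
+export CUDA_VISIBLE_DEVICES=0
+export PYTHONPATH=$PYTHONPATH:.
+python -u tools/eval.py -c configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn.yml \
+    -o weights=https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas_obj365.tar
+```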
+
+## Qualitative Results
+
+
+

+
diff --git a/PaddleCV/PaddleDetection/docs/CONFIG.md b/PaddleCV/PaddleDetection/docs/CONFIG.md
index ea05b3978dd245c7737948ede09211247a201afc..3cba54eb546cfb648cc7b5bd2e135652a040b309 100644
--- a/PaddleCV/PaddleDetection/docs/CONFIG.md
+++ b/PaddleCV/PaddleDetection/docs/CONFIG.md
@@ -1,3 +1,5 @@
+English | [简体中文](CONFIG_cn.md)
+
# Config Pipeline
## Introduction
diff --git a/PaddleCV/PaddleDetection/docs/DATA.md b/PaddleCV/PaddleDetection/docs/DATA.md
index c47049b0a7c59d3db83bfaf7f839d6fa99b8880d..080fe0666f0b8cb981f6ec8b131ee6e7cf5fffc1 100644
--- a/PaddleCV/PaddleDetection/docs/DATA.md
+++ b/PaddleCV/PaddleDetection/docs/DATA.md
@@ -1,3 +1,5 @@
+English | [简体中文](DATA_cn.md)
+
# Data Pipeline
## Introduction
@@ -126,6 +128,8 @@ the corresponding data stream. Many aspect of the `Reader`, such as storage
location, preprocessing pipeline, acceleration mode can be configured with yaml
files.
+### APIs
+
The main APIs are as follows:
1. Data parsing
@@ -139,7 +143,7 @@ The main APIs are as follows:
- `source/loader.py`: Roidb dataset parser. [source](../ppdet/data/source/loader.py)
2. Operator
- `transform/operators.py`: Contains a variety of data enhancement methods, including:
+ `transform/operators.py`: Contains a variety of data augmentation methods, including:
- `DecodeImage`: Read images in RGB format.
- `RandomFlipImage`: Horizontal flip.
- `RandomDistort`: Distort brightness, contrast, saturation, and hue.
@@ -150,7 +154,7 @@ The main APIs are as follows:
- `NormalizeImage`: Normalize image pixel values.
- `NormalizeBox`: Normalize the bounding box.
- `Permute`: Arrange the channels of the image and optionally convert image to BGR format.
-- `MixupImage`: Mixup two images with given fraction[1](#vd).
+- `MixupImage`: Mixup two images with given fraction[1](#mix).
[1] Please refer to [this paper](https://arxiv.org/pdf/1710.09412.pdf).
@@ -177,16 +181,18 @@ whole data pipeline is fully customizable through the yaml configuration files.
#### Custom Datasets
-- Option 1: Convert the dataset to COCO or VOC format.
+- Option 1: Convert the dataset to COCO format.
```sh
- # a small utility (`tools/labelme2coco.py`) is provided to convert
- # Labelme-annotated dataset to COCO format.
- python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+ # a small utility (`tools/x2coco.py`) is provided to convert
+ # Labelme-annotated dataset or cityscape dataset to COCO format.
+ python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+ --json_input_dir ./labelme_annos/
--image_input_dir ./labelme_imgs/
--output_dir ./cocome/
--train_proportion 0.8
--val_proportion 0.2
--test_proportion 0.0
+ # --dataset_type: the format of the dataset to be converted; currently 'labelme' and 'cityscape' are supported
# --json_input_dir:The path of json files which are annotated by Labelme.
# --image_input_dir:The path of images.
# --output_dir:The path of coverted COCO dataset.
diff --git a/PaddleCV/PaddleDetection/docs/DATA_cn.md b/PaddleCV/PaddleDetection/docs/DATA_cn.md
index eff8b5489a2cdf9524473c563ce2d90ae9d9bd64..e34ba8e8585d7ea8d692e18ad88e50f048f2b0a3 100644
--- a/PaddleCV/PaddleDetection/docs/DATA_cn.md
+++ b/PaddleCV/PaddleDetection/docs/DATA_cn.md
@@ -105,9 +105,9 @@ python ./ppdet/data/tools/generate_data_for_training.py
4. Data retrieval interface
To make data access during training convenient, multiple `data.Dataset` objects are combined into a `data.Reader` that serves data to the user; calling `Reader.[train|eval|infer]` yields the corresponding data stream. The `Reader` supports configuring the storage location, preprocessing pipeline, acceleration mode, etc. through yaml files.
-The main APIs are as follows:
-
+### APIs
+The main APIs are as follows:
1. Data parsing
@@ -165,15 +165,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
```
#### How to use a custom dataset?
-- Option 1: Convert the dataset to VOC or COCO format.
+- Option 1: Convert the dataset to COCO format.
```
- # ./tools/ provides labelme2coco.py to convert Labelme-annotated datasets to COCO format
- python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+ # ./tools/ provides x2coco.py to convert Labelme-annotated datasets or cityscape datasets to COCO format
+ python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+ --json_input_dir ./labelme_annos/
--image_input_dir ./labelme_imgs/
--output_dir ./cocome/
--train_proportion 0.8
--val_proportion 0.2
--test_proportion 0.0
+ # --dataset_type: the format of the dataset to be converted; currently 'labelme' and 'cityscape' are supported
# --json_input_dir: folder containing the labelme-annotated json files
# --image_input_dir: folder containing the image files
# --output_dir: where the converted COCO-format dataset is stored
diff --git a/PaddleCV/PaddleDetection/docs/EXPORT_MODEL.md b/PaddleCV/PaddleDetection/docs/EXPORT_MODEL.md
new file mode 100644
index 0000000000000000000000000000000000000000..614d87e29c5e5d5685f64c9ee78cf7d1d5192d09
--- /dev/null
+++ b/PaddleCV/PaddleDetection/docs/EXPORT_MODEL.md
@@ -0,0 +1,48 @@
+# Model Export
+
+After training a model that meets your requirements, export it with `tools/export_model.py` if you want to serve it through the C++ inference library or a Serving service.
+
+## Arguments
+
+| FLAG | Description | Default | Remark |
+|:--------------:|:--------------:|:------------:|:-----------------------------------------:|
+| -c | config file | None | |
+| --output_dir | directory to save the model | `./output` | the model is saved under `output/<config name>/` by default |
+
+## Example
+
+Try it with the model trained in [Training/Evaluation/Inference](GETTING_STARTED_cn.md); the script is as follows
+
+```bash
+# Export a Faster R-CNN model; the default shape of its data layer is 3x800x1333
+python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \
+ --output_dir=./inference_model \
+ -o weights=output/faster_rcnn_r50_1x/model_final
+```
+
+The inference model is exported to the `inference_model/faster_rcnn_r50_1x` directory; the model file and parameter file are named `__model__` and `__params__` respectively.
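+
+A quick sanity check of the exported files (a sketch):
+
+```bash
+ls inference_model/faster_rcnn_r50_1x
+# expected output: __model__  __params__
+```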
+
+## Setting the input size of the exported model
+
+When running inference with Fluid-TensorRT, TensorRT versions <= 5.1 only support fixed-size inputs, so the image size of the `data` layer in the saved model must match the actual input size. The Fluid C++ inference engine has no such restriction. The input image size in the saved model can be changed by setting `image_shape` in the TestFeed. Examples:
+
+```bash
+# Export a Faster R-CNN model with 3x640x640 input
+python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \
+ --output_dir=./inference_model \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
+ FasterRCNNTestFeed.image_shape=[3,640,640]
+
+# Export a YOLOv3 model with 3x320x320 input
+python tools/export_model.py -c configs/yolov3_darknet.yml \
+ --output_dir=./inference_model \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar \
+ YoloTestFeed.image_shape=[3,320,320]
+
+# Export an SSD model with 3x300x300 input
+python tools/export_model.py -c configs/ssd/ssd_mobilenet_v1_voc.yml \
+ --output_dir=./inference_model \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar \
+ SSDTestFeed.image_shape=[3,300,300]
+```
diff --git a/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md b/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md
index 788f7f408873a5caeee542129c0457111af40ebb..199b343ed2a80a1bf80fcf3d3206fbdf8413551f 100644
--- a/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md
+++ b/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md
@@ -1,210 +1,163 @@
+English | [简体中文](GETTING_STARTED_cn.md)
+
# Getting Started
For setting up the running environment, please refer to [installation
instructions](INSTALL.md).
-## Training
-
-#### Single-GPU Training
+## Training/Evaluation/Inference
+PaddleDetection provides scripts for training, evaluation and inference; specific features can be enabled through different optional arguments.
```bash
-export CUDA_VISIBLE_DEVICES=0
+# set PYTHONPATH
export PYTHONPATH=$PYTHONPATH:.
-python tools/train.py -c configs/faster_rcnn_r50_1x.yml
-```
-
-#### Multi-GPU Training
-
-```bash
+# training on single or multiple GPUs; specify GPU ids via CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
python tools/train.py -c configs/faster_rcnn_r50_1x.yml
+# GPU evaluation
+export CUDA_VISIBLE_DEVICES=0
+python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
+# Inference
+python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg
```
-#### CPU Training
+### Optional argument list
-```bash
-export CPU_NUM=8
-export PYTHONPATH=$PYTHONPATH:.
-python tools/train.py -c configs/faster_rcnn_r50_1x.yml -o use_gpu=false
-```
+The list below can be viewed via `--help`
-##### Optional arguments
+| FLAG | supported scripts | description | default | remark |
+| :----------------------: | :------------: | :---------------: | :--------------: | :-----------------: |
+| -c | ALL | Specify the config file | None | **See [config_example](config_example) for a full description of the configuration** |
+| -o | ALL | Set options in the config file | None | Options set via `-o` take precedence over the file selected by `-c`, e.g. `-o use_gpu=False max_iter=10000` |
+| -r/--resume_checkpoint | train | Checkpoint path for resuming training | None | `-r output/faster_rcnn_r50_1x/10000` |
+| --eval | train | Whether to perform evaluation during training | False | |
+| --output_eval | train/eval | json output path for evaluation | current path | `--output_eval ./json_result` |
+| -d/--dataset_dir | train/eval | Dataset path, same as dataset_dir in configs | None | `-d dataset/coco` |
+| --fp16 | train | Whether to enable mixed precision training | False | requires GPU training |
+| --loss_scale | train | Loss scaling factor for mixed precision training | 8.0 | effective only when `--fp16` is enabled |
+| --json_eval | eval | Whether to evaluate with an existing bbox.json or mask.json | False | the json path is set via `--output_eval` |
+| --output_dir | infer | Directory for storing the output visualization files | `./output` | `--output_dir output` |
+| --draw_threshold | infer | Score threshold for keeping results in visualization | 0.5 | `--draw_threshold 0.7` |
+| --infer_dir | infer | Directory of images to run inference on | None | |
+| --infer_img | infer | Image path | None | takes precedence over `--infer_dir` |
+| --use_tb | train/infer | Whether to record data with [tb-paddle](https://github.com/linshuliang/tb-paddle) for display in TensorBoard | False | |
+| --tb\_log_dir | train/infer | tb-paddle logging directory | train: `tb_log_dir/scalar` infer: `tb_log_dir/image` | |
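+
+For instance, several of these flags can be combined in a single run; a sketch (the checkpoint path is illustrative):
+
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+    -r output/faster_rcnn_r50_1x/10000 \
+    --eval \
+    --use_tb=True \
+    --tb_log_dir tb_log_dir/scalar
+```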
-- `-r` or `--resume_checkpoint`: Checkpoint path for resuming training. Such as: `-r output/faster_rcnn_r50_1x/10000`
-- `--eval`: Whether to perform evaluation in training, default is `False`
-- `--output_eval`: If perform evaluation in training, this edits evaluation directory, default is current directory.
-- `-d` or `--dataset_dir`: Dataset path, same as `dataset_dir` of configs. Such as: `-d dataset/coco`
-- `-c`: Select config file and all files are saved in `configs/`
-- `-o`: Set configuration options in config file. Such as: `-o max_iters=180000`. `-o` has higher priority to file configured by `-c`
-- `--use_tb`: Whether to record the data with [tb-paddle](https://github.com/linshuliang/tb-paddle), so as to display in Tensorboard, default is `False`
-- `--tb_log_dir`: tb-paddle logging directory for scalar, default is `tb_log_dir/scalar`
-- `--fp16`: Whether to enable mixed precision training (requires GPU), default is `False`
-- `--loss_scale`: Loss scaling factor for mixed precision training, default is `8.0`
+## Examples
-##### Examples
+### Training
- Perform evaluation in training
-```bash
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval
-```
-Alternating between training epoch and evaluation run is possible, simply pass
-in `--eval` to do so and evaluate at each snapshot_iter. It can be modified at `snapshot_iter` of the configuration file. If evaluation dataset is large and
-causes time-consuming in training, we suggest decreasing evaluation times or evaluating after training. When perform evaluation in training,
-the best model with highest MAP is saved at each `snapshot_iter`. `best_model` has the same path as `model_final`.
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval
+ ```
+ Training and evaluation are performed alternately, with evaluation at each snapshot_iter. Meanwhile, the model with the highest mAP is saved at each `snapshot_iter` as `best_model`, which shares its path with `model_final`.
-- Configure dataset path
-```bash
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
- -d dataset/coco
-```
+ If the evaluation dataset is large, we suggest reducing the evaluation frequency or evaluating after training.
- Fine-tune other task
-When using pre-trained model to fine-tune other task, the excluded pre-trained parameters can be set by finetune_exclude_pretrained_params in YAML config or -o finetune_exclude_pretrained_params in the arguments.
+ When using a pre-trained model to fine-tune another task, either of two methods can be used:
-```bash
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
- -o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
- finetune_exclude_pretrained_params = ['cls_score','bbox_pred']
-```
+ 1. The excluded pre-trained parameters can be set via `finetune_exclude_pretrained_params` in the YAML config
+ 2. Set `-o finetune_exclude_pretrained_params` on the command line.
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+ -o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
+ finetune_exclude_pretrained_params=['cls_score','bbox_pred']
+ ```
##### NOTES
- `CUDA_VISIBLE_DEVICES` can specify different gpu numbers. Such as: `export CUDA_VISIBLE_DEVICES=0,1,2,3`. GPU calculation rules can refer [FAQ](#faq)
-- Dataset is stored in `dataset/coco` by default (configurable).
- Dataset will be downloaded automatically and cached in `~/.cache/paddle/dataset` if not be found locally.
- Pretrained model is downloaded automatically and cached in `~/.cache/paddle/weights`.
-- Model checkpoints are saved in `output` by default (configurable).
-- When finetuning, users could set `pretrain_weights` to the models published by PaddlePaddle. Parameters matched by fields in finetune_exclude_pretrained_params will be ignored in loading and fields can be wildcard matching. For detailed information, please refer to [Transfer Learning](TRANSFER_LEARNING.md).
-- To check out hyper parameters used, please refer to the [configs](../configs).
+- Checkpoints are saved in `output` by default; this can be changed via `save_dir` in the config files.
- RCNN models training on CPU is not supported on PaddlePaddle<=1.5.1 and will be fixed on later version.
+### Mixed Precision Training
+
+Mixed precision training can be enabled with the `--fp16` flag. Currently Faster-FPN, Mask-FPN and YOLOv3 have been verified to work with little to no loss of precision (less than 0.2 mAP).
-## Evaluation
+To speed up mixed precision training, it is recommended to train in multi-process mode, for example
```bash
-# run on GPU with:
-export PYTHONPATH=$PYTHONPATH:.
-export CUDA_VISIBLE_DEVICES=0
-python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
+python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 tools/train.py --fp16 -c configs/faster_rcnn_r50_fpn_1x.yml
```
-#### Optional arguments
+If the loss becomes `NaN` during training, try tweaking the `--loss_scale` value. Please refer to the Nvidia [documentation](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html#mptrain) on mixed precision training for details.
-- `-d` or `--dataset_dir`: Dataset path, same as dataset_dir of configs. Such as: `-d dataset/coco`
-- `--output_eval`: Evaluation directory, default is current directory.
-- `-o`: Set configuration options in config file. Such as: `-o weights=output/faster_rcnn_r50_1x/model_final`
-- `--json_eval`: Whether to eval with already existed bbox.json or mask.json. Default is `False`. Json file directory is assigned by `-f` argument.
+Also, please note that mixed precision training currently requires changing `norm_type` from `affine_channel` to `bn`.
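+
+A sketch of such a run; the dotted override key (`ResNet.norm_type`) is an assumption based on the `-o` override syntax used elsewhere in this guide, and the same field can instead be edited directly in the YAML config:
+
+```bash
+python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 \
+    tools/train.py --fp16 -c configs/faster_rcnn_r50_fpn_1x.yml \
+    -o ResNet.norm_type=bn
+```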
-#### Examples
+
+
+### Evaluation
- Evaluate by specified weights path and dataset path
-```bash
-# run on GPU with:
-export PYTHONPATH=$PYTHONPATH:.
-export CUDA_VISIBLE_DEVICES=0
-python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
- -o weights=output/faster_rcnn_r50_1x/model_final \
- -d dataset/coco
-```
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
+ -d dataset/coco
+ ```
+
+ The model to be evaluated can be given either as a local path or as a link from the [MODEL_ZOO](MODEL_ZOO.md).
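+
+ For example, the same evaluation against a locally trained checkpoint (a sketch):
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
+     -o weights=output/faster_rcnn_r50_1x/model_final
+ ```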
- Evaluate with json
-```bash
-# run on GPU with:
-export PYTHONPATH=$PYTHONPATH:.
-export CUDA_VISIBLE_DEVICES=0
-python tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
--json_eval \
-f evaluation/
-```
+ ```
-The json file must be named bbox.json or mask.json, placed in the `evaluation/` directory. Or without the `-f` parameter, default is the current directory.
+ The json file must be named bbox.json or mask.json, placed in the `evaluation/` directory.
#### NOTES
-- Checkpoint is loaded from `output` by default (configurable)
- Multi-GPU evaluation for R-CNN and SSD models is not supported at the
moment, but it is a planned feature
-## Inference
-
-
-- Run inference on a single image:
-
-```bash
-# run on GPU with:
-export PYTHONPATH=$PYTHONPATH:.
-export CUDA_VISIBLE_DEVICES=0
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg
-```
-
-- Multi-image inference:
-
-```bash
-# run on GPU with:
-export PYTHONPATH=$PYTHONPATH:.
-export CUDA_VISIBLE_DEVICES=0
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_dir=demo
-```
-
-#### Optional arguments
-
-- `--output_dir`: Directory for storing the output visualization files.
-- `--draw_threshold`: Threshold to reserve the result for visualization. Default is 0.5.
-- `--save_inference_model`: Save inference model in output_dir if True.
-- `--use_tb`: Whether to record the data with [tb-paddle](https://github.com/linshuliang/tb-paddle), so as to display in Tensorboard, default is `False`
-- `--tb_log_dir`: tb-paddle logging directory for image, default is `tb_log_dir/image`
-
-#### Examples
+### Inference
- Output specified directory && Set up threshold
-```bash
-# run on GPU with:
-export PYTHONPATH=$PYTHONPATH:.
-export CUDA_VISIBLE_DEVICES=0
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
--infer_img=demo/000000570688.jpg \
--output_dir=infer_output/ \
--draw_threshold=0.5 \
-o weights=output/faster_rcnn_r50_1x/model_final \
--use_tb=True
-```
+ ```
-The visualization files are saved in `output` by default, to specify a different path, simply add a `--output_dir=` flag.
-`--draw_threshold` is an optional argument. Default is 0.5.
-Different thresholds will produce different results depending on the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659).
-If users want to infer according to customized model path, `-o weights` can be set for specified path.
-`--use_tb` is an optional argument, if `--use_tb` is `True`, the tb-paddle will record data in directory,
-so users can see the results in Tensorboard.
+ `--draw_threshold` is an optional argument. Default is 0.5.
+ Different thresholds will produce different results depending on the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659).
-- Save inference model
-```bash
-# run on GPU with:
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
- --infer_img=demo/000000570688.jpg \
- --save_inference_model
-```
+- Export model
-Save inference model by set `--save_inference_model`, which can be loaded by PaddlePaddle predict library.
+ ```bash
+ python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \
+ --output_dir=inference_model \
+ -o weights=output/faster_rcnn_r50_1x/model_final \
+ FasterRCNNTestFeed.image_shape=[3,800,1333]
+ ```
+ Save the inference model with `tools/export_model.py`; it can then be loaded by the PaddlePaddle prediction library.
## FAQ
diff --git a/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md b/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md
index 2f0dff5fedc7864842816a1bbfc84dd34cef1108..b5dd6041033e539e18d59dc8669a3658ba395da2 100644
--- a/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md
+++ b/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md
@@ -3,206 +3,146 @@
For setting up the running environment, please refer to the [installation guide](INSTALL_cn.md)
-## Training
-
-
-#### Single-GPU training
+## Training/Evaluation/Inference
+PaddleDetection provides scripts for training, evaluation and inference; specific features can be enabled through different optional arguments
```bash
-export CUDA_VISIBLE_DEVICES=0
+# set PYTHONPATH
export PYTHONPATH=$PYTHONPATH:.
-python tools/train.py -c configs/faster_rcnn_r50_1x.yml
-```
-
-#### Multi-GPU training
-
-
-```bash
+# GPU training supports single and multiple GPUs; specify GPU ids via CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
python tools/train.py -c configs/faster_rcnn_r50_1x.yml
+# GPU evaluation
+export CUDA_VISIBLE_DEVICES=0
+python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
+# inference
+python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg
```
-#### CPU training
+### Optional arguments
-```bash
-export CPU_NUM=8
-export PYTHONPATH=$PYTHONPATH:.
-python tools/train.py -c configs/faster_rcnn_r50_1x.yml -o use_gpu=false
-```
+The list below can be viewed via `--help`
+
+| FLAG | supported scripts | description | default | remark |
+| :----------------------: | :------------: | :---------------: | :--------------: | :-----------------: |
+| -c | ALL | specify the config file | None | **see [config_example](config_example) for a full description of the configuration** |
+| -o | ALL | set options in the config file | None | options set via -o take precedence over the file selected by -c, e.g. `-o use_gpu=False max_iter=10000` |
+| -r/--resume_checkpoint | train | resume training from a checkpoint | None | `-r output/faster_rcnn_r50_1x/10000` |
+| --eval | train | whether to evaluate during training | False | |
+| --output_eval | train/eval | json output path for evaluation | current path | `--output_eval ./json_result` |
+| -d/--dataset_dir | train/eval | dataset path, same as dataset_dir in the config file | None | `-d dataset/coco` |
+| --fp16 | train | whether to enable mixed precision training | False | requires GPU training |
+| --loss_scale | train | loss scaling factor for mixed precision training | 8.0 | effective only when `--fp16` is enabled |
+| --json_eval | eval | whether to evaluate with an existing bbox.json or mask.json | False | the json path is set via `--output_eval` |
+| --output_dir | infer | directory for the visualized inference output | `./output` | `--output_dir output` |
+| --draw_threshold | infer | score threshold for visualization | 0.5 | `--draw_threshold 0.7` |
+| --infer_dir | infer | directory of images for inference | None | |
+| --infer_img | infer | image path for inference | None | takes precedence over `--infer_dir` |
+| --use_tb | train/infer | whether to record data with [tb-paddle](https://github.com/linshuliang/tb-paddle) for display in TensorBoard | False | |
+| --tb\_log_dir | train/infer | storage path for tb-paddle records | train: `tb_log_dir/scalar` infer: `tb_log_dir/image` | |
-##### Optional arguments
-- `-r` or `--resume_checkpoint`: resume training from a checkpoint, e.g. `-r output/faster_rcnn_r50_1x/10000`
-- `--eval`: whether to evaluate during training; default is `False`
-- `--output_eval`: when evaluating during training, this sets the json output path; default is the current directory.
-- `-d` or `--dataset_dir`: dataset path, same as `dataset_dir` in the config file, e.g. `-d dataset/coco`
-- `-c`: select the config file; all config files live in `configs/`
-- `-o`: set options in the config file, e.g. `-o max_iters=180000`; options set via `-o` take precedence over the file selected by `-c`.
-- `--use_tb`: whether to record data with [tb-paddle](https://github.com/linshuliang/tb-paddle) for display in TensorBoard; default is False.
-- `--tb_log_dir`: storage path for tb-paddle records; default is `tb_log_dir/scalar`.
-- `--fp16`: whether to enable mixed precision training (requires GPU); default is `False`.
-- `--loss_scale`: loss scaling factor for mixed precision training; default is `8.0`.
+## Examples
-##### Examples
+### Training
- Evaluate during training
-```bash
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval
-```
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval -d dataset/coco
+ ```
-Evaluation can be interleaved with training by setting `--eval`; it runs at every snapshot\_iter, which can be changed via `snapshot_iter` in the config file.
-If the validation set is large, evaluation is time-consuming and slows down training; we recommend reducing the evaluation frequency or evaluating after training.
-When evaluating during training, the model with the best mAP is saved at every snapshot\_iter
-to the `best_model` folder; the path of `best_model` is the same as that of `model_final`.
+ Evaluation is interleaved with training and runs at every snapshot\_iter. After each evaluation, the model with the best mAP so far is also saved to the `best_model` folder.
-- Specify the dataset path
+ If the validation set is large, evaluation will be time-consuming; we recommend reducing the evaluation frequency or evaluating after training.
-```bash
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
- -d dataset/coco
-```
- Fine-tune other tasks
-When fine-tuning other tasks with a pretrained model, set `finetune_exclude_pretrained_params` in the YAML config file or add `-o finetune_exclude_pretrained_params` on the command line to load the pretrained model selectively.
+ When fine-tuning other tasks with a pretrained model, either of the following two methods can be used:
-```bash
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+ 1. Set `finetune_exclude_pretrained_params` in the YAML config file
+ 2. Add `-o finetune_exclude_pretrained_params` on the command line to load the pretrained model selectively.
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
- finetune_exclude_pretrained_params = ['cls_score','bbox_pred']
-```
+ finetune_exclude_pretrained_params=['cls_score','bbox_pred']
+ ```
-##### Tips
+ See [Transfer Learning](TRANSFER_LEARNING_cn.md) for details
+
+#### Tips
- `CUDA_VISIBLE_DEVICES` can specify different GPUs, e.g. `export CUDA_VISIBLE_DEVICES=0,1,2,3`. See the [FAQ](#faq) for GPU computation rules
-- The dataset is stored in `dataset/coco` by default (configurable).
If the dataset is not found locally, it is downloaded automatically and cached in `~/.cache/paddle/dataset`.
Pretrained models are downloaded automatically and cached in `~/.cache/paddle/weights`.
-- Model checkpoints are saved in `output` by default (configurable).
-- When fine-tuning, users can set `pretrain_weights` to a model released by PaddlePaddle. Parameters matching the fields in finetune_exclude_pretrained_params are skipped during loading, and the fields support wildcard matching. See [Transfer Learning](TRANSFER_LEARNING_cn.md) for details
-- For more parameter settings, please refer to the [config files](../configs).
-- CPU training of R-CNN models is not supported on PaddlePaddle 1.5.1 and below; it will be fixed in the next version.
+- Model checkpoints are saved in `output` by default; this can be changed via save_dir in the config file.
+- CPU training of R-CNN models is not supported on PaddlePaddle 1.5.1 and below.
+### Mixed Precision Training
-## Evaluation
+Mixed precision training can be enabled with the `--fp16` command-line flag. It has been verified on Faster-FPN, Mask-FPN and YOLOv3 with almost no loss of accuracy (less than 0.2 mAP).
+Multi-process training is recommended to further speed up mixed precision training. Example:
```bash
-# GPU evaluation
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
+python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 tools/train.py --fp16 -c configs/faster_rcnn_r50_fpn_1x.yml
```
-#### Optional arguments
+If the loss becomes `NaN` during training, try tuning the `--loss_scale` value; for details see the Nvidia [documentation](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html#mptrain) on mixed precision training.
-- `-d` or `--dataset_dir`: dataset path, same as `dataset_dir` in the config file, e.g. `-d dataset/coco`
-- `--output_eval`: sets the json output path for evaluation; default is the current directory.
-- `-o`: set options in the config file, e.g. `-o weights=output/faster_rcnn_r50_1x/model_final`
-- `--json_eval`: whether to evaluate with an existing bbox.json or mask.json; default is `False`; the json path is set via `-f`.
+Also, note that `norm_type` in the config file must be changed from `affine_channel` to `bn`.
-#### Examples
-- Specify the dataset path
-```bash
-# GPU evaluation
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
- -o weights=output/faster_rcnn_r50_1x/model_final \
+### Evaluation
+
+- Specify the weights and dataset path
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
+ -o weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
-d dataset/coco
-```
+ ```
+
+ The model to evaluate can be a local path such as `output/faster_rcnn_r50_1x/model_final/`, or a model link given in [MODEL_ZOO](MODEL_ZOO_cn.md).
- Evaluate with a json file
-```bash
-# GPU evaluation
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
+
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
--json_eval \
- -f evaluation/
-```
+ --output_eval evaluation/
+ ```
-The json file must be named bbox.json or mask.json and placed in the `evaluation/` directory; alternatively, omit the `-f` argument, which defaults to the current directory.
+ The json file must be named bbox.json or mask.json and placed in the `evaluation/` directory.
#### Tips
-- Checkpoints are loaded from `output` by default (configurable)
- Multi-GPU evaluation of R-CNN and SSD models is not supported at the moment, but will be in a later version
-## Inference
-
-
-- Single-image inference
-
-```bash
-# GPU inference
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg
-```
-
-- Multi-image inference
-
-```bash
-# GPU inference
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_dir=demo
-```
-
-#### Optional arguments
-
-- `--output_dir`: directory for the visualized inference output.
-- `--draw_threshold`: score threshold for inference. Default is 0.5.
-- `--save_inference_model`: if `True`, saves the inference model to output\_dir.
-- `--use_tb`: whether to record data with [tb-paddle](https://github.com/linshuliang/tb-paddle) for display in TensorBoard; default is False.
-- `--tb_log_dir`: storage path for tb-paddle records; default is `tb_log_dir/image`.
-
-#### Examples
+### Inference
- Set the output directory && set the inference threshold
-```bash
-# GPU inference
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
+ ```bash
+ export CUDA_VISIBLE_DEVICES=0
+ python -u tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
--infer_img=demo/000000570688.jpg \
--output_dir=infer_output/ \
--draw_threshold=0.5 \
-o weights=output/faster_rcnn_r50_1x/model_final \
- --use_tb=True
-```
+ ```
-Visualization files are saved in `output` by default; a different output path can be specified via `--output_dir=`.
-`--draw_threshold` is an optional argument. Depending on the [NMS](https://ieeexplore.ieee.org/document/1699659) computation,
-different thresholds produce different results. To run inference with a model at a custom path, set `-o weights` to that path.
-`--use_tb` is an optional argument; when `True`, TensorBoard can be used to visualize parameter trends and images.
-
-- Save the inference model
-
-```bash
-# GPU inference
-export CUDA_VISIBLE_DEVICES=0
-export PYTHONPATH=$PYTHONPATH:.
-python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg \
- --save_inference_model
-```
-
-Set `--save_inference_model` to save an inference model that can be loaded by the PaddlePaddle inference library.
-
+ `--draw_threshold` is an optional argument. Depending on the [NMS](https://ieeexplore.ieee.org/document/1699659) computation,
+ different thresholds produce different results. To run inference with a model at a custom path, set `-o weights` to that path.
## FAQ
@@ -227,3 +167,7 @@ batch size可以达到每GPU 4 (Tesla V100 16GB)。
**Q:** How do I modify the data preprocessing?
**A:** Set `sample_transform` in the config file. Note that the **complete preprocessing pipeline** must be included in the config,
e.g. `DecodeImage`, `NormalizeImage` and `Permute` for RCNN models. See the [config examples](config_example) for more details.
+
+
+**Q:** What is the relationship between affine_channel and batch norm?
+**A:** When R-CNN models are initialized from pretrained weights, the batch norm parameters are sometimes frozen: the global mean and variance from the pretrained model are used, and the batch norm scale and bias are not updated. Most of the released ResNet-based R-CNN models are trained this way. In this case, norm_type in the config can be set to either bn or affine_channel, with freeze_norm set to true (the default); the two are equivalent. affine_channel computes `scale * x + bias`; setting affine_channel simply fuses the batch norm parameters internally. If a model trained with affine_channel is used for initialization when training another task, either affine_channel or batch norm can be used, and the parameters load correctly in both cases.
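+
+A sketch of these settings applied via command-line overrides; the dotted keys (`ResNet.norm_type`, `ResNet.freeze_norm`) are assumptions based on the `-o` override syntax above, and the same fields can be set directly in the YAML config instead:
+
+```bash
+python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
+    -o ResNet.norm_type=affine_channel ResNet.freeze_norm=true
+```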
diff --git a/PaddleCV/PaddleDetection/docs/INSTALL.md b/PaddleCV/PaddleDetection/docs/INSTALL.md
index 3876812cfc389077c647aa42d2adc25d654cd748..10aafe11b1e259031edd93395cebe680767e036a 100644
--- a/PaddleCV/PaddleDetection/docs/INSTALL.md
+++ b/PaddleCV/PaddleDetection/docs/INSTALL.md
@@ -1,3 +1,5 @@
+English | [简体中文](INSTALL_cn.md)
+
# Installation
---
@@ -36,7 +38,7 @@ python -c "import paddle; print(paddle.__version__)"
### Requirements:
-- Python2 or Python3
+- Python2 or Python3 (only Python3 is supported on Windows)
- CUDA >= 8.0
- cuDNN >= 5.0
- nccl >= 2.1.2
@@ -58,6 +60,12 @@ COCO-API is needed for running. Installation is as follows:
# not to install the COCO API into global site-packages
python setup.py install --user
+**Installing COCO-API on Windows:**
+
+ # if cython is not installed
+ pip install Cython
+ # Because the original cocoapi does not support Windows, a third-party version is used, which only supports Python3
+ pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
## PaddleDetection
diff --git a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md
index 7226274aa214f2956b55c36ddedc61c0f4d15d4e..dc90908fbffec327e06c84e6f75b5e35b29a20dd 100644
--- a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md
+++ b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md
@@ -35,7 +35,7 @@ python -c "import paddle; print(paddle.__version__)"
### Requirements:
-- Python2 or Python3
+- Python2 or Python3 (only Python3 is supported on Windows)
- CUDA >= 8.0
- cuDNN >= 5.0
- nccl >= 2.1.2
@@ -56,6 +56,12 @@ python -c "import paddle; print(paddle.__version__)"
# if you don't have permission or prefer not to install into the global site-packages
python setup.py install --user
+**Installing COCO-API on Windows:**
+
+ # install Cython if it is not installed
+ pip install Cython
+ # the original cocoapi does not support Windows, so a third-party version that only supports Python3 is used
+ pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
## PaddleDetection
diff --git a/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md b/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md
index db71b2b223e94d6ac28ee617ea1b19e560fb82eb..d6042ada1293ea77a1670871bbff1d6f94f8a163 100644
--- a/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md
+++ b/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md
@@ -1,3 +1,5 @@
+English | [简体中文](MODEL_ZOO_cn.md)
+
# Model Zoo and Benchmark
## Environment
@@ -76,6 +78,7 @@ The backbone models pretrained on ImageNet are available. All backbone models ar
| ResNet50-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 44.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r50_fpn_1x.tar) |
| ResNet101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 46.4 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r101_vd_fpn_1x.tar) |
| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 47.3 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) |
+| SENet154-vd-FPN | Cascade Mask | c3-c5 | 1 | 1.44x | - | 51.9 | 43.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.tar) |
#### Notes:
- Deformable ConvNets v2(dcn_v2) reference from [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
@@ -155,3 +158,8 @@ results of image size 608/416/320 above.
**NOTE**: MobileNet-SSD is trained on 2 GPUs with a total batch size of 64 for 120 epochs. VGG-SSD is trained on 4 GPUs with a total batch size of 32 for 240 epochs. SSD training data augmentations: random color distortion,
random cropping, random expansion, random flipping.
+
+
+## Face Detection
+
+Please refer to [face detection models](../configs/face_detection) for details.
diff --git a/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md b/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md
index 86450ad0af40c3671d4ca6b2882e88b6df7d9b8f..918a9cb249d105d9ed7cd0411ede8c5e8ce0c3d5 100644
--- a/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md
+++ b/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md
@@ -75,6 +75,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
| ResNet50-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 44.2 | - | [download](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r50_fpn_1x.tar) |
| ResNet101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 46.4 | - | [download](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r101_vd_fpn_1x.tar) |
| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 47.3 | - | [download](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) |
+| SENet154-vd-FPN | Cascade Mask | c3-c5 | 1 | 1.44x | - | 51.9 | 43.9 | [download](https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.tar) |
#### Notes:
- Deformable ConvNets v2 (dcn_v2) follows the paper [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
@@ -149,3 +150,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
| VGG16 | 512 | 8 | 240e | 65.975 | 80.2 | [download](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512_voc.tar) |
**Notes:** MobileNet-SSD is trained on 2 GPUs with a total batch size of 64 for 120 epochs. VGG-SSD is trained with a total batch size of 32 for 240 epochs. Data augmentation includes: random color distortion, random cropping, random expansion, and random flipping.
+
+## Face Detection
+
+Please refer to [face detection models](../configs/face_detection) for details.
diff --git a/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md b/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md
index 5b687b35e72718be939b83299a5e9a799d6f5e79..c6649f71f7b41c2fda00141e20c16dffc1f93e40 100644
--- a/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md
+++ b/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md
@@ -2,7 +2,7 @@ English | [简体中文](QUICK_STARTED_cn.md)
# Quick Start
-This tutorial fine-tunes a pretrained detection model on a tiny dataset so that users can quickly produce a model and learn PaddleDetection. The model can be trained in around 15min with good performance.
+This tutorial fine-tunes a pretrained detection model on a tiny dataset so that users can quickly produce a model and learn PaddleDetection. The model can be trained in around 20min with good performance.
## Data Preparation
diff --git a/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md b/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md
index 78c019a6bfe7bcb82ca5a42ea9d65cf26f397aeb..8c02ffb798250a0fa29db02ab7e4b38f04e4daac 100644
--- a/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md
+++ b/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md
@@ -2,7 +2,7 @@
# Quick Start
-To let users produce a model and learn how to use PaddleDetection in a very short time, this tutorial fine-tunes a pretrained detection model on a small dataset. On a single P40 GPU, a decent model can be produced in about 15min.
+To let users produce a model and learn how to use PaddleDetection in a very short time, this tutorial fine-tunes a pretrained detection model on a small dataset. On a single P40 GPU, a decent model can be produced in about 20min.
## 数据准备
diff --git a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md b/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md
index 61bc1312364e32b8ecb8d0c0ffb618894e1d6f2b..0bc0377acb749ee896050660ba122a3a77ca20b7 100644
--- a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md
+++ b/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md
@@ -1,3 +1,5 @@
+English | [简体中文](TRANSFER_LEARNING_cn.md)
+
# Transfer Learning
Transfer learning aims at learning new knowledge from existing knowledge. For example, a model pretrained on ImageNet can initialize detection models, and a model pretrained on COCO can initialize detection models trained on the PascalVOC dataset.
@@ -6,7 +8,10 @@ In transfer learning, if different dataset and the number of classes is used, th
## Transfer Learning in PaddleDetection
-In transfer learning, it's needed to load pretrained model selectively. Set `finetune_exclude_pretrained_params` in YAML configuration files or set `-o finetune_exclude_pretrained_params` in command line.
+In transfer learning, the pretrained model needs to be loaded selectively. Either of the following two methods can be used:
+
+1. Set `finetune_exclude_pretrained_params` in the YAML configuration file. Please refer to this [config file](../configs/yolov3_mobilenet_v1_fruit.yml#L15)
+2. Set `-o finetune_exclude_pretrained_params` on the command line. For example:
```python
export PYTHONPATH=$PYTHONPATH:.
diff --git a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md b/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md
index 255d31857799a4b04558ecb6c84244a1e69f803b..a54210d0aa9ef32096620e1830d49a2b2430b189 100644
--- a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md
+++ b/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md
@@ -6,7 +6,10 @@
## Transfer learning with PaddleDetection
-In transfer learning, the pretrained model is loaded selectively, either by setting the finetune_exclude_pretrained_params field in the YAML config file, or by setting -o finetune_exclude_pretrained_params in the launch arguments of train.py.
+In transfer learning, the pretrained model is loaded selectively; this can be done in either of the following two ways:
+
+1. Set the `finetune_exclude_pretrained_params` field in the YAML config file. See this [config file](../configs/yolov3_mobilenet_v1_fruit.yml#L15)
+2. Set `-o finetune_exclude_pretrained_params` in the launch arguments of train.py. For example:
```python
export PYTHONPATH=$PYTHONPATH:.
diff --git a/PaddleCV/PaddleDetection/docs/images/bench_ssd_yolo_infer.png b/PaddleCV/PaddleDetection/docs/images/bench_ssd_yolo_infer.png
new file mode 100644
index 0000000000000000000000000000000000000000..f81600b14cbe9af4b12f86c574ce2dffa937774f
Binary files /dev/null and b/PaddleCV/PaddleDetection/docs/images/bench_ssd_yolo_infer.png differ
diff --git a/PaddleCV/PaddleDetection/inference/CMakeLists.txt b/PaddleCV/PaddleDetection/inference/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed610da047316d0b08d73d51e0223a06180b4026
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/CMakeLists.txt
@@ -0,0 +1,272 @@
+cmake_minimum_required(VERSION 3.0)
+project(cpp_inference_demo CXX C)
+message("cmake module path: ${CMAKE_MODULE_PATH}")
+message("cmake root path: ${CMAKE_ROOT}")
+option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
+option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON)
+option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
+option(USE_TENSORRT "Compile demo with TensorRT." OFF)
+
+SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
+SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
+SET(CUDA_LIB "" CACHE PATH "Location of libraries")
+
+
+include(external-cmake/yaml-cpp.cmake)
+
+macro(safe_set_static_flag)
+ foreach(flag_var
+ CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+ CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+ if(${flag_var} MATCHES "/MD")
+ string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+ endif(${flag_var} MATCHES "/MD")
+ endforeach(flag_var)
+endmacro()
+
+if (WITH_MKL)
+ ADD_DEFINITIONS(-DUSE_MKL)
+endif()
+
+if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "")
+ message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_inference_dir")
+endif()
+
+if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
+ message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
+endif()
+
+include_directories("${CMAKE_SOURCE_DIR}/")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
+include_directories("${PADDLE_DIR}/")
+include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
+include_directories("${PADDLE_DIR}/third_party/install/glog/include")
+include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
+include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
+ include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
+ include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
+endif()
+include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
+include_directories("${PADDLE_DIR}/third_party/boost")
+include_directories("${PADDLE_DIR}/third_party/eigen3")
+
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
+endif()
+
+link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
+link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
+link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
+link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
+link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
+link_directories("${PADDLE_DIR}/paddle/lib/")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}")
+if (WIN32)
+ include_directories("${PADDLE_DIR}/paddle/fluid/inference")
+ link_directories("${PADDLE_DIR}/paddle/fluid/inference")
+ include_directories("${OPENCV_DIR}/build/include")
+ include_directories("${OPENCV_DIR}/opencv/build/include")
+ link_directories("${OPENCV_DIR}/build/x64/vc14/lib")
+else ()
+ include_directories("${PADDLE_DIR}/paddle/include")
+ link_directories("${PADDLE_DIR}/paddle/lib")
+ include_directories("${OPENCV_DIR}/include")
+ link_directories("${OPENCV_DIR}/lib")
+endif ()
+
+if (WIN32)
+ add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+ set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+ set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
+ if (WITH_STATIC_LIB)
+ safe_set_static_flag()
+ add_definitions(-DSTATIC_LIB)
+ endif()
+else()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -std=c++11")
+ set(CMAKE_STATIC_LIBRARY_PREFIX "")
+endif()
+
+# TODO let users define cuda lib path
+if (WITH_GPU)
+ if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "")
+ message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64")
+ endif()
+ if (NOT WIN32)
+ if (NOT DEFINED CUDNN_LIB)
+ message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64")
+ endif()
+ endif(NOT WIN32)
+endif()
+
+
+if (NOT WIN32)
+ if (USE_TENSORRT AND WITH_GPU)
+ include_directories("${PADDLE_DIR}/third_party/install/tensorrt/include")
+ link_directories("${PADDLE_DIR}/third_party/install/tensorrt/lib")
+ endif()
+endif(NOT WIN32)
+
+if (NOT WIN32)
+ set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
+ if(EXISTS ${NGRAPH_PATH})
+ include(GNUInstallDirs)
+ include_directories("${NGRAPH_PATH}/include")
+ link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
+ set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if(WITH_MKL)
+ include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
+ if (WIN32)
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
+ ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
+ else ()
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
+ ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif ()
+ set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
+ if(EXISTS ${MKLDNN_PATH})
+ include_directories("${MKLDNN_PATH}/include")
+ if (WIN32)
+ set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
+ else ()
+ set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+ endif ()
+ endif()
+else()
+ set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+if(WITH_STATIC_LIB)
+ if (WIN32)
+ set(DEPS
+ ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+ else ()
+ set(DEPS
+ ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+else()
+ if (WIN32)
+ set(DEPS
+ ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+ else ()
+ set(DEPS
+ ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if (NOT WIN32)
+ set(EXTERNAL_LIB "-lrt -ldl -lpthread")
+ set(DEPS ${DEPS}
+ ${MATH_LIB} ${MKLDNN_LIB}
+ glog gflags protobuf yaml-cpp z xxhash
+ ${EXTERNAL_LIB})
+ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ set(DEPS ${DEPS} snappystream)
+ endif()
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ set(DEPS ${DEPS} snappy)
+ endif()
+else()
+ set(DEPS ${DEPS}
+ ${MATH_LIB} ${MKLDNN_LIB}
+ opencv_world346 glog libyaml-cppmt gflags_static libprotobuf zlibstatic xxhash ${EXTERNAL_LIB})
+ set(DEPS ${DEPS} libcmt shlwapi)
+ if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+ set(DEPS ${DEPS} snappy)
+ endif()
+ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+ set(DEPS ${DEPS} snappystream)
+ endif()
+endif(NOT WIN32)
+
+if(WITH_GPU)
+ if(NOT WIN32)
+ if (USE_TENSORRT)
+ set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+ set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+ else()
+ set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
+ set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
+ set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+endif()
+
+if (NOT WIN32)
+ set(OPENCV_LIB_DIR ${OPENCV_DIR}/lib)
+ if(EXISTS "${OPENCV_LIB_DIR}")
+ message("OPENCV_LIB:" ${OPENCV_LIB_DIR})
+ else()
+ set(OPENCV_LIB_DIR ${OPENCV_DIR}/lib64)
+ message("OPENCV_LIB:" ${OPENCV_LIB_DIR})
+ endif()
+
+ set(OPENCV_3RD_LIB_DIR ${OPENCV_DIR}/share/OpenCV/3rdparty/lib)
+ if(EXISTS "${OPENCV_3RD_LIB_DIR}")
+ message("OPENCV_3RD_LIB_DIR:" ${OPENCV_3RD_LIB_DIR})
+ else()
+ set(OPENCV_3RD_LIB_DIR ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64)
+ message("OPENCV_3RD_LIB_DIR:" ${OPENCV_3RD_LIB_DIR})
+ endif()
+
+ set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_imgcodecs${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_imgproc${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_core${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_highgui${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libIlmImf${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibjasper${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibpng${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibtiff${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libittnotify${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibjpeg-turbo${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibwebp${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libzlib${CMAKE_STATIC_LIBRARY_SUFFIX})
+ if(EXISTS "${OPENCV_3RD_LIB_DIR}/libippiw${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libippiw${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+ if(EXISTS "${OPENCV_3RD_LIB_DIR}/libippicv${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libippicv${CMAKE_STATIC_LIBRARY_SUFFIX})
+ endif()
+endif()
+# message(${CMAKE_CXX_FLAGS})
+# set(CMAKE_CXX_FLAGS "-g ${CMAKE_CXX_FLAGS}")
+
+SET(PADDLESEG_INFERENCE_SRCS preprocessor/preprocessor.cpp
+ preprocessor/preprocessor_detection.cpp predictor/detection_predictor.cpp
+ utils/detection_result.pb.cc)
+
+ADD_LIBRARY(libpaddleseg_inference STATIC ${PADDLESEG_INFERENCE_SRCS})
+target_link_libraries(libpaddleseg_inference ${DEPS})
+
+add_executable(detection_demo detection_demo.cpp)
+
+ADD_DEPENDENCIES(libpaddleseg_inference ext-yaml-cpp)
+ADD_DEPENDENCIES(detection_demo ext-yaml-cpp libpaddleseg_inference)
+target_link_libraries(detection_demo ${DEPS} libpaddleseg_inference)
+
+if (WIN32)
+ add_custom_command(TARGET detection_demo POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
+ )
+endif()
+
+execute_process(COMMAND cp -r ${CMAKE_SOURCE_DIR}/images ${CMAKE_SOURCE_DIR}/conf ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/PaddleCV/PaddleDetection/inference/LICENSE b/PaddleCV/PaddleDetection/inference/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/PaddleCV/PaddleDetection/inference/README.md b/PaddleCV/PaddleDetection/inference/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..54faa0f0db80d710316039f91a09a325ba741c5e
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/README.md
@@ -0,0 +1,170 @@
+# PaddleDetection C++ Inference Deployment
+
+## Contents
+
+[1. Overview](#1-overview)
+
+[2. Main directories and files](#2-main-directories-and-files)
+
+[3. Compilation](#3-compilation)
+
+[4. Prediction and visualization](#4-prediction-and-visualization)
+
+
+
+
+## 1. Overview
+
+This directory provides a cross-platform C++ deployment solution for image detection models. With a small amount of configuration and code, users can integrate a model into their own services and run image detection tasks.
+
+The design has four main goals:
+- Cross-platform: compile, develop, and deploy on both Windows and Linux
+- Extensible: users can implement custom data preprocessing and other logic for new models
+- High performance: besides the performance advantages of `PaddlePaddle` itself, key steps are optimized for the characteristics of image detection
+- Support for common image detection models such as YOLOv3, Faster-RCNN, and Faster-RCNN+FPN; models can be loaded for common detection tasks with minimal configuration
+
+## 2. Main directories and files
+
+```bash
+deploy
+├── detection_demo.cpp # C++ demo for image detection prediction
+│
+├── conf
+│   ├── detection_rcnn.yaml # sample Faster RCNN detection config
+│   └── detection_rcnn_fpn.yaml # sample Faster RCNN + FPN detection config
+├── images
+│   └── detection_rcnn # sample test images for Faster RCNN + FPN detection
+├── tools
+│   └── vis.py # sample script to visualize detection results
+├── docs
+│   ├── linux_build.md # Linux build guide
+│   ├── windows_vs2015_build.md # Windows VS2015 build guide
+│   └── windows_vs2019_build.md # Windows VS2019 build guide
+│
+├── utils # common utility functions
+│
+├── preprocess # data preprocessing code
+│
+├── predictor # model loading and prediction code
+│
+├── CMakeList.txt # CMake entry file
+│
+└── external-cmake # CMake files for external dependencies (currently only yaml-cpp)
+
+```
+
+## 3. Compilation
+Compilation and use are supported on both `Windows` and `Linux`:
+- [Linux build guide](./docs/linux_build.md)
+- [Windows build guide with Visual Studio 2019 Community](./docs/windows_vs2019_build.md)
+- [Windows build guide with Visual Studio 2015](./docs/windows_vs2015_build.md)
+
+On `Windows`, we recommend building the `CMake` project directly with the latest `Visual Studio 2019 Community`.
+
+## 4. Prediction and visualization
+
+After compilation, the required executable and libraries are in place. Taking our `faster rcnn` detection model as an example, this section walks through the general workflow for deploying an image detection model.
+
+### 1. Download the model files
+We provide Faster RCNN and Faster RCNN+FPN models for prediction on the COCO17 dataset, available for download here: [Faster RCNN sample model](https://paddleseg.bj.bcebos.com/inference/faster_rcnn_pp50.zip),
+ [Faster RCNN + FPN sample model](https://paddleseg.bj.bcebos.com/inference/faster_rcnn_pp50_fpn.zip).
+
+Download and unzip; the extracted directory looks like:
+```
+faster_rcnn_pp50/
+├── __model__ # model file
+│
+└── __params__ # parameters file
+```
+Copy the extracted directory to a suitable location:
+
+**Assume** that on `Windows` the model and parameter files are placed at `D:\projects\models\faster_rcnn_pp50`.
+
+**Assume** that on `Linux` the corresponding path is `/root/projects/models/faster_rcnn_pp50/`.
+
+
+### 2. Modify the configuration
+
+The `conf` directory of the `inference` source tree (this directory) ships a sample Faster RCNN configuration, `detection_rcnn.yaml`. Its fields are explained below:
+
+```yaml
+DEPLOY:
+    # whether to use the GPU for prediction
+    USE_GPU: 1
+    # directory containing the model and parameter files
+    MODEL_PATH: "/root/projects/models/faster_rcnn_pp50"
+    # model file name
+    MODEL_FILENAME: "__model__"
+    # parameters file name
+    PARAMS_FILENAME: "__params__"
+    # standard input size; images of a different size are resized
+    EVAL_CROP_SIZE: (608, 608)
+    # resize mode; UNPADDING and RANGE_SCALING are supported
+    RESIZE_TYPE: "RANGE_SCALING"
+    # target length of the short side; effective only with RANGE_SCALING
+    TARGET_SHORT_SIZE : 800
+    # normalization mean
+    MEAN: [0.485, 0.456, 0.406]
+    # normalization std
+    STD: [0.229, 0.224, 0.225]
+    # image type, rgb or rgba
+    IMAGE_TYPE: "rgb"
+    # number of classes
+    NUM_CLASSES: 1
+    # number of channels
+    CHANNELS : 3
+    # preprocessor; DetectionPreProcessor is the generic preprocessing class for detection
+    PRE_PROCESSOR: "DetectionPreProcessor"
+    # predictor mode; NATIVE and ANALYSIS are supported
+    PREDICTOR_MODE: "ANALYSIS"
+    # batch_size per prediction
+    BATCH_SIZE : 3
+    # maximum length of the long side after scaling; -1 means unlimited
+    RESIZE_MAX_SIZE: 1333
+    # number of input tensors
+    FEEDS_SIZE: 3
+
+```
+Set the field `MODEL_PATH` to the directory where you placed the model downloaded and extracted in the **previous step**. For more configuration fields, see [Configuration file reference](./docs/configuration.md).
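+
+As a concrete illustration of how `RESIZE_TYPE: "RANGE_SCALING"` interacts with `TARGET_SHORT_SIZE` and `RESIZE_MAX_SIZE`, the standalone sketch below (not part of the shipped sources; the helper name `range_scaling_ratio` is made up for this example) computes the scale ratio the way the fields above describe it:
+
+```cpp
+#include <algorithm>
+#include <cstdio>
+
+// Scale so the short side reaches target_short_size, but never let the
+// long side exceed resize_max_size (a value <= 0 means "no limit").
+float range_scaling_ratio(int w, int h, int target_short_size, int resize_max_size) {
+    int short_side = std::min(w, h);
+    int long_side = std::max(w, h);
+    float scale = static_cast<float>(target_short_size) / short_side;
+    if (resize_max_size > 0 && long_side * scale > resize_max_size) {
+        scale = static_cast<float>(resize_max_size) / long_side;  // cap the long side
+    }
+    return scale;
+}
+
+int main() {
+    // A 1280x720 input with TARGET_SHORT_SIZE=800 would scale the long side
+    // to 1422, beyond RESIZE_MAX_SIZE=1333, so the ratio is capped at 1333/1280.
+    float r = range_scaling_ratio(1280, 720, 800, 1333);
+    std::printf("scale ratio = %.4f -> %dx%d\n", r, static_cast<int>(1280 * r), static_cast<int>(720 * r));
+    return 0;
+}
+```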
+
+### 3. Run prediction
+
+In a terminal, change into the directory containing the generated executable (use `cmd` on Windows).
+
+On `Linux`, run:
+```shell
+./detection_demo --conf=conf/detection_rcnn.yaml --input_dir=images/detection_rcnn
+```
+On `Windows`, run:
+```shell
+.\detection_demo.exe --conf=conf\detection_rcnn.yaml --input_dir=images\detection_rcnn\
+```
+
+
+The two command-line arguments are:
+
+| Argument | Meaning |
+|-------|----------|
+| conf | path to the model's YAML configuration file |
+| input_dir | directory of images to predict |
+
+The configuration file is described in the previous step. The demo scans every image under input_dir, prints a prediction result for each image to the screen, and saves the result next to the image as an `X.pb` file (where X is the image file name). The results can be visualized with the helper script vis.py.
+
+**Visualizing the detection results**
+
+To run the visualization script, pass the image path, the path of the detection result pb file, the box score threshold, and the class-to-label mapping file on the command line; it produces a visualized image `X.png` (a COCO17 class-label mapping file, coco17.json, is provided under tools).
+
+```bash
+python vis.py --img_path=../build/images/detection_rcnn/000000087038.jpg --img_result_path=../build/images/detection_rcnn/000000087038.jpg.pb --threshold=0.1 --c2l_path=coco17.json
+```
+
+Detection results (results for different images are separated by a blank line)
+
+```Original image:```
+
+
+
+```Detection result:```
+
+
diff --git a/PaddleCV/PaddleDetection/inference/conf/detection_rcnn.yaml b/PaddleCV/PaddleDetection/inference/conf/detection_rcnn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50c23fbb3e53ff159844e65da4ed194e169cffb6
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/conf/detection_rcnn.yaml
@@ -0,0 +1,18 @@
+DEPLOY:
+ USE_GPU: 1
+ MODEL_PATH: "/root/projects/models/faster_rcnn_pp50"
+ MODEL_FILENAME: "__model__"
+ PARAMS_FILENAME: "__params__"
+ EVAL_CROP_SIZE: (608, 608)
+ RESIZE_TYPE: "RANGE_SCALING"
+ TARGET_SHORT_SIZE : 800
+ MEAN: [0.485, 0.456, 0.406]
+ STD: [0.229, 0.224, 0.225]
+ IMAGE_TYPE: "rgb"
+ NUM_CLASSES: 1
+ CHANNELS : 3
+ PRE_PROCESSOR: "DetectionPreProcessor"
+ PREDICTOR_MODE: "ANALYSIS"
+ BATCH_SIZE : 3
+ RESIZE_MAX_SIZE: 1333
+ FEEDS_SIZE: 3
diff --git a/PaddleCV/PaddleDetection/inference/conf/detection_rcnn_fpn.yaml b/PaddleCV/PaddleDetection/inference/conf/detection_rcnn_fpn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d6635ef8c2b29fb0ca9318d1ec08f1f7be037f7
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/conf/detection_rcnn_fpn.yaml
@@ -0,0 +1,19 @@
+DEPLOY:
+ USE_GPU: 1
+ MODEL_PATH: "/root/projects/models/faster_rcnn_pp50_fpn"
+ MODEL_FILENAME: "__model__"
+ PARAMS_FILENAME: "__params__"
+ EVAL_CROP_SIZE: (608, 608)
+ RESIZE_TYPE: "RANGE_SCALING"
+ TARGET_SHORT_SIZE : 800
+ MEAN: [0.485, 0.456, 0.406]
+ STD: [0.229, 0.224, 0.225]
+ IMAGE_TYPE: "rgb"
+ NUM_CLASSES: 1
+ CHANNELS : 3
+ PRE_PROCESSOR: "DetectionPreProcessor"
+ PREDICTOR_MODE: "ANALYSIS"
+ BATCH_SIZE : 1
+ RESIZE_MAX_SIZE: 1333
+ FEEDS_SIZE: 3
+ COARSEST_STRIDE: 32
diff --git a/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg b/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9f77f5d5f057b6f92dc096da704ecb8dee99bdf5
Binary files /dev/null and b/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg differ
diff --git a/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg.png b/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa2c63d1c3dd1ca08d517239842ce5bd40310d01
Binary files /dev/null and b/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg.png differ
diff --git a/PaddleCV/PaddleDetection/inference/detection_demo.cpp b/PaddleCV/PaddleDetection/inference/detection_demo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7e711ed6970358c528a3198bb6168a871d83d380
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/detection_demo.cpp
@@ -0,0 +1,42 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+#include <gflags/gflags.h>
+#include "predictor/detection_predictor.h"
+#include "utils/utils.h"
+
+DEFINE_string(conf, "", "Configuration File Path");
+DEFINE_string(input_dir, "", "Directory of Input Images");
+
+int main(int argc, char** argv) {
+ // 0. parse args
+ google::ParseCommandLineFlags(&argc, &argv, true);
+ if (FLAGS_conf.empty() || FLAGS_input_dir.empty()) {
+        std::cout << "Usage: ./detection_demo --conf=/config/path/to/your/model --input_dir=/directory/of/your/input/images" << std::endl;
+ return -1;
+ }
+ // 1. create a predictor and init it with conf
+ PaddleSolution::DetectionPredictor predictor;
+ if (predictor.init(FLAGS_conf) != 0) {
+ LOG(FATAL) << "Fail to init predictor";
+ return -1;
+ }
+
+    // 2. get all image files with supported extensions under input_dir
+ auto imgs = PaddleSolution::utils::get_directory_images(FLAGS_input_dir, ".jpeg|.jpg|.JPEG|.JPG|.bmp|.BMP|.png|.PNG");
+
+ // 3. predict
+ predictor.predict(imgs);
+ return 0;
+}
diff --git a/PaddleCV/PaddleDetection/inference/docs/configuration.md b/PaddleCV/PaddleDetection/inference/docs/configuration.md
new file mode 100644
index 0000000000000000000000000000000000000000..45c8605c96a2332da13f388343ec65b729439a0d
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/docs/configuration.md
@@ -0,0 +1,75 @@
+# Configuration File Reference for Inference Deployment
+## Basic concepts
+The configuration file is the customization interface of the inference deployment solution: once the meaning of each field is understood, the deployment can be tailored without writing any code. To describe the fields precisely, we first introduce the kinds of fields used in the file.
+
+### Field requirement
+- **required**: the field must be set explicitly, otherwise the deployment program cannot start.
+- **optional**: the field may be omitted; the deployment system supplies a default value, listed below.
+
+### Field value types
+- **int**: the field takes an integer value.
+- **string**: the field takes a string value.
+- **list**: the field takes a list value.
+- **tuple**: the field takes a two-element tuple value.
+
+## Fields
+
+```yaml
+# All deployment configuration fields live under the DEPLOY key
+DEPLOY:
+    # type: required int
+    # meaning: whether to use the GPU for prediction. 0: no, 1: yes
+    USE_GPU: 1
+    # type: required string
+    # meaning: directory containing the model and parameter files
+    MODEL_PATH: "/path/to/model_directory"
+    # type: required string
+    # meaning: model file name
+    MODEL_FILENAME: "__model__"
+    # type: required string
+    # meaning: parameters file name
+    PARAMS_FILENAME: "__params__"
+    # type: optional string
+    # meaning: image resize mode; UNPADDING and RANGE_SCALING are supported. Default is UNPADDING.
+    RESIZE_TYPE: "UNPADDING"
+    # type: required tuple
+    # meaning: with UNPADDING, images are resized directly to this size.
+    EVAL_CROP_SIZE: (513, 513)
+    # type: optional int
+    # meaning: with RANGE_SCALING, the short side of the image is scaled to this value
+    # and the long side is scaled by the same ratio, so the image is resized
+    # without changing its aspect ratio. Default is 0.
+    TARGET_SHORT_SIZE: 800
+    # type: optional int
+    # meaning: with RANGE_SCALING, the long side may not be scaled beyond this value. Default is 0.
+    RESIZE_MAX_SIZE: 1333
+    # type: required list
+    # meaning: per-channel mean used for normalization
+    MEAN: [104.008, 116.669, 122.675]
+    # type: required list
+    # meaning: per-channel std used for normalization
+    STD: [1.0, 1.0, 1.0]
+    # type: string
+    # meaning: image type, rgb or rgba
+    IMAGE_TYPE: "rgb"
+    # type: required int
+    # meaning: number of classes
+    NUM_CLASSES: 2
+    # type: required int
+    # meaning: number of image channels
+    CHANNELS : 3
+    # type: required string
+    # meaning: preprocessor; DetectionPreProcessor is the generic preprocessing class for detection.
+    PRE_PROCESSOR: "DetectionPreProcessor"
+    # type: required string
+    # meaning: predictor mode; NATIVE and ANALYSIS are supported
+    PREDICTOR_MODE: "ANALYSIS"
+    # type: required int
+    # meaning: batch_size per prediction
+    BATCH_SIZE : 3
+    # type: optional int
+    # meaning: number of input tensors. Most models do not need to set this. Default is 1.
+    FEEDS_SIZE: 2
+    # type: optional int
+    # meaning: pad the image sides to a multiple of this value. Default is 1.
+    COARSEST_STRIDE: 32
+```
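+
+To make `COARSEST_STRIDE` concrete: the predictor pads every image in a batch with zeros up to the batch-maximum height and width, rounded up to a multiple of the stride (this mirrors `padding_minibatch` in `predictor/detection_predictor.cpp`). A minimal standalone sketch of the rounding rule:
+
+```cpp
+#include <cmath>
+#include <cstdio>
+
+// Round v up to the next multiple of stride, as done when padding a batch.
+int align_up(int v, int stride) {
+    return static_cast<int>(std::ceil(v / static_cast<float>(stride))) * stride;
+}
+
+int main() {
+    const int coarsest_stride = 32;
+    const int max_h = 749, max_w = 1333;  // batch maxima after resizing
+    std::printf("padded to %dx%d\n",
+                align_up(max_h, coarsest_stride),   // 768
+                align_up(max_w, coarsest_stride));  // 1344
+    return 0;
+}
+```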
diff --git a/PaddleCV/PaddleDetection/inference/docs/linux_build.md b/PaddleCV/PaddleDetection/inference/docs/linux_build.md
new file mode 100644
index 0000000000000000000000000000000000000000..2ad9e46383123efee47b941f97c8e7690c7b95d6
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/docs/linux_build.md
@@ -0,0 +1,84 @@
+# Linux Build Guide
+
+## Overview
+This document has been tested on `Linux` with `GCC 4.8.5` and `GCC 4.9.4`. To build with a newer G++ version, the Paddle inference library must be recompiled from source; see [Building the Paddle inference library from source](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html#id15).
+
+## Prerequisites
+* G++ 4.8.2 ~ 4.9.4
+* CUDA 8.0 / CUDA 9.0
+* CMake 3.0+
+
+Make sure the software above is installed. **All examples below assume the working directory is `/root/projects/`.**
+
+### Step1: Download the code
+
+1. `mkdir -p /root/projects/paddle_models && cd /root/projects/paddle_models`
+2. `git clone https://github.com/PaddlePaddle/models.git`
+
+The `C++` inference code lives in `/root/projects/paddle_models/models/PaddleCV/PaddleDetection/inference` and does not depend on any other directory under `PaddleDetection`.
+
+
+### Step2: Download the PaddlePaddle C++ inference library fluid_inference
+
+Only `CUDA 8` and `CUDA 9` are currently supported. Download the matching (develop) version from the [PaddlePaddle inference library download page](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html).
+
+
+After downloading and extracting, `/root/projects/fluid_inference` contains:
+```
+fluid_inference
+├── paddle # paddle core libraries and headers
+|
+├── third_party # third-party dependencies and headers
+|
+└── version.txt # version and build info
+```
+
+### Step3: Install and configure OpenCV
+
+```shell
+# 0. switch to the /root/projects directory
+cd /root/projects
+# 1. download the OpenCV 3.4.6 source
+wget -c https://paddleseg.bj.bcebos.com/inference/opencv-3.4.6.zip
+# 2. unzip
+unzip opencv-3.4.6.zip && cd opencv-3.4.6
+# 3. create the build directory and compile; this installs to /root/projects/opencv3
+mkdir build && cd build
+cmake .. -DCMAKE_INSTALL_PREFIX=/root/projects/opencv3 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_IPP=OFF -DBUILD_IPP_IW=OFF -DWITH_LAPACK=OFF -DWITH_EIGEN=OFF -DCMAKE_INSTALL_LIBDIR=lib64 -DWITH_ZLIB=ON -DBUILD_ZLIB=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_PNG=ON -DBUILD_PNG=ON -DWITH_TIFF=ON -DBUILD_TIFF=ON
+make -j4
+make install
+```
+
+**Note:** after the steps above, `opencv` is installed under `/root/projects/opencv3`.
+
+### Step4: Build
+
+The `CMake` build takes four parameters that point to the core dependencies:
+
+| Parameter | Meaning |
+| ---- | ---- |
+| CUDA_LIB | CUDA library path |
+| CUDNN_LIB | cuDNN library path |
+| OPENCV_DIR | OpenCV install path |
+| PADDLE_DIR | Paddle inference library path |
+
+When running the commands below, **make sure** to replace these parameters with the actual paths of your dependencies:
+
+```shell
+cd /root/projects/paddle_models/models/PaddleCV/PaddleDetection/inference
+
+mkdir build && cd build
+cmake .. -DWITH_GPU=ON -DPADDLE_DIR=/root/projects/fluid_inference -DCUDA_LIB=/usr/local/cuda/lib64/ -DOPENCV_DIR=/root/projects/opencv3/ -DCUDNN_LIB=/usr/local/cuda/lib64/
+make
+```
+
+
+### Step5: Predict and visualize
+
+Run:
+
+```
+./detection_demo --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory
+```
+
+For more details, see the README: [Prediction and visualization](../README.md)
diff --git a/PaddleCV/PaddleDetection/inference/docs/windows_vs2015_build.md b/PaddleCV/PaddleDetection/inference/docs/windows_vs2015_build.md
new file mode 100644
index 0000000000000000000000000000000000000000..b20b219951284e525cbeef667a02c1b9c21de179
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/docs/windows_vs2015_build.md
@@ -0,0 +1,96 @@
+# Windows Build Guide with Visual Studio 2015
+
+The steps in this document have been tested with both `Visual Studio 2015` and `Visual Studio 2019 Community`; we recommend [building the `CMake` project directly with `Visual Studio 2019`](./windows_vs2019_build.md).
+
+
+## Prerequisites
+* Visual Studio 2015
+* CUDA 8.0 / CUDA 9.0
+* CMake 3.0+
+
+Make sure the software above is installed. **All examples below assume the working directory is `D:\projects`.**
+
+### Step1: Download the code
+
+1. Open `cmd` and run `cd D:\projects\paddle_models`
+2. `git clone https://github.com/PaddlePaddle/models.git`
+
+The `C++` inference code lives in `D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference` and does not depend on any other directory under `PaddleDetection`.
+
+
+### Step2: Download the PaddlePaddle C++ inference library fluid_inference
+
+Download the PaddlePaddle inference library matching your Windows environment and extract it to `D:\projects\`:
+
+| CUDA | GPU | Download |
+|------|------|--------|
+| 8.0 | Yes | [fluid_inference.zip](https://bj.bcebos.com/v1/paddleseg/fluid_inference_win.zip) |
+| 9.0 | Yes | [fluid_inference_cuda90.zip](https://paddleseg.bj.bcebos.com/fluid_inference_cuda9_cudnn7.zip) |
+
+After extraction, `D:\projects\fluid_inference` contains:
+```
+fluid_inference
+├── paddle # paddle core libraries and headers
+|
+├── third_party # third-party dependencies and headers
+|
+└── version.txt # version and build info
+```
+
+### Step3: Install and configure OpenCV
+
+1. Download OpenCV 3.4.6 for Windows from the official site: [download link](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
+2. Run the downloaded executable and extract OpenCV to a directory of your choice, e.g. `D:\projects\opencv`
+3. Configure the environment variable as follows:
+    - My Computer -> Properties -> Advanced system settings -> Environment Variables
+    - Find Path among the system variables (create it if missing) and double-click to edit it
+    - Append the OpenCV path, e.g. `D:\projects\opencv\build\x64\vc14\bin`, and save
+
+### Step4: Build the code, using VS2015 as an example
+
+Adjust the paths in the commands below to the dependency locations on your own system.
+
+* Set up the VS2015 environment (adjust to your actual VS install path) by running the following in a `cmd` window:
+* For other VS versions (e.g. VS2019), find the `vcvarsall.bat` of that version and substitute its path in the command
+
+```
+call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
+```
+
+* Generate the project with CMake, where:
+    * PADDLE_DIR: path to the fluid_inference library
+    * CUDA_LIB: CUDA dynamic library directory; adjust to your installation
+    * OPENCV_DIR: OpenCV extraction directory
+
+```
+# switch to the inference source directory
+cd /d D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference
+# create the build directory; to rebuild from scratch, simply delete it
+mkdir build
+cd build
+# generate the VS project with cmake
+D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build> cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DPADDLE_DIR=D:\projects\fluid_inference -DCUDA_LIB=D:\projects\cudalib\v9.0\lib\x64 -DOPENCV_DIR=D:\projects\opencv -T host=x64
+```
+
+The `-G` flag of `cmake` selects the VS version of the generated project; adjust it to your own `VS` version, see the [cmake documentation](https://cmake.org/cmake/help/v3.15/manual/cmake-generators.7.html)
+
+* Build the executable
+
+```
+D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build> msbuild /m /p:Configuration=Release cpp_inference_demo.sln
+```
+
+### Step5: Predict and visualize
+
+The executable produced by the `Visual Studio 2015` build is under `build\release`; switch to that directory:
+```
+cd /d D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build\release
+```
+
+Then run:
+
+```
+detection_demo.exe --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory
+```
+
+For more details, see the README: [Prediction and visualization](../README.md)
diff --git a/PaddleCV/PaddleDetection/inference/docs/windows_vs2019_build.md b/PaddleCV/PaddleDetection/inference/docs/windows_vs2019_build.md
new file mode 100644
index 0000000000000000000000000000000000000000..6a467af4526eabf749be8a6dfae32a833be355c5
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/docs/windows_vs2019_build.md
@@ -0,0 +1,101 @@
+# Visual Studio 2019 Community CMake Build Guide
+
+On Windows we have tested with `Visual Studio 2015` and `Visual Studio 2019 Community`. Microsoft has supported managing `CMake` cross-platform projects directly since `Visual Studio 2017`, but stable and complete support only arrived with `2019`, so if you want CMake-managed builds we recommend doing so under `Visual Studio 2019`.
+
+You can also build the same way as with `VS2015`, by converting the `CMake` project into a `VS` project; the **differences** are noted in that document, see: [Build guide with Visual Studio 2015](./windows_vs2015_build.md)
+
+## Prerequisites
+* Visual Studio 2019
+* CUDA 8.0 / CUDA 9.0
+* CMake 3.0+
+
+Make sure the software above is installed; we use the Community edition of `VS2019`.
+
+**All examples below assume the working directory is `D:\projects`.**
+
+### Step1: Download the code
+
+1. Download the source code: [download link](https://github.com/PaddlePaddle/models/archive/develop.zip)
+2. Extract it and rename the extracted directory to `paddle_models`
+
+The examples below use `D:\projects\paddle_models` as the code path.
+
+
+### Step2: Download the PaddlePaddle C++ inference library fluid_inference
+
+Download the PaddlePaddle inference library matching your Windows environment and extract it to `D:\projects\`:
+
+| CUDA | GPU | Download |
+|------|------|--------|
+| 8.0 | Yes | [fluid_inference.zip](https://bj.bcebos.com/v1/paddleseg/fluid_inference_win.zip) |
+| 9.0 | Yes | [fluid_inference_cuda90.zip](https://paddleseg.bj.bcebos.com/fluid_inference_cuda9_cudnn7.zip) |
+
+After extraction, `D:\projects\fluid_inference` contains:
+```
+fluid_inference
+├── paddle # paddle core libraries and headers
+|
+├── third_party # third-party dependencies and headers
+|
+└── version.txt # version and build info
+```
+**Note:** the `CUDA90` version extracts to a directory named `fluid_inference_cuda90`.
+
+### Step3: Install and configure OpenCV
+
+1. Download OpenCV 3.4.6 for Windows from the official site: [download link](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
+2. Run the downloaded executable and extract OpenCV to a directory of your choice, e.g. `D:\projects\opencv`
+3. Configure the environment variable as follows:
+    - My Computer -> Properties -> Advanced system settings -> Environment Variables
+    - Find Path among the system variables (create it if missing) and double-click to edit it
+    - Append the OpenCV path, e.g. `D:\projects\opencv\build\x64\vc14\bin`, and save
+
+### Step4: Build the CMake project directly with Visual Studio 2019
+
+1. Open Visual Studio 2019 Community and click `Continue without code`
+
+2. Click `File` -> `Open` -> `CMake`
+
+
+Select the path of the project code and open `CMakeList.txt`:
+
+
+
+3. Click `Project` -> `CMake settings for cpp_inference_demo`
+
+
+
+4. Click `Browse` and set the build options pointing at the `CUDA`, `OpenCV`, and `Paddle inference library` paths
+
+
+
+The three build parameters are:
+
+| Parameter | Meaning |
+| ---- | ---- |
+| CUDA_LIB | CUDA library path |
+| OPENCV_DIR | OpenCV install path |
+| PADDLE_DIR | Paddle inference library path |
+
+**When the paths are set**, click `Save and generate CMake cache to load variables`.
+
+5. Click `Build` -> `Build All`
+
+
+
+
+### Step5: Predict and visualize
+
+The executable produced by the `Visual Studio 2019` build is under `out\build\x64-Release`; open `cmd` and switch to that directory:
+
+```
+cd D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\out\build\x64-Release
+```
+
+Then run:
+
+```
+detection_demo.exe --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory
+```
+
+For more details, see the README: [Prediction and visualization](../README.md)
diff --git a/PaddleCV/PaddleDetection/inference/external-cmake/yaml-cpp.cmake b/PaddleCV/PaddleDetection/inference/external-cmake/yaml-cpp.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..15fa2674e00d85f1db7bbdfdceeebadaf0eabf5a
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/external-cmake/yaml-cpp.cmake
@@ -0,0 +1,29 @@
+
+find_package(Git REQUIRED)
+
+include(ExternalProject)
+
+message("${CMAKE_BUILD_TYPE}")
+
+ExternalProject_Add(
+ ext-yaml-cpp
+ GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git
+ GIT_TAG e0e01d53c27ffee6c86153fa41e7f5e57d3e5c90
+ CMAKE_ARGS
+ -DYAML_CPP_BUILD_TESTS=OFF
+ -DYAML_CPP_BUILD_TOOLS=OFF
+ -DYAML_CPP_INSTALL=OFF
+ -DYAML_CPP_BUILD_CONTRIB=OFF
+ -DMSVC_SHARED_RT=OFF
+ -DBUILD_SHARED_LIBS=OFF
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+ -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+ -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+ -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+ -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
+ -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
+ PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
+ # Disable install step
+ INSTALL_COMMAND ""
+ LOG_DOWNLOAD ON
+)
diff --git a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000014439.jpg b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000014439.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0abbdab06eb5950b93908cc91adfa640e8a3ac78
Binary files /dev/null and b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000014439.jpg differ
diff --git a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000087038.jpg b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000087038.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9f77f5d5f057b6f92dc096da704ecb8dee99bdf5
Binary files /dev/null and b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000087038.jpg differ
diff --git a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000570688.jpg b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000570688.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cb304bd56c4010c08611a30dcca58ea9140cea54
Binary files /dev/null and b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000570688.jpg differ
diff --git a/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.cpp b/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ba07e3b6c7fb2152bd7825950a3cd94769f36adc
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.cpp
@@ -0,0 +1,383 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "detection_predictor.h"
+#include <cmath>
+#include <chrono>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include "utils/detection_result.pb.h"
+
+namespace PaddleSolution {
+    /* lod_buffer: every item in lod_buffer is an image matrix after preprocessing
+     * input_buffer: same data as lod_buffer after flattening to a 1-D vector and padding; must be empty before calling this function
+     */
+    void padding_minibatch(const std::vector<std::vector<float>> &lod_buffer, std::vector<float> &input_buffer,
+                           std::vector<int> &resize_heights, std::vector<int> &resize_widths, int channels, int coarsest_stride = 1) {
+ int batch_size = lod_buffer.size();
+ int max_h = -1;
+ int max_w = -1;
+ for(int i = 0; i < batch_size; ++i) {
+ max_h = (max_h > resize_heights[i])? max_h:resize_heights[i];
+ max_w = (max_w > resize_widths[i])? max_w:resize_widths[i];
+ }
+        max_h = static_cast<int>(ceil(static_cast<float>(max_h) / static_cast<float>(coarsest_stride)) * coarsest_stride);
+        max_w = static_cast<int>(ceil(static_cast<float>(max_w) / static_cast<float>(coarsest_stride)) * coarsest_stride);
+ std::cout << "max_w: " << max_w << " max_h: " << max_h << std::endl;
+ input_buffer.insert(input_buffer.end(), batch_size * channels * max_h * max_w, 0);
+ // flatten tensor and padding
+ for(int i = 0; i < lod_buffer.size(); ++i) {
+ float *input_buffer_ptr = input_buffer.data() + i * channels * max_h * max_w;
+ const float *lod_ptr = lod_buffer[i].data();
+ for(int c = 0; c < channels; ++c) {
+ for(int h = 0; h < resize_heights[i]; ++h) {
+ memcpy(input_buffer_ptr, lod_ptr, resize_widths[i] * sizeof(float));
+ lod_ptr += resize_widths[i];
+ input_buffer_ptr += max_w;
+ }
+ input_buffer_ptr += (max_h - resize_heights[i]) * max_w;
+ }
+ }
+ // change resize w, h
+ for(int i = 0; i < batch_size; ++i){
+ resize_widths[i] = max_w;
+ resize_heights[i] = max_h;
+ }
+ }
+
+    void output_detection_result(const float* out_addr, const std::vector<std::vector<size_t>> &lod_vector, const std::vector<std::string> &imgs_batch) {
+ for(int i = 0; i < lod_vector[0].size() - 1; ++i) {
+ DetectionResult detection_result;
+ detection_result.set_filename(imgs_batch[i]);
+ std::cout << imgs_batch[i] << ":" << std::endl;
+ for (int j = lod_vector[0][i]; j < lod_vector[0][i+1]; ++j) {
+ DetectionBox *box_ptr = detection_result.add_detection_boxes();
+                box_ptr->set_class_(static_cast<int>(round(out_addr[0 + j * 6])));
+ box_ptr->set_score(out_addr[1 + j * 6]);
+ box_ptr->set_left_top_x(out_addr[2 + j * 6]);
+ box_ptr->set_left_top_y(out_addr[3 + j * 6]);
+ box_ptr->set_right_bottom_x(out_addr[4 + j * 6]);
+ box_ptr->set_right_bottom_y(out_addr[5 + j * 6]);
+                printf("Class %d, score = %f, left top = [%f, %f], right bottom = [%f, %f]\n",
+                       static_cast<int>(round(out_addr[0 + j * 6])), out_addr[1 + j * 6], out_addr[2 + j * 6],
+                       out_addr[3 + j * 6], out_addr[4 + j * 6], out_addr[5 + j * 6]);
+ }
+ printf("\n");
+ std::ofstream output(imgs_batch[i] + ".pb", std::ios::out | std::ios::trunc | std::ios::binary);
+ detection_result.SerializeToOstream(&output);
+ output.close();
+ }
+ }
+
+ int DetectionPredictor::init(const std::string& conf) {
+ if (!_model_config.load_config(conf)) {
+ LOG(FATAL) << "Fail to load config file: [" << conf << "]";
+ return -1;
+ }
+ _preprocessor = PaddleSolution::create_processor(conf);
+ if (_preprocessor == nullptr) {
+ LOG(FATAL) << "Failed to create_processor";
+ return -1;
+ }
+
+ bool use_gpu = _model_config._use_gpu;
+ const auto& model_dir = _model_config._model_path;
+ const auto& model_filename = _model_config._model_file_name;
+ const auto& params_filename = _model_config._param_file_name;
+
+ // load paddle model file
+ if (_model_config._predictor_mode == "NATIVE") {
+ paddle::NativeConfig config;
+ auto prog_file = utils::path_join(model_dir, model_filename);
+ auto param_file = utils::path_join(model_dir, params_filename);
+ config.prog_file = prog_file;
+ config.param_file = param_file;
+ config.fraction_of_gpu_memory = 0;
+ config.use_gpu = use_gpu;
+ config.device = 0;
+            _main_predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
+ } else if (_model_config._predictor_mode == "ANALYSIS") {
+ paddle::AnalysisConfig config;
+ if (use_gpu) {
+ config.EnableUseGpu(100, 0);
+ }
+ auto prog_file = utils::path_join(model_dir, model_filename);
+ auto param_file = utils::path_join(model_dir, params_filename);
+ config.SetModel(prog_file, param_file);
+ config.SwitchUseFeedFetchOps(false);
+ config.SwitchSpecifyInputNames(true);
+ config.EnableMemoryOptim();
+ _main_predictor = paddle::CreatePaddlePredictor(config);
+ } else {
+ return -1;
+ }
+ return 0;
+
+ }
+
+    int DetectionPredictor::predict(const std::vector<std::string>& imgs) {
+ if (_model_config._predictor_mode == "NATIVE") {
+ return native_predict(imgs);
+ }
+ else if (_model_config._predictor_mode == "ANALYSIS") {
+ return analysis_predict(imgs);
+ }
+ return -1;
+ }
+
+    int DetectionPredictor::native_predict(const std::vector<std::string>& imgs) {
+ int config_batch_size = _model_config._batch_size;
+
+ int channels = _model_config._channels;
+ int eval_width = _model_config._resize[0];
+ int eval_height = _model_config._resize[1];
+ std::size_t total_size = imgs.size();
+ int default_batch_size = std::min(config_batch_size, (int)total_size);
+ int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0);
+ int batch_buffer_size = default_batch_size * channels * eval_width * eval_height;
+
+ auto& input_buffer = _buffer;
+ auto& imgs_batch = _imgs_batch;
+ float sr;
+ // DetectionResultsContainer result_container;
+ for (int u = 0; u < batch; ++u) {
+ int batch_size = default_batch_size;
+ if (u == (batch - 1) && (total_size % default_batch_size)) {
+ batch_size = total_size % default_batch_size;
+ }
+
+ int real_buffer_size = batch_size * channels * eval_width * eval_height;
+            std::vector<paddle::PaddleTensor> feeds;
+ input_buffer.clear();
+ imgs_batch.clear();
+ for (int i = 0; i < batch_size; ++i) {
+ int idx = u * default_batch_size + i;
+ imgs_batch.push_back(imgs[idx]);
+ }
+            std::vector<int> ori_widths;
+            std::vector<int> ori_heights;
+            std::vector<int> resize_widths;
+            std::vector<int> resize_heights;
+            std::vector<float> scale_ratios;
+ ori_widths.resize(batch_size);
+ ori_heights.resize(batch_size);
+ resize_widths.resize(batch_size);
+ resize_heights.resize(batch_size);
+ scale_ratios.resize(batch_size);
+            std::vector<std::vector<float>> lod_buffer(batch_size);
+ if (!_preprocessor->batch_process(imgs_batch, lod_buffer, ori_widths.data(), ori_heights.data(),
+ resize_widths.data(), resize_heights.data(), scale_ratios.data())) {
+ return -1;
+ }
+ // flatten and padding
+ padding_minibatch(lod_buffer, input_buffer, resize_heights, resize_widths, channels, _model_config._coarsest_stride);
+ paddle::PaddleTensor im_tensor, im_size_tensor, im_info_tensor;
+
+ im_tensor.name = "image";
+            im_tensor.shape = std::vector<int>({ batch_size, channels, resize_heights[0], resize_widths[0] });
+ im_tensor.data.Reset(input_buffer.data(), input_buffer.size() * sizeof(float));
+ im_tensor.dtype = paddle::PaddleDType::FLOAT32;
+
+            std::vector<float> image_infos;
+ for(int i = 0; i < batch_size; ++i) {
+ image_infos.push_back(resize_heights[i]);
+ image_infos.push_back(resize_widths[i]);
+ image_infos.push_back(scale_ratios[i]);
+ }
+ im_info_tensor.name = "info";
+            im_info_tensor.shape = std::vector<int>({batch_size, 3});
+ im_info_tensor.data.Reset(image_infos.data(), batch_size * 3 * sizeof(float));
+ im_info_tensor.dtype = paddle::PaddleDType::FLOAT32;
+
+            std::vector<int> image_size;
+ for(int i = 0; i < batch_size; ++i) {
+ image_size.push_back(ori_heights[i]);
+ image_size.push_back(ori_widths[i]);
+ }
+
+            std::vector<float> image_size_f;
+ for(int i = 0; i < batch_size; ++i) {
+ image_size_f.push_back(ori_heights[i]);
+ image_size_f.push_back(ori_widths[i]);
+ image_size_f.push_back(1.0);
+ }
+
+ int feeds_size = _model_config._feeds_size;
+ im_size_tensor.name = "im_size";
+ if(feeds_size == 2) {
+                im_size_tensor.shape = std::vector<int>({ batch_size, 2});
+ im_size_tensor.data.Reset(image_size.data(), batch_size * 2 * sizeof(int));
+ im_size_tensor.dtype = paddle::PaddleDType::INT32;
+ }
+ else if(feeds_size == 3) {
+                im_size_tensor.shape = std::vector<int>({ batch_size, 3});
+ im_size_tensor.data.Reset(image_size_f.data(), batch_size * 3 * sizeof(float));
+ im_size_tensor.dtype = paddle::PaddleDType::FLOAT32;
+ }
+ std::cout << "Feed size = " << feeds_size << std::endl;
+ feeds.push_back(im_tensor);
+ if(_model_config._feeds_size > 2) {
+ feeds.push_back(im_info_tensor);
+ }
+ feeds.push_back(im_size_tensor);
+ _outputs.clear();
+
+ auto t1 = std::chrono::high_resolution_clock::now();
+ if (!_main_predictor->Run(feeds, &_outputs, batch_size)) {
+ LOG(ERROR) << "Failed: NativePredictor->Run() return false at batch: " << u;
+ continue;
+ }
+ auto t2 = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
+ std::cout << "runtime = " << duration << std::endl;
+ std::cout << "Number of outputs:" << _outputs.size() << std::endl;
+ int out_num = 1;
+ // print shape of first output tensor for debugging
+ std::cout << "size of outputs[" << 0 << "]: (";
+ for (int j = 0; j < _outputs[0].shape.size(); ++j) {
+ out_num *= _outputs[0].shape[j];
+ std::cout << _outputs[0].shape[j] << ",";
+ }
+ std::cout << ")" << std::endl;
+
+ // const size_t nums = _outputs.front().data.length() / sizeof(float);
+ // if (out_num % batch_size != 0 || out_num != nums) {
+ // LOG(ERROR) << "outputs data size mismatch with shape size.";
+ // return -1;
+ // }
+ float* out_addr = (float *)(_outputs[0].data.data());
+ output_detection_result(out_addr, _outputs[0].lod, imgs_batch);
+ }
+ return 0;
+ }
+
+    int DetectionPredictor::analysis_predict(const std::vector<std::string>& imgs) {
+
+ int config_batch_size = _model_config._batch_size;
+ int channels = _model_config._channels;
+ int eval_width = _model_config._resize[0];
+ int eval_height = _model_config._resize[1];
+ auto total_size = imgs.size();
+ int default_batch_size = std::min(config_batch_size, (int)total_size);
+ int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0);
+ int batch_buffer_size = default_batch_size * channels * eval_width * eval_height;
+
+ auto& input_buffer = _buffer;
+ auto& imgs_batch = _imgs_batch;
+ //DetectionResultsContainer result_container;
+ for (int u = 0; u < batch; ++u) {
+ int batch_size = default_batch_size;
+ if (u == (batch - 1) && (total_size % default_batch_size)) {
+ batch_size = total_size % default_batch_size;
+ }
+
+ int real_buffer_size = batch_size * channels * eval_width * eval_height;
+            std::vector<paddle::PaddleTensor> feeds;
+ //input_buffer.resize(real_buffer_size);
+ input_buffer.clear();
+ imgs_batch.clear();
+ for (int i = 0; i < batch_size; ++i) {
+ int idx = u * default_batch_size + i;
+ imgs_batch.push_back(imgs[idx]);
+ }
+
+            std::vector<int> ori_widths;
+            std::vector<int> ori_heights;
+            std::vector<int> resize_widths;
+            std::vector<int> resize_heights;
+            std::vector<float> scale_ratios;
+ ori_widths.resize(batch_size);
+ ori_heights.resize(batch_size);
+ resize_widths.resize(batch_size);
+ resize_heights.resize(batch_size);
+ scale_ratios.resize(batch_size);
+
+            std::vector<std::vector<float>> lod_buffer(batch_size);
+ if (!_preprocessor->batch_process(imgs_batch, lod_buffer, ori_widths.data(), ori_heights.data(),
+ resize_widths.data(), resize_heights.data(), scale_ratios.data())){
+ std::cout << "Failed to preprocess!" << std::endl;
+ return -1;
+ }
+
+ //flatten tensor
+ padding_minibatch(lod_buffer, input_buffer, resize_heights, resize_widths, channels, _model_config._coarsest_stride);
+
+            std::vector<std::string> input_names = _main_predictor->GetInputNames();
+ auto im_tensor = _main_predictor->GetInputTensor(input_names.front());
+ im_tensor->Reshape({ batch_size, channels, resize_heights[0], resize_widths[0] });
+ im_tensor->copy_from_cpu(input_buffer.data());
+
+ if(input_names.size() > 2){
+                std::vector<float> image_infos;
+ for(int i = 0; i < batch_size; ++i) {
+ image_infos.push_back(resize_heights[i]);
+ image_infos.push_back(resize_widths[i]);
+ image_infos.push_back(scale_ratios[i]);
+ }
+ auto im_info_tensor = _main_predictor->GetInputTensor(input_names[1]);
+ im_info_tensor->Reshape({batch_size, 3});
+ im_info_tensor->copy_from_cpu(image_infos.data());
+ }
+
+            std::vector<int> image_size;
+ for(int i = 0; i < batch_size; ++i) {
+ image_size.push_back(ori_heights[i]);
+ image_size.push_back(ori_widths[i]);
+ }
+            std::vector<float> image_size_f;
+            for(int i = 0; i < batch_size; ++i) {
+                image_size_f.push_back(static_cast<float>(ori_heights[i]));
+                image_size_f.push_back(static_cast<float>(ori_widths[i]));
+                image_size_f.push_back(1.0);
+ }
+
+ auto im_size_tensor = _main_predictor->GetInputTensor(input_names.back());
+ if(input_names.size() > 2) {
+ im_size_tensor->Reshape({batch_size, 3});
+ im_size_tensor->copy_from_cpu(image_size_f.data());
+ }
+ else{
+ im_size_tensor->Reshape({batch_size, 2});
+ im_size_tensor->copy_from_cpu(image_size.data());
+ }
+
+
+ auto t1 = std::chrono::high_resolution_clock::now();
+ _main_predictor->ZeroCopyRun();
+ auto t2 = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
+ std::cout << "runtime = " << duration << std::endl;
+
+ auto output_names = _main_predictor->GetOutputNames();
+ auto output_t = _main_predictor->GetOutputTensor(output_names[0]);
+            std::vector<float> out_data;
+            std::vector<int> output_shape = output_t->shape();
+
+ int out_num = 1;
+ std::cout << "size of outputs[" << 0 << "]: (";
+ for (int j = 0; j < output_shape.size(); ++j) {
+ out_num *= output_shape[j];
+ std::cout << output_shape[j] << ",";
+ }
+ std::cout << ")" << std::endl;
+
+ out_data.resize(out_num);
+ output_t->copy_to_cpu(out_data.data());
+
+ float* out_addr = (float *)(out_data.data());
+ auto lod_vector = output_t->lod();
+ output_detection_result(out_addr, lod_vector, imgs_batch);
+ }
+ return 0;
+ }
+}
diff --git a/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.h b/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.h
new file mode 100644
index 0000000000000000000000000000000000000000..3bc4cfdd793291d7d89342c7fbccfdd558d1f004
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.h
@@ -0,0 +1,52 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <thread>
+#include <chrono>
+#include <algorithm>
+#include <iostream>
+#include <utility>
+#include <glog/logging.h>
+#include <paddle_inference_api.h>
+
+#include "utils/conf_parser.h"
+#include "utils/utils.h"
+#include "preprocessor/preprocessor.h"
+
+namespace PaddleSolution {
+ class DetectionPredictor {
+ public:
+ // init a predictor with a yaml config file
+ int init(const std::string& conf);
+ // predict api
+        int predict(const std::vector<std::string>& imgs);
+
+ private:
+        int native_predict(const std::vector<std::string>& imgs);
+        int analysis_predict(const std::vector<std::string>& imgs);
+ private:
+        std::vector<float> _buffer;
+        std::vector<std::string> _imgs_batch;
+        std::vector<paddle::PaddleTensor> _outputs;
+
+        PaddleSolution::PaddleModelConfigPaser _model_config;
+        std::shared_ptr<ImagePreProcessor> _preprocessor;
+        std::unique_ptr<paddle::PaddlePredictor> _main_predictor;
+ };
+}
diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.cpp b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dbe7bcf624b649c02297bddd593d173b57550f17
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.cpp
@@ -0,0 +1,43 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+
+#include "preprocessor.h"
+#include "preprocessor_detection.h"
+
+namespace PaddleSolution {
+
+    std::shared_ptr<ImagePreProcessor> create_processor(const std::string& conf_file) {
+
+        auto config = std::make_shared<PaddleModelConfigPaser>();
+ if (!config->load_config(conf_file)) {
+            LOG(FATAL) << "fail to load conf file [" << conf_file << "]";
+ return nullptr;
+ }
+
+ if (config->_pre_processor == "DetectionPreProcessor") {
+            auto p = std::make_shared<DetectionPreProcessor>();
+ if (!p->init(config)) {
+ return nullptr;
+ }
+ return p;
+ }
+
+
+ LOG(FATAL) << "unknown processor_name [" << config->_pre_processor << "]";
+
+ return nullptr;
+ }
+}
diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.h b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3fb2e029c8acf92010a258dd2824b85a0f2f90f
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#include <vector>
+#include <memory>
+
+#include <glog/logging.h>
+#include <opencv2/opencv.hpp>
+#include <paddle_inference_api.h>
+
+#include "utils/conf_parser.h"
+
+namespace PaddleSolution {
+
+class ImagePreProcessor {
+protected:
+    ImagePreProcessor() {}
+
+public:
+    virtual ~ImagePreProcessor() {}
+
+    virtual bool single_process(const std::string& fname, float* data,
+                                int* ori_w, int* ori_h) {
+        return true;
+    }
+
+    virtual bool batch_process(const std::vector<std::string>& imgs,
+                               float* data, int* ori_w, int* ori_h) {
+        return true;
+    }
+
+    virtual bool single_process(const std::string& fname, float* data) {
+        return true;
+    }
+
+    virtual bool batch_process(const std::vector<std::string>& imgs,
+                               float* data) {
+        return true;
+    }
+
+    virtual bool single_process(const std::string& fname,
+                                std::vector<float>& data, int* ori_w,
+                                int* ori_h, int* resize_w, int* resize_h,
+                                float* scale_ratio) {
+        return true;
+    }
+
+    virtual bool batch_process(const std::vector<std::string>& imgs,
+                               std::vector<std::vector<float>>& data,
+                               int* ori_w, int* ori_h, int* resize_w,
+                               int* resize_h, float* scale_ratio) {
+        return true;
+    }
+};  // end of class ImagePreProcessor
+
+std::shared_ptr<ImagePreProcessor> create_processor(const std::string& config_file);
+
+}  // end of namespace PaddleSolution
+
diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.cpp b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ba8fd0e328c5a859e2d4b88adba0e56e5e3a7476
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.cpp
@@ -0,0 +1,130 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <thread>
+#include <vector>
+
+#include <glog/logging.h>
+
+#include "preprocessor_detection.h"
+#include "utils/utils.h"
+
+namespace PaddleSolution {
+    bool DetectionPreProcessor::single_process(const std::string& fname,
+                                               std::vector<float>& vec_data,
+                                               int* ori_w, int* ori_h,
+                                               int* resize_w, int* resize_h,
+                                               float* scale_ratio) {
+        cv::Mat im1 = cv::imread(fname, -1);
+        if (im1.data == nullptr || im1.empty()) {
+            LOG(ERROR) << "Failed to open image: " << fname;
+            return false;
+        }
+        cv::Mat im;
+        if (_config->_feeds_size == 3) {  // Faster R-CNN feeds float input
+            im1.convertTo(im, CV_32FC3, 1 / 255.0);
+        } else if (_config->_feeds_size == 2) {  // YOLOv3 feeds uint8 input
+            im = im1;
+        }
+
+        int channels = im.channels();
+        if (channels == 1) {
+            cv::cvtColor(im, im, cv::COLOR_GRAY2BGR);
+        }
+        channels = im.channels();
+        if (channels != 3 && channels != 4) {
+            LOG(ERROR) << "Only 3-channel and 4-channel images are supported.";
+            return false;
+        }
+        *ori_w = im.cols;
+        *ori_h = im.rows;
+        cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
+
+        // resize according to the configured scaling strategy
+        int rw = im.cols;
+        int rh = im.rows;
+        float im_scale_ratio;
+        utils::scaling(_config->_resize_type, rw, rh,
+                       _config->_resize[0], _config->_resize[1],
+                       _config->_target_short_size, _config->_resize_max_size,
+                       im_scale_ratio);
+        cv::Size resize_size(rw, rh);
+        *resize_w = rw;
+        *resize_h = rh;
+        *scale_ratio = im_scale_ratio;
+        if (*ori_h != rh || *ori_w != rw) {
+            cv::Mat im_temp;
+            if (_config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
+                cv::resize(im, im_temp, resize_size, 0, 0, cv::INTER_LINEAR);
+            } else if (_config->_resize_type == utils::SCALE_TYPE::RANGE_SCALING) {
+                cv::resize(im, im_temp, cv::Size(), im_scale_ratio,
+                           im_scale_ratio, cv::INTER_LINEAR);
+            }
+            im = im_temp;
+        }
+
+        vec_data.resize(channels * rw * rh);
+        float* data = vec_data.data();
+
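+        // normalize with mean/std and convert HWC -> CHW in a single pass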
+        float* pmean = _config->_mean.data();
+        float* pscale = _config->_std.data();
+        for (int h = 0; h < rh; ++h) {
+            const uchar* uptr = im.ptr<uchar>(h);
+            const float* fptr = im.ptr<float>(h);
+            int im_index = 0;
+            for (int w = 0; w < rw; ++w) {
+                for (int c = 0; c < channels; ++c) {
+                    int top_index = (c * rh + h) * rw + w;
+                    float pixel = 0;
+                    if (_config->_feeds_size == 2) {  // YOLOv3: scale uint8 to [0, 1]
+                        pixel = static_cast<float>(uptr[im_index++]) / 255.0;
+                    } else if (_config->_feeds_size == 3) {  // Faster R-CNN: already float
+                        pixel = fptr[im_index++];
+                    }
+                    pixel = (pixel - pmean[c]) / pscale[c];
+                    data[top_index] = pixel;
+                }
+            }
+        }
+        return true;
+    }
+
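+    // Each image in the batch is preprocessed on its own thread; `data`
+    // must already hold imgs.size() slots so the workers can write their
+    // results without synchronization.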
+    bool DetectionPreProcessor::batch_process(
+            const std::vector<std::string>& imgs,
+            std::vector<std::vector<float>>& data,
+            int* ori_w, int* ori_h, int* resize_w, int* resize_h,
+            float* scale_ratio) {
+        std::vector<std::thread> threads;
+        for (size_t i = 0; i < imgs.size(); ++i) {
+            std::string path = imgs[i];
+            int* width = &ori_w[i];
+            int* height = &ori_h[i];
+            int* resize_width = &resize_w[i];
+            int* resize_height = &resize_h[i];
+            float* sr = &scale_ratio[i];
+            threads.emplace_back([this, &data, i, path, width, height,
+                                  resize_width, resize_height, sr] {
+                std::vector<float> buffer;
+                single_process(path, buffer, width, height,
+                               resize_width, resize_height, sr);
+                data[i] = buffer;
+            });
+        }
+        for (auto& t : threads) {
+            if (t.joinable()) {
+                t.join();
+            }
+        }
+        return true;
+    }
+
+    bool DetectionPreProcessor::init(
+            std::shared_ptr<PaddleSolution::PaddleModelConfigPaser> config) {
+        _config = config;
+        return true;
+    }
+
+}
diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.h b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.h
new file mode 100644
index 0000000000000000000000000000000000000000..731329040423756151a2590d3ed0f46b2800191d
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "preprocessor.h"
+
+namespace PaddleSolution {
+
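+    // Preprocessor for the detection demo (YOLOv3 and Faster R-CNN): reads
+    // an image, resizes it per the config, then normalizes to CHW floats.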
+    class DetectionPreProcessor : public ImagePreProcessor {
+    public:
+        DetectionPreProcessor() : _config(nullptr) {}
+
+        bool init(std::shared_ptr<PaddleSolution::PaddleModelConfigPaser> config);
+
+        bool single_process(const std::string& fname, std::vector<float>& data,
+                            int* ori_w, int* ori_h, int* resize_w,
+                            int* resize_h, float* scale_ratio);
+
+        bool batch_process(const std::vector<std::string>& imgs,
+                           std::vector<std::vector<float>>& data,
+                           int* ori_w, int* ori_h, int* resize_w,
+                           int* resize_h, float* scale_ratio);
+
+    private:
+        std::shared_ptr<PaddleSolution::PaddleModelConfigPaser> _config;
+    };
+
+}
diff --git a/PaddleCV/PaddleDetection/inference/tools/detection_result_pb2.py b/PaddleCV/PaddleDetection/inference/tools/detection_result_pb2.py
new file mode 100644
index 0000000000000000000000000000000000000000..80a6a82a312dc7cf8d098dbb600974b5982123c7
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/tools/detection_result_pb2.py
@@ -0,0 +1,214 @@
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: detection_result.proto
+
+import sys
+_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf import descriptor_pb2
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+    name='detection_result.proto',
+    package='PaddleSolution',
+    syntax='proto2',
+    serialized_pb=_b(
+        '\n\x16\x64\x65tection_result.proto\x12\x0ePaddleSolution\"\x84\x01\n\x0c\x44\x65tectionBox\x12\r\n\x05\x63lass\x18\x01 \x01(\x05\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x12\n\nleft_top_x\x18\x03 \x01(\x02\x12\x12\n\nleft_top_y\x18\x04 \x01(\x02\x12\x16\n\x0eright_bottom_x\x18\x05 \x01(\x02\x12\x16\n\x0eright_bottom_y\x18\x06 \x01(\x02\"Z\n\x0f\x44\x65tectionResult\x12\x10\n\x08\x66ilename\x18\x01 \x01(\t\x12\x35\n\x0f\x64\x65tection_boxes\x18\x02 \x03(\x0b\x32\x1c.PaddleSolution.DetectionBox'
+    ))
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+_DETECTIONBOX = _descriptor.Descriptor(
+    name='DetectionBox',
+    full_name='PaddleSolution.DetectionBox',
+    filename=None,
+    file=DESCRIPTOR,
+    containing_type=None,
+    fields=[
+        _descriptor.FieldDescriptor(
+            name='class',
+            full_name='PaddleSolution.DetectionBox.class',
+            index=0,
+            number=1,
+            type=5,
+            cpp_type=1,
+            label=1,
+            has_default_value=False,
+            default_value=0,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+        _descriptor.FieldDescriptor(
+            name='score',
+            full_name='PaddleSolution.DetectionBox.score',
+            index=1,
+            number=2,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+        _descriptor.FieldDescriptor(
+            name='left_top_x',
+            full_name='PaddleSolution.DetectionBox.left_top_x',
+            index=2,
+            number=3,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+        _descriptor.FieldDescriptor(
+            name='left_top_y',
+            full_name='PaddleSolution.DetectionBox.left_top_y',
+            index=3,
+            number=4,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+        _descriptor.FieldDescriptor(
+            name='right_bottom_x',
+            full_name='PaddleSolution.DetectionBox.right_bottom_x',
+            index=4,
+            number=5,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+        _descriptor.FieldDescriptor(
+            name='right_bottom_y',
+            full_name='PaddleSolution.DetectionBox.right_bottom_y',
+            index=5,
+            number=6,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+    ],
+    extensions=[],
+    nested_types=[],
+    enum_types=[],
+    options=None,
+    is_extendable=False,
+    syntax='proto2',
+    extension_ranges=[],
+    oneofs=[],
+    serialized_start=43,
+    serialized_end=175, )
+
+_DETECTIONRESULT = _descriptor.Descriptor(
+    name='DetectionResult',
+    full_name='PaddleSolution.DetectionResult',
+    filename=None,
+    file=DESCRIPTOR,
+    containing_type=None,
+    fields=[
+        _descriptor.FieldDescriptor(
+            name='filename',
+            full_name='PaddleSolution.DetectionResult.filename',
+            index=0,
+            number=1,
+            type=9,
+            cpp_type=9,
+            label=1,
+            has_default_value=False,
+            default_value=_b("").decode('utf-8'),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+        _descriptor.FieldDescriptor(
+            name='detection_boxes',
+            full_name='PaddleSolution.DetectionResult.detection_boxes',
+            index=1,
+            number=2,
+            type=11,
+            cpp_type=10,
+            label=3,
+            has_default_value=False,
+            default_value=[],
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            options=None),
+    ],
+    extensions=[],
+    nested_types=[],
+    enum_types=[],
+    options=None,
+    is_extendable=False,
+    syntax='proto2',
+    extension_ranges=[],
+    oneofs=[],
+    serialized_start=177,
+    serialized_end=267, )
+
+_DETECTIONRESULT.fields_by_name['detection_boxes'].message_type = _DETECTIONBOX
+DESCRIPTOR.message_types_by_name['DetectionBox'] = _DETECTIONBOX
+DESCRIPTOR.message_types_by_name['DetectionResult'] = _DETECTIONRESULT
+
+DetectionBox = _reflection.GeneratedProtocolMessageType(
+    'DetectionBox',
+    (_message.Message, ),
+    dict(
+        DESCRIPTOR=_DETECTIONBOX,
+        __module__='detection_result_pb2'
+        # @@protoc_insertion_point(class_scope:PaddleSolution.DetectionBox)
+    ))
+_sym_db.RegisterMessage(DetectionBox)
+
+DetectionResult = _reflection.GeneratedProtocolMessageType(
+    'DetectionResult',
+    (_message.Message, ),
+    dict(
+        DESCRIPTOR=_DETECTIONRESULT,
+        __module__='detection_result_pb2'
+        # @@protoc_insertion_point(class_scope:PaddleSolution.DetectionResult)
+    ))
+_sym_db.RegisterMessage(DetectionResult)
+
+# @@protoc_insertion_point(module_scope)
diff --git a/PaddleCV/PaddleDetection/inference/tools/vis.py b/PaddleCV/PaddleDetection/inference/tools/vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0f9fc841f52b252e0c96a8ba8c4694017047a8c
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/tools/vis.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import json
+
+import cv2
+import gflags
+import numpy as np
+
+import detection_result_pb2
+
+Flags = gflags.FLAGS
+gflags.DEFINE_string('img_path', 'abc', 'image path')
+gflags.DEFINE_string('img_result_path', 'def', 'image result path')
+gflags.DEFINE_float('threshold', 0.0, 'threshold of score')
+gflags.DEFINE_string('c2l_path', 'ghk', 'class to label path')
+
+
+def colormap(rgb=False):
+ """
+ Get colormap
+ """
+ color_list = np.array([
+ 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494,
+ 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078,
+ 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000,
+ 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000,
+ 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667,
+ 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000,
+ 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000,
+ 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000,
+ 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500,
+ 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667,
+ 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333,
+ 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000,
+ 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333,
+ 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000,
+ 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000,
+ 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167,
+ 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000,
+ 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000,
+ 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000,
+ 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000,
+ 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833,
+ 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286,
+ 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714,
+ 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000
+ ]).astype(np.float32)
+ color_list = color_list.reshape((-1, 3)) * 255
+ if not rgb:
+ color_list = color_list[:, ::-1]
+ return color_list
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print(
+            "Usage: python vis.py --img_path=/path/to/image --img_result_path=/path/to/image_result.pb --threshold=0.1 --c2l_path=/path/to/class2label.json"
+        )
+    else:
+        Flags(sys.argv)
+        color_list = colormap(rgb=True)
+        text_thickness = 1
+        text_scale = 0.3
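+        # load the serialized DetectionResult written by the inference program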
+        with open(Flags.img_result_path, "rb") as f:
+            detection_result = detection_result_pb2.DetectionResult()
+            detection_result.ParseFromString(f.read())
+            img = cv2.imread(Flags.img_path)
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            class2LabelMap = dict()
+            with open(Flags.c2l_path, "r", encoding="utf-8") as json_f:
+                class2LabelMap = json.load(json_f)
+            for box in detection_result.detection_boxes:
+                if box.score >= Flags.threshold:
+                    box_class = getattr(box, 'class')
+                    text_class_score_str = "%s %.2f" % (
+                        class2LabelMap.get(str(box_class)), box.score)
+                    text_point = (int(box.left_top_x), int(box.left_top_y))
+
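+                    # draw the box first, then a filled label background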
+                    ptLeftTop = (int(box.left_top_x), int(box.left_top_y))
+                    ptRightBottom = (int(box.right_bottom_x),
+                                     int(box.right_bottom_y))
+                    box_thickness = 1
+                    color = tuple([int(c) for c in color_list[box_class]])
+                    cv2.rectangle(img, ptLeftTop, ptRightBottom, color,
+                                  box_thickness, 8)
+                    if text_point[1] < 0:
+                        text_point = (int(box.left_top_x),
+                                      int(box.right_bottom_y))
+                    WHITE = (255, 255, 255)
+                    font = cv2.FONT_HERSHEY_SIMPLEX
+                    text_size = cv2.getTextSize(text_class_score_str, font,
+                                                text_scale, text_thickness)
+
+                    text_box_left_top = (text_point[0],
+                                         text_point[1] - text_size[0][1])
+                    text_box_right_bottom = (
+                        text_point[0] + text_size[0][0], text_point[1])
+
+                    cv2.rectangle(img, text_box_left_top,
+                                  text_box_right_bottom, color, -1, 8)
+                    cv2.putText(img, text_class_score_str, text_point, font,
+                                text_scale, WHITE, text_thickness)
+            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+            cv2.imwrite(Flags.img_path + ".png", img)
diff --git a/PaddleCV/PaddleDetection/inference/utils/conf_parser.h b/PaddleCV/PaddleDetection/inference/utils/conf_parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..21944d032b2c24cdb584dc076a696560d4665ea1
--- /dev/null
+++ b/PaddleCV/PaddleDetection/inference/utils/conf_parser.h
@@ -0,0 +1,237 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <yaml-cpp/yaml.h>
+#include <iostream>
+#include <string>
+#include <vector>