diff --git a/configs/slim/README.md b/configs/slim/README.md
index 728c2f3760ffb7355e0ac3627ff3cfd193bd2b82..67ba7158502c2e4108521d249a0729ca5bf7dc18 100755
--- a/configs/slim/README.md
+++ b/configs/slim/README.md
@@ -4,33 +4,11 @@
 
 - [剪裁](prune)
 - [量化](quant)
+- [蒸馏](distill)
+- [联合策略](extensions)
 
 推荐您使用剪裁和蒸馏联合训练，或者使用剪裁和量化，进行检测模型压缩。 下面以YOLOv3为例，进行剪裁、蒸馏和量化实验。
 
-## Benchmark
-
-### 剪裁
-
-#### Pascal VOC上benchmark
-
-| 模型         |  压缩策略 |     GFLOPs     |  模型体积(MB)   | 输入尺寸 | 预测时延（SD855）|   Box AP   |                           下载                          | 模型配置文件 | 压缩算法配置文件  |
-| :----------------| :-------: | :------------: | :-------------: | :------: | :--------: | :------: | :-----------------------------------------------------: |:-------------: | :------: |
-| YOLOv3-MobileNetV1      |  baseline | 24.13          |  93          |   608    | 289.9ms | 75.1       | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml)  |  -  |
-| YOLOv3-MobileNetV1      |  剪裁-l1_norm(sensity) | 15.78(-34.49%) |  66(-29%) |   608   | - | 77.6(+2.5) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_voc_prune_l1_norm.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml)  |  [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/slim/prune/yolov3_prune_l1_norm.yml)  |
-
-### 量化
-
-#### COCO上benchmark
-
-| 模型               | 压缩策略     | 输入尺寸 |   Box AP    |                             下载                             |                         模型配置文件                         |                       压缩算法配置文件                       |
-| ------------------ | ------------ | -------- | :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
-| YOLOv3-MobileNetV1 | baseline     | 608      |    28.8     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |                              -                               |
-| YOLOv3-MobileNetV1 | 普通在线量化 | 608      | 27.5 (-1.3) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/slim/quant/yolov3_mobilenet_v1_qat.yml) |
-| YOLOv3-MobileNetV3 | baseline     | 608      |    31.4     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |                              -                               |
-| YOLOv3-MobileNetV3 | PACT在线量化 | 608      | 29.0 (-2.4) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v3_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/slim/quant/yolov3_mobilenet_v3_qat.yml) |
-
-- SD855预测时延为使用PaddleLite部署，使用arm8架构并使用4线程(4 Threads)推理时延
-
 ## 实验环境
 
 - Python 3.7+
@@ -39,6 +17,8 @@
 - CUDA 9.0+
 - cuDNN >=7.5
 
+**注意：** 量化训练需要依赖Paddle develop分支，可在[PaddlePaddle每日版本](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-dev)中下载安装合适的PaddlePaddle版本。
+
 ## 快速开始
 
 ### 训练
@@ -84,3 +64,56 @@ python tools/export_model.py -c configs/{MODEL.yml} --slim_config configs/slim/{
 - `-c`: 指定模型配置文件。
 - `--slim_config`: 指定压缩策略配置文件。
 - `-o weights`: 指定压缩算法训好的模型路径。
+
+
+## Benchmark
+
+### 剪裁
+
+#### Pascal VOC上benchmark
+
+| 模型         |  压缩策略 |     GFLOPs     |  模型体积(MB)   | 输入尺寸 | 预测时延（SD855）|   Box AP   |                           下载                          | 模型配置文件 | 压缩算法配置文件  |
+| :----------------| :-------: | :------------: | :-------------: | :------: | :--------: | :------: | :-----------------------------------------------------: |:-------------: | :------: |
+| YOLOv3-MobileNetV1      |  baseline | 24.13          |  93          |   608    | 289.9ms | 75.1       | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml)  |  -  |
+| YOLOv3-MobileNetV1      |  剪裁-l1_norm(sensity) | 15.78(-34.49%) |  66(-29%) |   608   | - | 77.6(+2.5) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_voc_prune_l1_norm.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml)  |  [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/prune/yolov3_prune_l1_norm.yml)  |
+
+- 目前剪裁支持YOLO系列、SSD、TTFNet、BlazeFace，其余模型正在开发支持中。
+- SD855预测时延为使用PaddleLite部署，在ARMv8架构下使用4线程(4 Threads)推理的时延。
+
+### 量化
+
+#### COCO上benchmark
+
+| 模型               | 压缩策略     | 输入尺寸 |   Box AP    |                             下载                             |                         模型配置文件                         |                       压缩算法配置文件                       |
+| ------------------ | ------------ | -------- | :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline     | 608      |    28.8     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |                            -                               |
+| YOLOv3-MobileNetV1 | 普通在线量化 | 608      | 30.3 (+1.5) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/quant/yolov3_mobilenet_v1_qat.yml) |
+| YOLOv3-MobileNetV3 | baseline     | 608      |    31.4     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |                              -                               |
+| YOLOv3-MobileNetV3 | PACT在线量化 | 608      | 29.5 (-1.9) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v3_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/quant/yolov3_mobilenet_v3_qat.yml) |
+| YOLOv3-DarkNet53 | baseline     | 608      |    39.0     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) |                           -                               |
+| YOLOv3-DarkNet53 | 普通在线量化 | 608      | 38.7 (-0.3) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_darknet_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/quant/yolov3_darknet_qat.yml) |
+| SSD-MobileNet_v1    |  baseline   |   300   |  73.8  | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_mobilenet_v1_300_120e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) |     -    |
+| SSD-MobileNet_v1    |  普通在线量化   |   300   |  73.1(-0.7)  | [下载链接](https://paddledet.bj.bcebos.com/models/slim/ssd_mobilenet_v1_300_voc_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/quant/ssd_mobilenet_v1_qat.yml) |
+| Mask-ResNet50-FPN     |    baseline      |    (800, 1333)   |  39.2/35.6    | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml) |  -  |
+| Mask-ResNet50-FPN     |    普通在线量化      |    (800, 1333)   |  39.7(+0.5)/35.9(+0.3)    | [下载链接](https://paddledet.bj.bcebos.com/models/slim/mask_rcnn_r50_fpn_1x_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml) |  [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml)  |
+
+
+### 蒸馏
+
+#### COCO上benchmark
+
+| 模型               | 压缩策略     | 输入尺寸 |   Box AP    |                             下载                             |                         模型配置文件                         |                       压缩算法配置文件                       |
+| ------------------ | ------------ | -------- | :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline     | 608      |    28.8     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |                              -                               |
+| YOLOv3-MobileNetV1 | 蒸馏 | 608      | 29.0 (+0.2) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml) |
+
+- 具体蒸馏方法请参考[蒸馏策略文档](distill/README.md)。
+
+### 蒸馏剪裁联合策略
+
+#### COCO上benchmark
+
+| 模型               | 压缩策略     | 输入尺寸 | GFLOPs | 模型体积(MB) |  Box AP    |                             下载                             |                         模型配置文件                         |                       压缩算法配置文件                       |
+| ------------------ | ------------ | -------- | :---------: |:---------: | :---------: |:----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline     | 608      | 24.65 | 94.6 |  28.8     | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |                              -                               |
+| YOLOv3-MobileNetV1 | 蒸馏+剪裁 | 608      | 7.54(-69.4%) | 32.0(-66.0%) | 28.7(-0.1) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml) |
diff --git a/configs/slim/distill/README.md b/configs/slim/distill/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..da5795764cec02ea384f8e063f918b56b4f2b9bb
--- /dev/null
+++ b/configs/slim/distill/README.md
@@ -0,0 +1,18 @@
+# Distillation(蒸馏)
+
+## YOLOv3模型蒸馏
+以YOLOv3-MobileNetV1为例，使用YOLOv3-ResNet34作为蒸馏训练的teacher网络，对YOLOv3-MobileNetV1结构的student网络进行蒸馏。
+COCO数据集作为目标检测任务的训练目标难度更大，意味着teacher网络会预测出更多的背景bbox，如果直接用teacher的预测输出作为student学习的`soft label`会有严重的类别不均衡问题。解决这个问题需要引入新的方法，详细背景请参考论文:[Object detection at 200 Frames Per Second](https://arxiv.org/abs/1805.06361)。
+为了确定蒸馏的对象，我们首先需要找到student和teacher网络得到的`x,y,w,h,cls,objness`等Tensor，用teacher得到的结果指导student训练。具体实现可参考[代码](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/ppdet/slim/distill.py)。
+
+## Citations
+```
+@article{mehta2018object,
+      title={Object detection at 200 Frames Per Second},
+      author={Rakesh Mehta and Cemalettin Ozturk},
+      year={2018},
+      eprint={1805.06361},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
diff --git a/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml b/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9998dec5620adac38fd8a487f7ad1ec6aeb055dd
--- /dev/null
+++ b/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+  '../../yolov3/yolov3_r34_270e_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams
+
+
+slim: Distill
+distill_loss: DistillYOLOv3Loss
+
+DistillYOLOv3Loss:
+  weight: 1000
diff --git a/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml b/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f86fac5e9ed0f291c5b3f9b6266ac5755807422c
--- /dev/null
+++ b/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml
@@ -0,0 +1,24 @@
+_BASE_: [
+  '../../yolov3/yolov3_r34_270e_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams
+
+slim: DistillPrune
+
+distill_loss: DistillYOLOv3Loss
+
+DistillYOLOv3Loss:
+  weight: 1000
+
+pruner: Pruner
+
+Pruner:
+  criterion: l1_norm
+  pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+                  'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+                  'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+                  'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+                  'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+                  'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
+  pruned_ratios: [0.5,0.5,0.5,0.5,0.5,0.5,0.7,0.7,0.7,0.7,0.7,0.7,0.8,0.8,0.8,0.8,0.8,0.8]
diff --git a/configs/slim/prune/yolov3_prune_fpgm.yml b/configs/slim/prune/yolov3_prune_fpgm.yml
index ed9495a73e4fbacbe20bbeb3093f2a7a406ea9e6..8d99997ef8ef547196d5c140705f21132ff41f8f 100644
--- a/configs/slim/prune/yolov3_prune_fpgm.yml
+++ b/configs/slim/prune/yolov3_prune_fpgm.yml
@@ -5,11 +5,11 @@ slim: Pruner
 
 Pruner:
   criterion: fpgm
-  pruned_params: ['yolo_block.0.0.0.conv.weights', 'yolo_block.0.0.1.conv.weights', 'yolo_block.0.1.0.conv.weights',
-                  'yolo_block.0.1.1.conv.weights', 'yolo_block.0.2.conv.weights', 'yolo_block.0.tip.conv.weights',
-                  'yolo_block.1.0.0.conv.weights', 'yolo_block.1.0.1.conv.weights', 'yolo_block.1.1.0.conv.weights',
-                  'yolo_block.1.1.1.conv.weights', 'yolo_block.1.2.conv.weights', 'yolo_block.1.tip.conv.weights',
-                  'yolo_block.2.0.0.conv.weights', 'yolo_block.2.0.1.conv.weights', 'yolo_block.2.1.0.conv.weights',
-                  'yolo_block.2.1.1.conv.weights', 'yolo_block.2.2.conv.weights', 'yolo_block.2.tip.conv.weights']
+  pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+                  'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+                  'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+                  'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+                  'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+                  'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
   pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
   print_params: False
diff --git a/configs/slim/prune/yolov3_prune_l1_norm.yml b/configs/slim/prune/yolov3_prune_l1_norm.yml
index db2a616daab7087e4b02c76c72df34c3a6a7937f..757117e387817a401dd8aa8b969c183053b41982 100644
--- a/configs/slim/prune/yolov3_prune_l1_norm.yml
+++ b/configs/slim/prune/yolov3_prune_l1_norm.yml
@@ -5,11 +5,11 @@ slim: Pruner
 
 Pruner:
   criterion: l1_norm
-  pruned_params: ['yolo_block.0.0.0.conv.weights', 'yolo_block.0.0.1.conv.weights', 'yolo_block.0.1.0.conv.weights',
-                  'yolo_block.0.1.1.conv.weights', 'yolo_block.0.2.conv.weights', 'yolo_block.0.tip.conv.weights',
-                  'yolo_block.1.0.0.conv.weights', 'yolo_block.1.0.1.conv.weights', 'yolo_block.1.1.0.conv.weights',
-                  'yolo_block.1.1.1.conv.weights', 'yolo_block.1.2.conv.weights', 'yolo_block.1.tip.conv.weights',
-                  'yolo_block.2.0.0.conv.weights', 'yolo_block.2.0.1.conv.weights', 'yolo_block.2.1.0.conv.weights',
-                  'yolo_block.2.1.1.conv.weights', 'yolo_block.2.2.conv.weights', 'yolo_block.2.tip.conv.weights']
+  pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+                  'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+                  'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+                  'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+                  'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+                  'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
   pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
   print_params: False
diff --git a/configs/yolov3/README.md b/configs/yolov3/README.md
index 97465271518066b886ed023372afbde7b267cd03..b27673d9cf21907c1f6b92dd44218c6fae9915d1 100644
--- a/configs/yolov3/README.md
+++ b/configs/yolov3/README.md
@@ -9,27 +9,30 @@
 | DarkNet53(paper)  | 608         |    8    |   270e    |     ----     |  33.0  |    -   |    -   |
 | DarkNet53(paper)  | 416         |    8    |   270e    |     ----     |  31.0  |    -   |    -   |
 | DarkNet53(paper)  | 320         |    8    |   270e    |     ----     |  28.2  |    -   |    -   |
-| DarkNet53         | 608         |    8    |   270e    |     ----     |  39.0  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
-| DarkNet53         | 416         |    8    |   270e    |     ----     |  37.5  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
-| DarkNet53         | 320         |    8    |   270e    |     ----     |  34.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
-|   ResNet50_vd        | 608        |    8    |   270e    |     ----     |  39.1  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r50vd_dcn_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml) |
-| MobileNet-V1         | 608         |    8    |   270e    |     ----     |  28.8  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
-| MobileNet-V1         | 416         |    8    |   270e    |     ----     |  28.7  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
-| MobileNet-V1         | 320         |    8    |   270e    |     ----     |  26.5  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
-| MobileNet-V3         | 608         |    8    |   270e    |     ----     |  31.4  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
-| MobileNet-V3         | 416         |    8    |   270e    |     ----     |  29.7  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
-| MobileNet-V3         | 320         |    8    |   270e    |     ----     |  26.9  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
+| DarkNet53         | 608         |    8    |   270e    |     ----     |  39.0  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+| DarkNet53         | 416         |    8    |   270e    |     ----     |  37.5  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+| DarkNet53         | 320         |    8    |   270e    |     ----     |  34.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+|   ResNet50_vd        | 608        |    8    |   270e    |     ----     |  39.1  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r50vd_dcn_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml) |
+| ResNet34         | 608         |    8    |   270e    |     ----     |  36.2  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_r34_270e_coco.yml) |
+| ResNet34         | 416         |    8    |   270e    |     ----     |  34.3  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_r34_270e_coco.yml) |
+| ResNet34         | 320         |    8    |   270e    |     ----     |  31.2  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_r34_270e_coco.yml) |
+| MobileNet-V1         | 608         |    8    |   270e    |     ----     |  28.8  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
+| MobileNet-V1         | 416         |    8    |   270e    |     ----     |  28.7  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
+| MobileNet-V1         | 320         |    8    |   270e    |     ----     |  26.5  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
+| MobileNet-V3         | 608         |    8    |   270e    |     ----     |  31.4  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
+| MobileNet-V3         | 416         |    8    |   270e    |     ----     |  29.7  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
+| MobileNet-V3         | 320         |    8    |   270e    |     ----     |  26.9  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |
 
 ### YOLOv3 on Pasacl VOC
 
 | 骨架网络     | 输入尺寸 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | 下载 | 配置文件 |
 | :----------- | :--: | :-----: | :-----: |:------------: |:----: | :-------: | :----: |
-| MobileNet-V1 | 608  |    8    |   270e  |      -        |  75.1  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
-| MobileNet-V1 | 416  |    8    |   270e  |      -        |  76.1  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
-| MobileNet-V1 | 320  |    8    |   270e  |      -        |  73.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
-| MobileNet-V3 | 608  |    8    |   270e  |      -        |  79.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
-| MobileNet-V3 | 416  |    8    |   270e  |      -        |  78.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
-| MobileNet-V3 | 320  |    8    |   270e  |      -        |  76.4  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
+| MobileNet-V1 | 608  |    8    |   270e  |      -        |  75.1  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
+| MobileNet-V1 | 416  |    8    |   270e  |      -        |  76.1  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
+| MobileNet-V1 | 320  |    8    |   270e  |      -        |  73.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) |
+| MobileNet-V3 | 608  |    8    |   270e  |      -        |  79.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
+| MobileNet-V3 | 416  |    8    |   270e  |      -        |  78.6  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
+| MobileNet-V3 | 320  |    8    |   270e  |      -        |  76.4  | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml) |
 
 **注意：** YOLOv3均使用8GPU训练，训练270个epoch
 
diff --git a/configs/yolov3/_base_/yolov3_r34.yml b/configs/yolov3/_base_/yolov3_r34.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c2d1489f07ba65240e5b545662b8c1672750b705
--- /dev/null
+++ b/configs/yolov3/_base_/yolov3_r34.yml
@@ -0,0 +1,41 @@
+architecture: YOLOv3
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams
+norm_type: sync_bn
+
+YOLOv3:
+  backbone: ResNet
+  neck: YOLOv3FPN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+ResNet:
+  depth: 34
+  return_idx: [1, 2, 3]
+  freeze_at: -1
+  freeze_norm: false
+  norm_decay: 0.
+
+YOLOv3Head:
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  loss: YOLOv3Loss
+
+YOLOv3Loss:
+  ignore_thresh: 0.7
+  downsample: [32, 16, 8]
+  label_smooth: false
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.005
+    downsample_ratio: 32
+    clip_bbox: true
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.01
+    nms_threshold: 0.45
+    nms_top_k: 1000
diff --git a/configs/yolov3/yolov3_r34_270e_coco.yml b/configs/yolov3/yolov3_r34_270e_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8653b06161b9145dbd23e00878d5c056986db5ec
--- /dev/null
+++ b/configs/yolov3/yolov3_r34_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_270e.yml',
+  '_base_/yolov3_r34.yml',
+  '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_r34_270e_coco/model_final
diff --git a/ppdet/engine/callbacks.py b/ppdet/engine/callbacks.py
index ca4ad613616056f41842f2dd2e9f1341bb556e4e..410de77447f990a84a42e4a4660056017ba5d4be 100644
--- a/ppdet/engine/callbacks.py
+++ b/ppdet/engine/callbacks.py
@@ -146,13 +146,17 @@ class Checkpointer(Callback):
         self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
         self.save_dir = os.path.join(self.model.cfg.save_dir,
                                      self.model.cfg.filename)
+        if hasattr(self.model.model, 'student_model'):
+            self.weight = self.model.model.student_model
+        else:
+            self.weight = self.model.model
         if self.use_ema:
             self.ema = ModelEMA(
-                cfg['ema_decay'], self.model.model, use_thres_step=True)
+                cfg['ema_decay'], self.weight, use_thres_step=True)
 
     def on_step_end(self, status):
         if self.use_ema:
-            self.ema.update(self.model.model)
+            self.ema.update(self.weight)
 
     def on_epoch_end(self, status):
         # Checkpointer only performed during training
@@ -169,7 +173,7 @@ class Checkpointer(Callback):
                     if self.use_ema:
                         weight = self.ema.apply()
                     else:
-                        weight = self.model.model
+                        weight = self.weight
             elif mode == 'eval':
                 if 'save_best_model' in status and status['save_best_model']:
                     for metric in self.model._metrics:
@@ -181,7 +185,7 @@ class Checkpointer(Callback):
                             if self.use_ema:
                                 weight = self.ema.apply()
                             else:
-                                weight = self.model.model
+                                weight = self.weight
                         logger.info("Best test {} ap is {:0.3f}.".format(
                             key, self.best_ap))
             if weight:
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index f6b10b144675b0664c72320fa8e402e5a17ffa47..be2c14e68f6d7c0eefd0a084c3b7faf4c2fc73c2 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -52,17 +52,14 @@ class Trainer(object):
                 "mode should be 'train', 'eval' or 'test'"
         self.mode = mode.lower()
         self.optimizer = None
-        self.slim = None
+        self.is_loaded_weights = False
 
         # build model
-        self.model = create(cfg.architecture)
-
-        # model slim build
-        if 'slim' in cfg and cfg.slim:
-            if self.mode == 'train':
-                self.load_weights(cfg.pretrain_weights)
-            self.slim = create(cfg.slim)
-            self.slim(self.model)
+        if 'model' not in self.cfg:
+            self.model = create(cfg.architecture)
+        else:
+            self.model = self.cfg.model
+            self.is_loaded_weights = True
 
         # build data loader
         self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
@@ -192,12 +189,19 @@ class Trainer(object):
         self._metrics.extend(metrics)
 
     def load_weights(self, weights):
+        """Load pretrain weights unless cfg already supplied a built model."""
+        if self.is_loaded_weights:
+            return
         self.start_epoch = 0
         load_pretrain_weight(self.model, weights)
         logger.debug("Load weights {} to start training".format(weights))
 
     def resume_weights(self, weights):
-        self.start_epoch = load_weight(self.model, weights, self.optimizer)
+        """Restore a checkpoint and set `start_epoch` from it."""
+        # A distillation wrapper exposes `student_model`; the checkpoint is
+        # restored into the student in that case.
+        target = getattr(self.model, 'student_model', self.model)
+        self.start_epoch = load_weight(target, weights, self.optimizer)
         logger.debug("Resume weights of epoch {}".format(self.start_epoch))
 
     def train(self, validate=False):
@@ -419,7 +423,7 @@ class Trainer(object):
         }]
 
         # dy2st and save model
-        if self.slim is None or self.cfg['slim'] != 'QAT':
+        if 'slim' not in self.cfg or self.cfg['slim'] != 'QAT':
             static_model = paddle.jit.to_static(
                 self.model, input_spec=input_spec)
             # NOTE: dy2st do not pruned program, but jit.save will prune program
@@ -433,7 +437,7 @@ class Trainer(object):
                 input_spec=pruned_input_spec)
             logger.info("Export model and saved in {}".format(save_dir))
         else:
-            self.slim.save_quantized_model(
+            self.cfg.slim.save_quantized_model(
                 self.model,
                 os.path.join(save_dir, 'model'),
                 input_spec=input_spec)
diff --git a/ppdet/modeling/backbones/darknet.py b/ppdet/modeling/backbones/darknet.py
index 7981306a912b7d893e9c63b76b0aee9247019ba0..ab748c66b9d525082b11adb1c498f5b6603be0ba 100755
--- a/ppdet/modeling/backbones/darknet.py
+++ b/ppdet/modeling/backbones/darknet.py
@@ -46,7 +46,6 @@ class ConvBNLayer(nn.Layer):
             stride=stride,
             padding=padding,
             groups=groups,
-            weight_attr=ParamAttr(name=name + '.conv.weights'),
             data_format=data_format,
             bias_attr=False)
         self.batch_norm = batch_norm(
diff --git a/ppdet/modeling/backbones/mobilenet_v1.py b/ppdet/modeling/backbones/mobilenet_v1.py
index 5b4d1287847808f0e4c0d20b7809a1efee3cd82b..cecc6a5b5e79db265936fa149019ac9323811eaf 100644
--- a/ppdet/modeling/backbones/mobilenet_v1.py
+++ b/ppdet/modeling/backbones/mobilenet_v1.py
@@ -55,14 +55,11 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(
                 learning_rate=conv_lr,
                 initializer=KaimingNormal(),
-                regularizer=L2Decay(conv_decay),
-                name=name + "_weights"),
+                regularizer=L2Decay(conv_decay)),
             bias_attr=False)
 
-        param_attr = ParamAttr(
-            name=name + "_bn_scale", regularizer=L2Decay(norm_decay))
-        bias_attr = ParamAttr(
-            name=name + "_bn_offset", regularizer=L2Decay(norm_decay))
+        param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
+        bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
         if norm_type == 'sync_bn':
             self._batch_norm = nn.SyncBatchNorm(
                 out_channels, weight_attr=param_attr, bias_attr=bias_attr)
@@ -72,9 +69,7 @@ class ConvBNLayer(nn.Layer):
                 act=None,
                 param_attr=param_attr,
                 bias_attr=bias_attr,
-                use_global_stats=False,
-                moving_mean_name=name + '_bn_mean',
-                moving_variance_name=name + '_bn_variance')
+                use_global_stats=False)
 
     def forward(self, x):
         x = self._conv(x)
diff --git a/ppdet/modeling/backbones/resnet.py b/ppdet/modeling/backbones/resnet.py
index e59f1761464a59538b692e5413292577175b2be9..f642764ad73b65ba69b374b05ff715d9586c364b 100755
--- a/ppdet/modeling/backbones/resnet.py
+++ b/ppdet/modeling/backbones/resnet.py
@@ -64,7 +64,7 @@ class ConvNormLayer(nn.Layer):
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=paddle.ParamAttr(
-                    learning_rate=lr, name=name + "_weights"),
+                    learning_rate=lr, ),
                 bias_attr=False)
         else:
             self.conv = DeformableConvV2(
@@ -75,7 +75,7 @@ class ConvNormLayer(nn.Layer):
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=paddle.ParamAttr(
-                    learning_rate=lr, name=name + '_weights'),
+                    learning_rate=lr, ),
                 bias_attr=False,
                 name=name)
 
@@ -84,12 +84,10 @@ class ConvNormLayer(nn.Layer):
         param_attr = paddle.ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
-            name=bn_name + "_scale",
             trainable=False if freeze_norm else True)
         bias_attr = paddle.ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
-            name=bn_name + "_offset",
             trainable=False if freeze_norm else True)
 
         global_stats = True if freeze_norm else False
@@ -102,9 +100,7 @@ class ConvNormLayer(nn.Layer):
                 act=None,
                 param_attr=param_attr,
                 bias_attr=bias_attr,
-                use_global_stats=global_stats,
-                moving_mean_name=bn_name + '_mean',
-                moving_variance_name=bn_name + '_variance')
+                use_global_stats=global_stats)
         norm_params = self.norm.parameters()
 
         if freeze_norm:
diff --git a/ppdet/modeling/heads/yolo_head.py b/ppdet/modeling/heads/yolo_head.py
index 723bf4fc6e541021a3d0f7c3a782f843b4272fff..3516da4108ddac38c93480c401c6b40af5b9ef05 100644
--- a/ppdet/modeling/heads/yolo_head.py
+++ b/ppdet/modeling/heads/yolo_head.py
@@ -56,9 +56,7 @@ class YOLOv3Head(nn.Layer):
                     stride=1,
                     padding=0,
                     data_format=data_format,
-                    weight_attr=ParamAttr(name=name + '.conv.weights'),
-                    bias_attr=ParamAttr(
-                        name=name + '.conv.bias', regularizer=L2Decay(0.))))
+                    bias_attr=ParamAttr(regularizer=L2Decay(0.))))
             self.yolo_outputs.append(yolo_output)
 
     def parse_anchor(self, anchors, anchor_masks):
diff --git a/ppdet/modeling/losses/fcos_loss.py b/ppdet/modeling/losses/fcos_loss.py
index 350011accd0dce47177f744665791afa9eccb5a0..201786c9a559206fcbf8176d025b72a5334b0237 100644
--- a/ppdet/modeling/losses/fcos_loss.py
+++ b/ppdet/modeling/losses/fcos_loss.py
@@ -20,6 +20,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register
+from ppdet.modeling import ops
 
 INF = 1e8
 __all__ = ['FCOSLoss']
@@ -44,19 +45,6 @@ def flatten_tensor(inputs, channel_first=False):
     return output_channel_last
 
 
-def sigmoid_cross_entropy_with_logits_loss(inputs,
-                                           label,
-                                           ignore_index=-100,
-                                           normalize=False):
-    output = F.binary_cross_entropy_with_logits(inputs, label, reduction='none')
-    mask_tensor = paddle.cast(label != ignore_index, 'float32')
-    output = paddle.multiply(output, mask_tensor)
-    if normalize:
-        sum_valid_mask = paddle.sum(mask_tensor)
-        output = output / sum_valid_mask
-    return output
-
-
 @register
 class FCOSLoss(nn.Layer):
     """
@@ -226,8 +214,8 @@ class FCOSLoss(nn.Layer):
 
         # 3. centerness: sigmoid_cross_entropy_with_logits_loss
         centerness_flatten = paddle.squeeze(centerness_flatten, axis=-1)
-        ctn_loss = sigmoid_cross_entropy_with_logits_loss(centerness_flatten,
-                                                          tag_center_flatten)
+        ctn_loss = ops.sigmoid_cross_entropy_with_logits(centerness_flatten,
+                                                         tag_center_flatten)
         ctn_loss = ctn_loss * mask_positive_float / num_positive_fp32
 
         loss_all = {
diff --git a/ppdet/modeling/losses/yolo_loss.py b/ppdet/modeling/losses/yolo_loss.py
index e460d2e2866d26afa5ca49a4823f2a696ba2e947..9579acf9f9a1d6f27bed7431e6dbe769ce7edbf6 100644
--- a/ppdet/modeling/losses/yolo_loss.py
+++ b/ppdet/modeling/losses/yolo_loss.py
@@ -54,6 +54,7 @@ class YOLOv3Loss(nn.Layer):
         self.scale_x_y = scale_x_y
         self.iou_loss = iou_loss
         self.iou_aware_loss = iou_aware_loss
+        self.distill_pairs = []
 
     def obj_loss(self, pbox, gbox, pobj, tobj, anchor, downsample):
         # pbox
@@ -108,6 +109,7 @@ class YOLOv3Loss(nn.Layer):
         x, y = p[:, :, :, :, 0:1], p[:, :, :, :, 1:2]
         w, h = p[:, :, :, :, 2:3], p[:, :, :, :, 3:4]
         obj, pcls = p[:, :, :, :, 4:5], p[:, :, :, :, 5:]
+        self.distill_pairs.append([x, y, w, h, obj, pcls])
 
         t = t.transpose((0, 1, 3, 4, 2))
         tx, ty = t[:, :, :, :, 0:1], t[:, :, :, :, 1:2]
@@ -173,6 +175,7 @@ class YOLOv3Loss(nn.Layer):
         gt_targets = [targets['target{}'.format(i)] for i in range(np)]
         gt_box = targets['gt_bbox']
         yolo_losses = dict()
+        self.distill_pairs.clear()
         for x, t, anchor, downsample in zip(inputs, gt_targets, anchors,
                                             self.downsample):
             yolo_loss = self.yolov3_loss(x, t, gt_box, anchor, downsample,
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index ef961dd1c220f8cee5a46745259add9a3d0cbbe1..cbef3d6b401ace03c83a0354692b311a729e938b 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -59,11 +59,8 @@ def batch_norm(ch,
     return batch_norm(
         ch,
         weight_attr=ParamAttr(
-            name=bn_name + '.scale',
-            initializer=initializer,
-            regularizer=L2Decay(norm_decay)),
-        bias_attr=ParamAttr(
-            name=bn_name + '.offset', regularizer=L2Decay(norm_decay)),
+            initializer=initializer, regularizer=L2Decay(norm_decay)),
+        bias_attr=ParamAttr(regularizer=L2Decay(norm_decay)),
         data_format=data_format)
 
 
@@ -1558,7 +1555,6 @@ def sigmoid_cross_entropy_with_logits(input,
     output = F.binary_cross_entropy_with_logits(input, label, reduction='none')
     mask_tensor = paddle.cast(label != ignore_index, 'float32')
     output = paddle.multiply(output, mask_tensor)
-    output = paddle.reshape(output, shape=[output.shape[0], -1])
     if normalize:
         sum_valid_mask = paddle.sum(mask_tensor)
         output = output / sum_valid_mask
diff --git a/ppdet/slim/__init__.py b/ppdet/slim/__init__.py
index 7a58bf591c895d699301c0f88dd55268552581e5..ebd64c0e582bb91df474ce5a89f74574f3f01828 100644
--- a/ppdet/slim/__init__.py
+++ b/ppdet/slim/__init__.py
@@ -14,6 +14,73 @@
 
 from . import prune
 from . import quant
+from . import distill
 
 from .prune import *
 from .quant import *
+from .distill import *
+
+import yaml
+from ppdet.core.workspace import create, load_config
+from ppdet.utils.checkpoint import load_pretrain_weight
+
+
+def build_slim_model(cfg, slim_cfg, mode='train'):
+    """Build the slim model described by `slim_cfg` as `cfg['model']`.
+
+    Trainer uses `cfg['model']`, when present, instead of creating
+    `cfg.architecture` itself.
+
+    Args:
+        cfg: config object loaded from the base model config.
+        slim_cfg (str): path of the slim strategy YAML file; its `slim`
+            entry selects the strategy.
+        mode (str): 'train', 'eval' or 'test'.
+
+    Returns:
+        `cfg`; no `model` entry is added when the strategy is 'Distill'
+        and `mode` is not 'train'.
+    """
+    # NOTE: yaml.Loader can construct arbitrary Python objects, so only
+    # trusted slim config files must be passed here.
+    with open(slim_cfg) as f:
+        slim_type = yaml.load(f, Loader=yaml.Loader)['slim']
+    is_train = mode == 'train'
+
+    if slim_type == 'Distill':
+        # Outside of training the distillation wrapper is not built.
+        if is_train:
+            cfg['model'] = DistillModel(cfg, slim_cfg)
+        return cfg
+
+    if slim_type == 'DistillPrune':
+        if is_train:
+            # Only the student is pruned. DistillModel calls
+            # load_config(slim_cfg), presumably what exposes cfg.pruner.
+            model = DistillModel(cfg, slim_cfg)
+            pruner = create(cfg.pruner)
+            pruner(model.student_model)
+        else:
+            model = create(cfg.architecture)
+            # Captured before load_config(slim_cfg) runs.
+            weights = cfg.weights
+            load_config(slim_cfg)
+            pruner = create(cfg.pruner)
+            model = pruner(model)
+            load_pretrain_weight(model, weights)
+        cfg['model'] = model
+        return cfg
+
+    # Any other strategy: the object created from `cfg.slim` is applied.
+    # NOTE(review): Trainer's QAT export calls
+    # `cfg.slim.save_quantized_model`, but `cfg['slim']` is not replaced by
+    # the slim object here -- confirm that export path works.
+    load_config(slim_cfg)
+    model = create(cfg.architecture)
+    if is_train:
+        load_pretrain_weight(model, cfg.pretrain_weights)
+    slim = create(cfg.slim)
+    cfg['model'] = slim(model)
+    if not is_train:
+        load_pretrain_weight(cfg['model'], cfg.weights)
+    return cfg
diff --git a/ppdet/slim/distill.py b/ppdet/slim/distill.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5c9d72669a601ce331afde69ce92ca6642fb3d2
--- /dev/null
+++ b/ppdet/slim/distill.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ppdet.core.workspace import register, serializable, load_config
+from ppdet.core.workspace import create
+from ppdet.utils.logger import setup_logger
+from ppdet.modeling import ops
+from ppdet.utils.checkpoint import load_pretrain_weight
+from ppdet.modeling.losses import YOLOv3Loss
+logger = setup_logger(__name__)
+
+
+class DistillModel(nn.Layer):
+    """Wrap a student detector and a frozen teacher for distillation.
+
+    Only the student's parameters are exposed through `parameters()`; every
+    teacher parameter is marked as not trainable.
+
+    Args:
+        cfg: config of the student; `cfg.architecture` and
+            `cfg.pretrain_weights` are read from it.
+        slim_cfg (str): path of the distillation config; the config loaded
+            from it provides `architecture`, `distill_loss` and
+            `pretrain_weights` for the teacher.
+    """
+
+    def __init__(self, cfg, slim_cfg):
+        super(DistillModel, self).__init__()
+
+        self.student_model = create(cfg.architecture)
+        logger.debug('Load student model pretrain_weights:{}'.format(
+            cfg.pretrain_weights))
+        load_pretrain_weight(self.student_model, cfg.pretrain_weights)
+
+        # NOTE(review): load_config(slim_cfg) presumably updates the shared
+        # global config, so the student is created before it -- confirm.
+        slim_cfg = load_config(slim_cfg)
+        self.teacher_model = create(slim_cfg.architecture)
+        self.distill_loss = create(slim_cfg.distill_loss)
+        logger.debug('Load teacher model pretrain_weights:{}'.format(
+            slim_cfg.pretrain_weights))
+        load_pretrain_weight(self.teacher_model, slim_cfg.pretrain_weights)
+
+        for param in self.teacher_model.parameters():
+            param.trainable = False
+
+    def parameters(self, include_sublayers=True):
+        """Return the student's parameters only.
+
+        `include_sublayers` mirrors `paddle.nn.Layer.parameters` so that the
+        override stays call-compatible with the base class.
+        """
+        return self.student_model.parameters(include_sublayers)
+
+    def forward(self, inputs):
+        """Run the student; when training, add the distillation loss.
+
+        In training mode the student's loss dict is returned with
+        `distill_loss` and `teacher_loss` added and `distill_loss` summed
+        into `loss`; otherwise the student's output is returned as is.
+        """
+        if not self.training:
+            return self.student_model(inputs)
+
+        # Both models run first: the distillation loss (e.g.
+        # DistillYOLOv3Loss) reads tensors recorded during their forward.
+        teacher_loss = self.teacher_model(inputs)
+        student_loss = self.student_model(inputs)
+        loss = self.distill_loss(self.teacher_model, self.student_model)
+        student_loss['distill_loss'] = loss
+        student_loss['teacher_loss'] = teacher_loss['loss']
+        student_loss['loss'] += student_loss['distill_loss']
+        return student_loss
+
+
+@register
+class DistillYOLOv3Loss(nn.Layer):
+    """Distillation loss between a teacher and a student YOLOv3 model.
+
+    It is computed from the `[x, y, w, h, obj, cls]` lists found in the
+    `distill_pairs` attribute of each model's `yolo_head.loss`.
+
+    Args:
+        weight: factor applied to the summed loss terms.
+    """
+
+    def __init__(self, weight=1000):
+        super(DistillYOLOv3Loss, self).__init__()
+        self.weight = weight
+
+    def obj_weighted_reg(self, sx, sy, sw, sh, tx, ty, tw, th, tobj):
+        """Box regression term, weighted by the teacher's objectness."""
+        loss_x = ops.sigmoid_cross_entropy_with_logits(sx, F.sigmoid(tx))
+        loss_y = ops.sigmoid_cross_entropy_with_logits(sy, F.sigmoid(ty))
+        loss_w = paddle.abs(sw - tw)
+        loss_h = paddle.abs(sh - th)
+        loss = paddle.add_n([loss_x, loss_y, loss_w, loss_h])
+        weighted_loss = paddle.mean(loss * F.sigmoid(tobj))
+        return weighted_loss
+
+    def obj_weighted_cls(self, scls, tcls, tobj):
+        """Classification term, weighted by the teacher's objectness."""
+        loss = ops.sigmoid_cross_entropy_with_logits(scls, F.sigmoid(tcls))
+        weighted_loss = paddle.mean(paddle.multiply(loss, F.sigmoid(tobj)))
+        return weighted_loss
+
+    def obj_loss(self, sobj, tobj):
+        """Objectness term against the binarised teacher output."""
+        # Teacher values above zero are treated as positive targets.
+        obj_mask = paddle.cast(tobj > 0., dtype="float32")
+        obj_mask.stop_gradient = True
+        loss = paddle.mean(
+            ops.sigmoid_cross_entropy_with_logits(sobj, obj_mask))
+        return loss
+
+    def forward(self, teacher_model, student_model):
+        """Return the weighted sum of the three terms over all outputs."""
+        teacher_distill_pairs = teacher_model.yolo_head.loss.distill_pairs
+        student_distill_pairs = student_model.yolo_head.loss.distill_pairs
+        distill_reg_loss, distill_cls_loss, distill_obj_loss = [], [], []
+        for s_pair, t_pair in zip(student_distill_pairs, teacher_distill_pairs):
+            sx, sy, sw, sh, sobj, scls = s_pair
+            tx, ty, tw, th, tobj, tcls = t_pair
+            distill_reg_loss.append(
+                self.obj_weighted_reg(sx, sy, sw, sh, tx, ty, tw, th, tobj))
+            distill_cls_loss.append(self.obj_weighted_cls(scls, tcls, tobj))
+            distill_obj_loss.append(self.obj_loss(sobj, tobj))
+        distill_reg_loss = paddle.add_n(distill_reg_loss)
+        distill_cls_loss = paddle.add_n(distill_cls_loss)
+        distill_obj_loss = paddle.add_n(distill_obj_loss)
+        loss = (distill_reg_loss + distill_cls_loss + distill_obj_loss
+                ) * self.weight
+        return loss
diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py
index f70cad4fce8c6bb32bde9ae577621e1b69b25334..d491976f58c6d1e63e6901c793d95e9c9e25f888 100644
--- a/ppdet/utils/checkpoint.py
+++ b/ppdet/utils/checkpoint.py
@@ -178,6 +178,53 @@ def load_pretrain_weight(model, pretrain_weight):
     logger.info('Finish loading model weights: {}'.format(weights_path))
 
 
+def load_static_pretrain_weight(model, pretrain_weight):
+    """Load weights saved by a static-graph program into `model`.
+
+    Entries are matched through each parameter's `.name`: a `state_dict`
+    entry whose name is found in the loaded program state takes the loaded
+    value, every other entry keeps the value the model already holds.
+
+    Args:
+        model: layer to update; `state_dict` and `set_dict` are called on it.
+        pretrain_weight (str): weights path or URL; a URL is first passed
+            through `get_weights_path_dist`.
+
+    Raises:
+        ValueError: if the path returned by `_strip_postfix` is neither a
+            directory nor a file and has no `.pdparams` counterpart.
+    """
+    if is_url(pretrain_weight):
+        pretrain_weight = get_weights_path_dist(pretrain_weight)
+
+    path = _strip_postfix(pretrain_weight)
+    path_found = (os.path.isdir(path) or os.path.isfile(path) or
+                  os.path.exists(path + '.pdparams'))
+    if not path_found:
+        raise ValueError("Model pretrain path `{}` does not exists. "
+                         "If you don't want to load pretrain model, "
+                         "please delete `pretrain_weights` field in "
+                         "config file.".format(path))
+
+    current_state = model.state_dict()
+    static_state = paddle.static.load_program_state(path)
+    merged_state = {}
+    for key, param in current_state.items():
+        weight_name = param.name
+        if weight_name in static_state:
+            loaded = static_state[weight_name]
+            logger.info('Load weight: {}, shape: {}'.format(weight_name,
+                                                            loaded.shape))
+            merged_state[key] = loaded
+            continue
+        # Only missing backbone weights are reported.
+        if 'backbone' in key:
+            logger.info('Lack weight: {}, structure name: {}'.format(
+                weight_name, key))
+        merged_state[key] = param
+    model.set_dict(merged_state)
+
+
 def save_model(model, optimizer, save_dir, save_name, last_epoch):
     """
     save model into disk.
diff --git a/tools/eval.py b/tools/eval.py
index 21ee29d160cfb1331489dcc09a597e302f6a09c8..3025dd822d021811440ceb11964d9bd357eb3a15 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -33,6 +33,7 @@ from ppdet.utils.check import check_gpu, check_version, check_config
 from ppdet.utils.cli import ArgsParser
 from ppdet.engine import Trainer, init_parallel_env
 from ppdet.metrics.coco_utils import json_eval_results
+from ppdet.slim import build_slim_model
 
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('eval')
@@ -100,22 +101,22 @@ def run(FLAGS, cfg):
 
 def main():
     FLAGS = parse_args()
-
     cfg = load_config(FLAGS.config)
     # TODO: bias should be unified
     cfg['bias'] = 1 if FLAGS.bias else 0
     cfg['classwise'] = True if FLAGS.classwise else False
     cfg['output_eval'] = FLAGS.output_eval
     merge_config(FLAGS.opt)
+
+    place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
+
     if FLAGS.slim_config:
-        slim_cfg = load_config(FLAGS.slim_config)
-        merge_config(slim_cfg)
+        cfg = build_slim_model(cfg, FLAGS.slim_config, mode='eval')
+
     check_config(cfg)
     check_gpu(cfg.use_gpu)
     check_version()
 
-    place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
-
     run(FLAGS, cfg)
 
 
diff --git a/tools/export_model.py b/tools/export_model.py
index d04422873d06e49e2de596d09496c91778251532..8cf3885c88552ca9b48f8b8d6796377d96912a2e 100644
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -31,6 +31,7 @@ from ppdet.core.workspace import load_config, merge_config
 from ppdet.utils.check import check_gpu, check_version, check_config
 from ppdet.utils.cli import ArgsParser
 from ppdet.engine import Trainer
+from ppdet.slim import build_slim_model
 
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('export_model')
@@ -84,15 +85,15 @@ def run(FLAGS, cfg):
 def main():
     paddle.set_device("cpu")
     FLAGS = parse_args()
-
     cfg = load_config(FLAGS.config)
     # TODO: to be refined in the future
     if 'norm_type' in cfg and cfg['norm_type'] == 'sync_bn':
         FLAGS.opt['norm_type'] = 'bn'
     merge_config(FLAGS.opt)
+
     if FLAGS.slim_config:
-        slim_cfg = load_config(FLAGS.slim_config)
-        merge_config(slim_cfg)
+        cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test')
+
     check_config(cfg)
     check_gpu(cfg.use_gpu)
     check_version()
diff --git a/tools/infer.py b/tools/infer.py
index a2507680b812b34e04007c4bff8e366330bf4760..19d7b36f6460521b01b0f20f540984399c701991 100755
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -31,6 +31,7 @@ from ppdet.core.workspace import load_config, merge_config
 from ppdet.engine import Trainer
 from ppdet.utils.check import check_gpu, check_version, check_config
 from ppdet.utils.cli import ArgsParser
+from ppdet.slim import build_slim_model
 
 from ppdet.utils.logger import setup_logger
 logger = setup_logger('train')
@@ -127,19 +128,20 @@ def run(FLAGS, cfg):
 
 def main():
     FLAGS = parse_args()
-
     cfg = load_config(FLAGS.config)
     cfg['use_vdl'] = FLAGS.use_vdl
     cfg['vdl_log_dir'] = FLAGS.vdl_log_dir
     merge_config(FLAGS.opt)
+
+    place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
+
     if FLAGS.slim_config:
-        slim_cfg = load_config(FLAGS.slim_config)
-        merge_config(slim_cfg)
+        cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test')
+
     check_config(cfg)
     check_gpu(cfg.use_gpu)
     check_version()
 
-    place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
     run(FLAGS, cfg)
 
 
diff --git a/tools/train.py b/tools/train.py
index cdbe87ab2d1603527ddf60397a66ad53670a04f3..565ba8e906f19c95c1a75a8e1e9ce479c12d4cac 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -33,6 +33,7 @@ import paddle
 from ppdet.core.workspace import load_config, merge_config, create
 from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from ppdet.engine import Trainer, init_parallel_env, set_random_seed, init_fleet_env
+from ppdet.slim import build_slim_model
 
 import ppdet.utils.cli as cli
 import ppdet.utils.check as check
@@ -98,7 +99,7 @@ def run(FLAGS, cfg):
     # load weights
     if FLAGS.resume is not None:
         trainer.resume_weights(FLAGS.resume)
-    elif not FLAGS.slim_config and 'pretrain_weights' in cfg and cfg.pretrain_weights:
+    elif 'pretrain_weights' in cfg and cfg.pretrain_weights:
         trainer.load_weights(cfg.pretrain_weights)
 
     # training
@@ -107,22 +108,22 @@ def run(FLAGS, cfg):
 
 def main():
     FLAGS = parse_args()
-
     cfg = load_config(FLAGS.config)
     cfg['fp16'] = FLAGS.fp16
     cfg['fleet'] = FLAGS.fleet
     cfg['use_vdl'] = FLAGS.use_vdl
     cfg['vdl_log_dir'] = FLAGS.vdl_log_dir
     merge_config(FLAGS.opt)
+
+    place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
+
     if FLAGS.slim_config:
-        slim_cfg = load_config(FLAGS.slim_config)
-        merge_config(slim_cfg)
+        cfg = build_slim_model(cfg, FLAGS.slim_config)
+
     check.check_config(cfg)
     check.check_gpu(cfg.use_gpu)
     check.check_version()
 
-    place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
-
     run(FLAGS, cfg)