diff --git a/example/auto_compression/detection/eval.py b/example/auto_compression/detection/eval.py index d80f3cfe8af1a0461cc25a66666f40cae17a9343..fc0c09ae46c644fea8ca6218d0f0da3544d59161 100644 --- a/example/auto_compression/detection/eval.py +++ b/example/auto_compression/detection/eval.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from keypoint_utils import keypoint_post_process from post_process import PPYOLOEPostProcess diff --git a/example/auto_compression/detection/post_quant.py b/example/auto_compression/detection/post_quant.py index b3a709000e7a28d950e4aaf9931b7f990a81df9d..edc7d2fea66dfb16e51b8ad16a5e61b75294b895 100644 --- a/example/auto_compression/detection/post_quant.py +++ b/example/auto_compression/detection/post_quant.py @@ -19,7 +19,7 @@ import argparse import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.quant import quant_post_static diff --git a/example/auto_compression/detection/run.py b/example/auto_compression/detection/run.py index a3c46d4753440a54b96c264f197f0664b6bb4f10..6a4838cad67ed633f190b4284efb8045e6e1f242 100644 --- a/example/auto_compression/detection/run.py +++ b/example/auto_compression/detection/run.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from keypoint_utils import keypoint_post_process from post_process import PPYOLOEPostProcess @@ -126,7 +126,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] reader_cfg = load_config(global_config['reader_config']) diff --git a/example/auto_compression/image_classification/eval.py b/example/auto_compression/image_classification/eval.py index d0e0c3d17ed845e4da649e3867f2f8ac1de6e8e3..9cd9b4a3b4a2bff7e97c00be88dc50fe802779e6 100644 --- a/example/auto_compression/image_classification/eval.py +++ b/example/auto_compression/image_classification/eval.py @@ -23,7 +23,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): diff --git a/example/auto_compression/image_classification/infer.py b/example/auto_compression/image_classification/infer.py index 5060115c60e9f16dfeb4d417c4a2f49a6f517602..46eb7115a948e10fa756e89c3ea83dce8d6ba7cc 100644 --- a/example/auto_compression/image_classification/infer.py +++ b/example/auto_compression/image_classification/infer.py @@ -22,7 +22,7 @@ import yaml from utils import preprocess, postprocess import paddle from paddle.inference import create_predictor -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config def argsparser(): diff --git a/example/auto_compression/image_classification/run.py b/example/auto_compression/image_classification/run.py index d8da1a9f419b7f19c03b7fb004ea0725724f2803..7d660431391f6338c97de14582cb999b8177c3c2 100644 --- a/example/auto_compression/image_classification/run.py +++ b/example/auto_compression/image_classification/run.py @@ -24,7 +24,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py index 769f58ef7580b950521e4e8b841d64ea20d94f34..c70a3a344b2ce5ca626551120c782f8c56ec708e 100644 --- a/example/auto_compression/nlp/run.py +++ b/example/auto_compression/nlp/run.py @@ -15,7 +15,7 @@ from paddlenlp.datasets import load_dataset from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.data.sampler import SamplerHelper from paddlenlp.metrics import Mcc, PearsonAndSpearman -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.auto_compression.compressor import AutoCompression diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py index 4da4e703fb8b3177f17d567564159f0b6a41f483..0c730dffa6823f5e4b6a0879be2a5b120ada320b 100644 --- a/example/auto_compression/pytorch_huggingface/run.py +++ b/example/auto_compression/pytorch_huggingface/run.py @@ -27,7 +27,7 @@ from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenize from paddlenlp.datasets import load_dataset from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression.compressor import AutoCompression
diff --git a/example/auto_compression/pytorch_yolov5/eval.py b/example/auto_compression/pytorch_yolov5/eval.py index 42f2e121205b22fafd277ac8868f9303976f25ec..68461c995b20537ce2c1f396d1572178f65be16e 100644 --- a/example/auto_compression/pytorch_yolov5/eval.py +++ b/example/auto_compression/pytorch_yolov5/eval.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.common import load_onnx_model from post_process import YOLOv5PostProcess, coco_metric from dataset import COCOValDataset diff --git a/example/auto_compression/pytorch_yolov5/post_quant.py b/example/auto_compression/pytorch_yolov5/post_quant.py index 84db4f989f41ad76c1aa2a2cbd49656b456f8750..97f467411b02ef559ba978ef48630eb74e844bf1 100644 --- a/example/auto_compression/pytorch_yolov5/post_quant.py +++ b/example/auto_compression/pytorch_yolov5/post_quant.py @@ -17,11 +17,12 @@ import sys import numpy as np import argparse import paddle -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.common import load_onnx_model from paddleslim.quant import quant_post_static from dataset import COCOTrainDataset + def argsparser(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( diff --git a/example/auto_compression/pytorch_yolov5/run.py b/example/auto_compression/pytorch_yolov5/run.py index 9f505535b55f1b92d583dc3099aa8e887040fef6..b1ca6bceee28c3f23e0b70158c518a1e9820ab13 100644 --- a/example/auto_compression/pytorch_yolov5/run.py +++ b/example/auto_compression/pytorch_yolov5/run.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from dataset import COCOValDataset, COCOTrainDataset from post_process import YOLOv5PostProcess, coco_metric @@ -75,7 +75,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] dataset = COCOTrainDataset( diff --git a/example/auto_compression/pytorch_yolov6/eval.py b/example/auto_compression/pytorch_yolov6/eval.py index 1d28466ad6953244b9dcf809628a6288c3c5d4d4..038a1f8bc224bf90a57c2b88741ce3db6f07d45e 100644 --- a/example/auto_compression/pytorch_yolov6/eval.py +++ b/example/auto_compression/pytorch_yolov6/eval.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.common import load_onnx_model from post_process import YOLOv6PostProcess, coco_metric from dataset import COCOValDataset diff --git a/example/auto_compression/pytorch_yolov6/post_quant.py b/example/auto_compression/pytorch_yolov6/post_quant.py index 84db4f989f41ad76c1aa2a2cbd49656b456f8750..97f467411b02ef559ba978ef48630eb74e844bf1 100644 --- a/example/auto_compression/pytorch_yolov6/post_quant.py +++ b/example/auto_compression/pytorch_yolov6/post_quant.py @@ -17,11 +17,12 @@ import sys import numpy as np import argparse import paddle -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.common import load_onnx_model from paddleslim.quant import quant_post_static from dataset import COCOTrainDataset + def argsparser(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( diff --git a/example/auto_compression/pytorch_yolov6/run.py b/example/auto_compression/pytorch_yolov6/run.py index 7e28e1f6bdf64012cbe1b1ad08e5909c7e6a187e..8676e7b3b51c5334b7a11719d6dd29043041c869 100644 --- a/example/auto_compression/pytorch_yolov6/run.py +++ b/example/auto_compression/pytorch_yolov6/run.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from dataset import COCOValDataset, COCOTrainDataset from post_process import YOLOv6PostProcess, coco_metric @@ -75,7 +75,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] dataset = COCOTrainDataset( diff --git a/example/auto_compression/pytorch_yolov7/eval.py b/example/auto_compression/pytorch_yolov7/eval.py index 451301f1056c0233eefa49b13ffa7265f5d6bef2..f758f8f97495eed3d2281e25d1ca675ee6a9ab42 100644 --- a/example/auto_compression/pytorch_yolov7/eval.py +++ b/example/auto_compression/pytorch_yolov7/eval.py @@ -18,8 +18,8 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config -from paddleslim.auto_compression.utils import load_inference_model +from paddleslim.common import load_config as load_slim_config +from paddleslim.common import load_inference_model from post_process import YOLOv7PostProcess, coco_metric from dataset import COCOValDataset diff --git a/example/auto_compression/pytorch_yolov7/post_quant.py b/example/auto_compression/pytorch_yolov7/post_quant.py index a253e671f8fd16f0a8ab3d13dbf1413de6f56d14..97f467411b02ef559ba978ef48630eb74e844bf1 100644 --- a/example/auto_compression/pytorch_yolov7/post_quant.py +++ b/example/auto_compression/pytorch_yolov7/post_quant.py @@ -17,7 +17,7 @@ import sys import numpy as np import argparse import paddle -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.common import load_onnx_model from paddleslim.quant import quant_post_static from dataset import COCOTrainDataset diff --git a/example/auto_compression/pytorch_yolov7/run.py b/example/auto_compression/pytorch_yolov7/run.py index b3df96397f10dbe815304660518400443569ccc9..f6ab75334c2ee501e6dbf69bd754b9be55e8c5cd 100644 --- a/example/auto_compression/pytorch_yolov7/run.py +++ b/example/auto_compression/pytorch_yolov7/run.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from dataset import COCOValDataset, COCOTrainDataset from post_process import YOLOv7PostProcess, coco_metric @@ -75,7 +75,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] dataset = COCOTrainDataset( diff --git a/example/auto_compression/semantic_segmentation/run.py b/example/auto_compression/semantic_segmentation/run.py index 4f4d4c56fb5f4653ceb46f3a94b5ae2d7e98d3cf..6bc7d75293e21741c1b88d0c399b5873bd17c9a1 100644 --- a/example/auto_compression/semantic_segmentation/run.py +++ b/example/auto_compression/semantic_segmentation/run.py @@ -21,7 +21,7 @@ from paddleseg.cvlibs import Config as PaddleSegDataConfig from paddleseg.utils import worker_init_fn from paddleslim.auto_compression import AutoCompression -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleseg.core.infer import reverse_transform from paddleseg.utils import metrics diff --git a/example/auto_compression/tensorflow_mobilenet/eval.py b/example/auto_compression/tensorflow_mobilenet/eval.py index 85e5fdaf504ce30c7ddb8d4013371da906006b7b..bf0987e3538c86b9ca278348db3a5b444cb00773 100644 --- a/example/auto_compression/tensorflow_mobilenet/eval.py +++ b/example/auto_compression/tensorflow_mobilenet/eval.py @@ -23,7 +23,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): @@ -93,7 +93,8 @@ def eval(): def main(): global global_config all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] global data_dir data_dir = global_config['data_dir'] diff --git a/example/auto_compression/tensorflow_mobilenet/run.py b/example/auto_compression/tensorflow_mobilenet/run.py index 86345ec2071a95b3e630120f274cb1e6e4b99ba6..aefd2941f637d6e933bd978933e5829b211180f6 100644 --- a/example/auto_compression/tensorflow_mobilenet/run.py +++ b/example/auto_compression/tensorflow_mobilenet/run.py @@ -23,7 +23,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression @@ -107,7 +107,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] global data_dir data_dir = global_config['data_dir'] diff --git a/example/full_quantization/detection/eval.py b/example/full_quantization/detection/eval.py index 81a169d164744eaba7eb650ec6a2cabdb9ae4433..d6c7d49daf8ccc43ad914eb56dd7727ae3e1f00b 100644 --- a/example/full_quantization/detection/eval.py +++ b/example/full_quantization/detection/eval.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): diff --git a/example/full_quantization/detection/run.py b/example/full_quantization/detection/run.py index aca12b2610dbef526cd3c58bb7a8ebdb54c61182..fb0b9ad05cafe33399a76740e54ad46d3c204294 100644 --- a/example/full_quantization/detection/run.py +++ b/example/full_quantization/detection/run.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression diff --git a/example/post_training_quantization/pytorch_yolo_series/README.md b/example/post_training_quantization/pytorch_yolo_series/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c83bf76a2bb0d119590e5f703fe87c8e4f9e310e --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/README.md @@ -0,0 +1,138 @@
+# YOLO Series Post-Training Quantization Example
+
+Contents:
+- [1. Introduction](#1-introduction)
+- [2. Benchmark](#2-benchmark)
+- [3. Post-Training Quantization Workflow](#3-post-training-quantization-workflow)
+  - [3.1 Prepare the Environment](#31-prepare-the-environment)
+  - [3.2 Prepare the Dataset](#32-prepare-the-dataset)
+  - [3.3 Prepare the Inference Model](#33-prepare-the-inference-model)
+  - [3.4 Run Post-Training Quantization](#34-run-post-training-quantization)
+  - [3.5 Evaluate Model Accuracy](#35-evaluate-model-accuracy)
+  - [3.6 Improve Post-Training Quantization Accuracy](#36-improve-post-training-quantization-accuracy)
+- [4. Deployment](#4-deployment)
+- [5. FAQ](#5-faq)
+
+## 1. Introduction
+
+This example takes the YOLO-series object detectors [ultralytics/yolov5](https://github.com/ultralytics/yolov5), [meituan/YOLOv6](https://github.com/meituan/YOLOv6) and [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7) as examples: the inference models exported from PyTorch are converted into Paddle inference models (see the conversion sketch below), compressed with post-training quantization (PTQ), and the sensitivity-analysis tool is used to improve PTQ accuracy. The quantized models can be deployed with Paddle Inference, or exported to ONNX format and deployed with TensorRT.
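+The conversion step mentioned above is handled by PaddleSlim's `load_onnx_model` (X2Paddle under the hood). Below is a minimal sketch of what the scripts in this example do with it; the `./yolov5s.onnx` path is illustrative:
+
+```python
+import paddle
+from paddleslim.common import load_onnx_model
+
+paddle.enable_static()
+# Converts the ONNX file into a Paddle inference model on disk; the example
+# scripts then read it back from the '<name>_infer' directory.
+load_onnx_model('./yolov5s.onnx')
+inference_model_path = './yolov5s.onnx'.rstrip().rstrip('.onnx') + '_infer'
+```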
+## 2. Benchmark
+| Model | Strategy | Input Size | mAP<sup>val<br>0.5:0.95</sup> | FP32 Latency<br>(ms) | FP16 Latency<br>(ms) | INT8 Latency<br>(ms) | Config File | Inference Model |
+| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: |
+| YOLOv5s | Base model | 640*640 | 37.4 | 5.95ms | 2.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) |
+| YOLOv5s | KL offline quantization | 640*640 | 36.0 | - | - | 1.87ms | - | - |
+| | | | | | | | | |
+| YOLOv6s | Base model | 640*640 | 42.4 | 9.06ms | 2.90ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) |
+| YOLOv6s | KL offline quantization (before analysis) | 640*640 | 30.3 | - | - | 1.83ms | - | - |
+| YOLOv6s | KL offline quantization (after analysis) | 640*640 | 39.7 | - | - | - | - | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_analyzed_ptq.tar) |
+| | | | | | | | | |
+| YOLOv7 | Base model | 640*640 | 51.1 | 26.84ms | 7.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) |
+| YOLOv7 | KL offline quantization | 640*640 | 50.2 | - | - | 4.55ms | - | - |
+
+Notes:
+- All mAP values are evaluated on the COCO val2017 dataset.
+
+## 3. Post-Training Quantization Workflow
+
+#### 3.1 Prepare the Environment
+- PaddlePaddle >= 2.3 (install from the [PaddlePaddle official site](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
+- PaddleSlim > 2.3
+- opencv-python
+
+(1) Install PaddlePaddle:
+```shell
+# CPU
+pip install paddlepaddle
+# GPU
+pip install paddlepaddle-gpu
+```
+
+(2) Install PaddleSlim:
+```shell
+pip install paddleslim
+```
+
+#### 3.2 Prepare the Dataset
+By default this example runs the post-training quantization experiments on the COCO dataset. Download [Train](http://images.cocodataset.org/zips/train2017.zip), [Val](http://images.cocodataset.org/zips/val2017.zip) and [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip) from the [MS COCO website](https://cocodataset.org); the sketch after the directory layout shows how these paths are consumed.
+
+The expected directory layout:
+```
+dataset/coco/
+├── annotations
+│   ├── instances_train2017.json
+│   ├── instances_val2017.json
+│   |   ...
+├── train2017
+│   ├── 000000000009.jpg
+│   ├── 000000580008.jpg
+│   |   ...
+├── val2017
+│   ├── 000000000139.jpg
+│   ├── 000000000285.jpg
+```
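+For reference, the eval and analysis scripts consume these dataset paths through the YAML configs. A minimal sketch, assuming the `dataset.py` and `configs/yolov5s_ptq.yaml` added in this example:
+
+```python
+import paddle
+from paddleslim.common import load_config
+from dataset import COCOValDataset  # dataset.py from this example
+
+config = load_config('./configs/yolov5s_ptq.yaml')
+val_dataset = COCOValDataset(
+    dataset_dir=config['dataset_dir'],  # e.g. /dataset/coco/
+    image_dir=config['val_image_dir'],  # val2017
+    anno_path=config['val_anno_path'])  # annotations/instances_val2017.json
+val_loader = paddle.io.DataLoader(val_dataset, batch_size=1)
+```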
+#### 3.3 Prepare the Inference Model
+(1) Prepare the ONNX model:
+
+**yolov5**: export an ONNX model following the official [export tutorial](https://github.com/ultralytics/yolov5/issues/251) of [ultralytics/yolov5](https://github.com/ultralytics/yolov5), or download the prepared [yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx).
+
+**yolov6**: export an ONNX model following the official [export tutorial](https://github.com/meituan/YOLOv6/blob/main/deploy/ONNX/README.md) of [meituan/YOLOv6](https://github.com/meituan/YOLOv6), or download the prepared [yolov6s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx).
+
+**yolov7**: export an ONNX model with the export script of [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7), or download the prepared [yolov7.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx).
+
+#### 3.4 Run Post-Training Quantization
+Post-training quantization is launched through the post_quant.py script, which quantizes the model with the `paddleslim.quant.quant_post_static` API. Set the model path, data path and quantization parameters in the config file, then run:
+- yolov5
+
+```shell
+python post_quant.py --config_path=./configs/yolov5s_ptq.yaml --save_dir=./yolov5s_ptq_out
+```
+
+- yolov6
+
+```shell
+python post_quant.py --config_path=./configs/yolov6s_ptq.yaml --save_dir=./yolov6s_ptq_out
+```
+
+- yolov7
+
+```shell
+python post_quant.py --config_path=./configs/yolov7s_ptq.yaml --save_dir=./yolov7s_ptq_out
+```
+
+#### 3.5 Evaluate Model Accuracy
+
+Set the `model_dir` field in [yolov5s_ptq.yaml](./configs/yolov5s_ptq.yaml) to the directory of the model to evaluate, then obtain its mAP with the eval.py script:
+```shell
+export CUDA_VISIBLE_DEVICES=0
+python eval.py --config_path=./configs/yolov5s_ptq.yaml
+```
+
+#### 3.6 Improve Post-Training Quantization Accuracy
+This section shows how to improve PTQ accuracy with the quantization analysis tool. PTQ needs only a small amount of data, is simple to use and produces a quantized model quickly, but it often causes a noticeable accuracy drop. PaddleSlim provides an analysis tool, built on the `paddleslim.quant.AnalysisQuant` API, that visualizes which layers are unsuitable for quantization; skipping those layers improves the accuracy of the quantized model. Since PTQ degrades yolov6 the most, take yolov6 as the example (a usage sketch of the API follows this section):
+
+```shell
+python analysis.py --config_path=./configs/yolov6s_analysis.yaml
+```
+
+The analysis reports the quantized accuracy of every layer, together with weight and activation histograms of the worst layers. The layers that cause the largest accuracy drops can then be skipped during PTQ. For yolov6, the analysis suggests skipping `conv2d_2.w_0`, `conv2d_11.w_0`, `conv2d_15.w_0`, `conv2d_46.w_0` and `conv2d_49.w_0`; use [yolov6s_analyzed_ptq.yaml](./configs/yolov6s_analyzed_ptq.yaml) and run PTQ again. Skipping these five layers raises the PTQ mAP by 9.4 points (30.3 -> 39.7):
+
+```shell
+python post_quant.py --config_path=./configs/yolov6s_analyzed_ptq.yaml --save_dir=./yolov6s_analyzed_ptq_out
+```
+
+Notes:
+- The per-layer quantization accuracy is saved to `./analysis_results/analysis.txt` by default, and the histograms to `./analysis_results/act_hist_result.pdf` and `./analysis_results/weight_hist_result.pdf`.
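+The analysis.py script below is a thin wrapper around this API. A usage sketch with the argument values from `configs/yolov6s_analysis.yaml`; `eval_function` and `data_loader` stand for the COCO mAP evaluator and the calibration loader defined in analysis.py:
+
+```python
+from paddleslim.quant.analysis import AnalysisQuant
+
+# model_dir points at the Paddle inference model converted from yolov6s.onnx
+analyzer = AnalysisQuant(
+    model_dir='./yolov6s_infer',
+    model_filename='model.pdmodel',
+    params_filename='model.pdiparams',
+    eval_function=eval_function,  # returns mAP on COCO val2017
+    quantizable_op_type=['conv2d', 'depthwise_conv2d'],
+    weight_quantize_type='channel_wise_abs_max',
+    activation_quantize_type='moving_average_abs_max',
+    is_full_quantize=False,
+    data_loader=data_loader,  # calibration data
+    batch_size=10,
+    save_dir='./analysis_results')
+analyzer.analysis()  # writes analysis.txt and the histogram PDFs
+```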

+<p align="center">
+  <img src="./images/sensitivity_rank.png" />
+</p>
+
+
+## 4. Deployment
+
+## 5. FAQ
diff --git a/example/post_training_quantization/pytorch_yolo_series/analysis.py b/example/post_training_quantization/pytorch_yolo_series/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..118a7227e9c34aefe6d5a474da8594c76785f9ca --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/analysis.py @@ -0,0 +1,115 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +import paddle +from tqdm import tqdm +from post_process import YOLOv6PostProcess, coco_metric +from dataset import COCOValDataset, COCOTrainDataset +from paddleslim.common import load_config, load_onnx_model +from paddleslim.quant.analysis import AnalysisQuant + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of analysis config.", + required=True) + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + return parser + + +def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(compiled_test_program, + feed={test_feed_names[0]: data_all['image']}, + fetch_list=test_fetch_list, + return_numpy=False) + res = {} + postprocess = YOLOv6PostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + map_res = coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + return map_res[0] + + +def main(): + + global config + config = load_config(FLAGS.config_path) + + dataset = COCOTrainDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + data_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + global val_loader + dataset = COCOValDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=False, drop_last=False, num_workers=0) + + load_onnx_model(config["model_dir"]) + inference_model_path = config["model_dir"].rstrip().rstrip( + '.onnx') + '_infer' + analyzer = AnalysisQuant( + model_dir=inference_model_path, + model_filename='model.pdmodel', + params_filename='model.pdiparams', + eval_function=eval_function, + quantizable_op_type=config['quantizable_op_type'],
+ weight_quantize_type=config['weight_quantize_type'], + activation_quantize_type=config['activation_quantize_type'], + is_full_quantize=config['is_full_quantize'], + data_loader=data_loader, + batch_size=config['batch_size'], + save_dir=config['save_dir'], ) + analyzer.analysis() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fb861ec439f76538bc1e19a3fb26821c1aa5896 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml @@ -0,0 +1,7 @@ +model_dir: ./yolov5s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensors: None # you can set it after analysis diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d1e726b4a52c1ff764bc7a84896267f566a7e86 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml @@ -0,0 +1,10 @@ +model_dir: ./yolov6s.onnx +save_dir: ./analysis_results +quantizable_op_type: ["conv2d", "depthwise_conv2d"] +weight_quantize_type: 'channel_wise_abs_max' +activation_quantize_type: 'moving_average_abs_max' +is_full_quantize: False +dataset_dir: /dataset/coco/ +val_image_dir: val2017 +val_anno_path: annotations/instances_val2017.json +batch_size: 10 diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c857032896167819fb6febf56822ff7cc25e00f7 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml @@ -0,0 +1,7 @@ +model_dir: ./yolov6s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensor_list: ['conv2d_2.w_0', 'conv2d_15.w_0', 'conv2d_46.w_0', 'conv2d_11.w_0', 'conv2d_49.w_0'] diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab67a9df8a37364098f826657634dde3139d1cf0 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml @@ -0,0 +1,7 @@ +model_dir: ./yolov6s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensor_list: None diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0ad89a207b65c4849cb284881c687934b9a0f59b --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml @@ -0,0 +1,6 @@ +model_dir: ./yolov7s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json diff --git a/example/post_training_quantization/pytorch_yolo_series/dataset.py b/example/post_training_quantization/pytorch_yolo_series/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7ddec29d0d7b2b4380d6680103dc93bec04c08e4 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/dataset.py @@ -0,0 +1,113 @@ +from pycocotools.coco import COCO +import cv2 +import os +import numpy as np +import paddle + + +class COCOValDataset(paddle.io.Dataset): + def __init__(self, + dataset_dir=None, + image_dir=None, + anno_path=None, + img_size=[640, 640]): + self.dataset_dir = dataset_dir + self.image_dir = image_dir + self.img_size = img_size + self.ann_file = os.path.join(dataset_dir, anno_path) + self.coco = COCO(self.ann_file) + ori_ids = list(sorted(self.coco.imgs.keys())) + # check gt bbox + clean_ids = [] + for idx in ori_ids: + ins_anno_ids = self.coco.getAnnIds(imgIds=[idx], iscrowd=False) + instances = self.coco.loadAnns(ins_anno_ids) + num_bbox = 0 + for inst in instances: + if inst.get('ignore', False): + continue + if 'bbox' not in inst.keys(): + continue + elif not any(np.array(inst['bbox'])): + continue + else: + num_bbox += 1 + if num_bbox > 0: + clean_ids.append(idx) + self.ids = clean_ids + + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return { + 'image': img, + 'im_id': np.array([img_id]), + 'scale_factor': scale_factor + } + + def __len__(self): + return len(self.ids) + + def _get_img_data_from_img_id(self, img_id): + img_info = self.coco.loadImgs(img_id)[0] + img_path = os.path.join(self.dataset_dir, self.image_dir, + img_info['file_name']) + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img + + def _generate_scale(self, im, target_shape, keep_ratio=True): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + if keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(target_shape) + target_size_max = np.max(target_shape) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = target_shape + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + def image_preprocess(self, img, target_shape): + # Resize image + im_scale_y, im_scale_x = self._generate_scale(img, target_shape) + img = cv2.resize( + img, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=cv2.INTER_LINEAR) + # Pad + im_h, im_w = img.shape[:2] + h, w = target_shape[:] + if h != im_h or w != im_w: + canvas = np.ones((h, w, 3), dtype=np.float32) + canvas *= np.array([114.0, 114.0, 114.0], dtype=np.float32) + canvas[0:im_h, 0:im_w, :] = img.astype(np.float32) + 
img = canvas + img = np.transpose(img / 255, [2, 0, 1]) + scale_factor = np.array([im_scale_y, im_scale_x]) + return img.astype(np.float32), scale_factor + + +class COCOTrainDataset(COCOValDataset): + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return {'x2paddle_image_arrays': img} diff --git a/example/post_training_quantization/pytorch_yolo_series/eval.py b/example/post_training_quantization/pytorch_yolo_series/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..6705104b69ac3db586bd0954e42e76010ad88426 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/eval.py @@ -0,0 +1,99 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +from tqdm import tqdm +import paddle +from paddleslim.common import load_config as load_slim_config +from paddleslim.common import load_inference_model +from post_process import YOLOv6PostProcess, coco_metric +from dataset import COCOValDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of compression strategy config.", + required=True) + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + + return parser + + +def eval(): + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + val_program, feed_target_names, fetch_targets = load_inference_model( + config["model_dir"], exe, "model.pdmodel", "model.pdiparams") + + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(val_program, + feed={feed_target_names[0]: data_all['image']}, + fetch_list=fetch_targets, + return_numpy=False) + res = {} + postprocess = YOLOv6PostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + + coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + + +def main(): + global config + config = load_slim_config(FLAGS.config_path) + + global val_loader + dataset = COCOValDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader(dataset, batch_size=1) + + eval() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + 
+ assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png b/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png new file mode 100644 index 0000000000000000000000000000000000000000..1eab297a1118f48e56d4ef496fcf5d18948016eb Binary files /dev/null and b/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png differ diff --git a/example/post_training_quantization/pytorch_yolo_series/post_process.py b/example/post_training_quantization/pytorch_yolo_series/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..3232fe3f56ede4c77975f3e9331cdabdf12ea66c --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/post_process.py @@ -0,0 +1,231 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cv2 +import json +import sys + + +def box_area(boxes): + """ + Args: + boxes(np.ndarray): [N, 4] + return: [N] + """ + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +def box_iou(box1, box2): + """ + Args: + box1(np.ndarray): [N, 4] + box2(np.ndarray): [M, 4] + return: [N, M] + """ + area1 = box_area(box1) + area2 = box_area(box2) + lt = np.maximum(box1[:, np.newaxis, :2], box2[:, :2]) + rb = np.minimum(box1[:, np.newaxis, 2:], box2[:, 2:]) + wh = rb - lt + wh = np.maximum(0, wh) + inter = wh[:, :, 0] * wh[:, :, 1] + iou = inter / (area1[:, np.newaxis] + area2 - inter) + return iou + + +def nms(boxes, scores, iou_threshold): + """ + Non Max Suppression numpy implementation. + args: + boxes(np.ndarray): [N, 4] + scores(np.ndarray): [N, 1] + iou_threshold(float): Threshold of IoU. + """ + idxs = scores.argsort() + keep = [] + while idxs.size > 0: + max_score_index = idxs[-1] + max_score_box = boxes[max_score_index][None, :] + keep.append(max_score_index) + if idxs.size == 1: + break + idxs = idxs[:-1] + other_boxes = boxes[idxs] + ious = box_iou(max_score_box, other_boxes) + idxs = idxs[ious[0] <= iou_threshold] + + keep = np.array(keep) + return keep + + +class YOLOv6PostProcess(object): + """ + Post process of YOLOv6 network. + args: + score_threshold(float): Threshold to filter out bounding boxes with low + confidence score. If not provided, consider all boxes. + nms_threshold(float): The threshold to be used in NMS. + multi_label(bool): Whether keep multi label in boxes. + keep_top_k(int): Number of total bboxes to be kept per image after NMS + step. -1 means keeping all bboxes after NMS step. 
+ """ + + def __init__(self, + score_threshold=0.25, + nms_threshold=0.5, + multi_label=False, + keep_top_k=300): + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + self.multi_label = multi_label + self.keep_top_k = keep_top_k + + def _xywh2xyxy(self, x): + # Convert from [x, y, w, h] to [x1, y1, x2, y2] + y = np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + def _non_max_suppression(self, prediction): + max_wh = 4096 # (pixels) minimum and maximum box width and height + nms_top_k = 30000 + + cand_boxes = prediction[..., 4] > self.score_threshold # candidates + output = [np.zeros((0, 6))] * prediction.shape[0] + + for batch_id, boxes in enumerate(prediction): + # Apply constraints + boxes = boxes[cand_boxes[batch_id]] + if not boxes.shape[0]: + continue + # Compute conf (conf = obj_conf * cls_conf) + boxes[:, 5:] *= boxes[:, 4:5] + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + convert_box = self._xywh2xyxy(boxes[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if self.multi_label: + i, j = (boxes[:, 5:] > self.score_threshold).nonzero() + boxes = np.concatenate( + (convert_box[i], boxes[i, j + 5, None], + j[:, None].astype(np.float32)), + axis=1) + else: + conf = np.max(boxes[:, 5:], axis=1) + j = np.argmax(boxes[:, 5:], axis=1) + re = np.array(conf.reshape(-1) > self.score_threshold) + conf = conf.reshape(-1, 1) + j = j.reshape(-1, 1) + boxes = np.concatenate((convert_box, conf, j), axis=1)[re] + + num_box = boxes.shape[0] + if not num_box: + continue + elif num_box > nms_top_k: + boxes = boxes[boxes[:, 4].argsort()[::-1][:nms_top_k]] + + # Batched NMS + c = boxes[:, 5:6] * max_wh + clean_boxes, scores = boxes[:, :4] + c, boxes[:, 4] + keep = nms(clean_boxes, scores, self.nms_threshold) + # limit detection box num + if keep.shape[0] > self.keep_top_k: + keep = keep[:self.keep_top_k] + output[batch_id] = boxes[keep] + return output + + def __call__(self, outs, scale_factor): + preds = self._non_max_suppression(outs) + bboxs, box_nums = [], [] + for i, pred in enumerate(preds): + if len(pred.shape) > 2: + pred = np.squeeze(pred) + if len(pred.shape) == 1: + pred = pred[np.newaxis, :] + pred_bboxes = pred[:, :4] + scale_factor = np.tile(scale_factor[i][::-1], (1, 2)) + pred_bboxes /= scale_factor + bbox = np.concatenate( + [ + pred[:, -1][:, np.newaxis], pred[:, -2][:, np.newaxis], + pred_bboxes + ], + axis=-1) + bboxs.append(bbox) + box_num = bbox.shape[0] + box_nums.append(box_num) + bboxs = np.concatenate(bboxs, axis=0) + box_nums = np.array(box_nums) + return {'bbox': bboxs, 'bbox_num': box_nums} + + +def coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list): + try: + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + except: + print( + "[ERROR] Not found pycocotools, please install by `pip install pycocotools`" + ) + sys.exit(1) + + coco_gt = COCO(anno_file) + cats = coco_gt.loadCats(coco_gt.getCatIds()) + clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)} + results = [] + for bboxes, bbox_nums, image_id in zip(bboxes_list, bbox_nums_list, + image_id_list): + results += _get_det_res(bboxes, bbox_nums, image_id, clsid2catid) + + output = "bbox.json" + with open(output, 'w') as f: + json.dump(results, f) + + coco_dt = coco_gt.loadRes(output) + coco_eval = COCOeval(coco_gt, coco_dt, 'bbox') + coco_eval.evaluate() + 
coco_eval.accumulate() + coco_eval.summarize() + return coco_eval.stats + + +def _get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map): + det_res = [] + k = 0 + for i in range(len(bbox_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = bbox_nums[i] + for j in range(det_nums): + dt = bboxes[k] + k = k + 1 + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + w = xmax - xmin + h = ymax - ymin + bbox = [xmin, ymin, w, h] + dt_res = { + 'image_id': cur_image_id, + 'category_id': category_id, + 'bbox': bbox, + 'score': score + } + det_res.append(dt_res) + return det_res diff --git a/example/post_training_quantization/pytorch_yolo_series/post_quant.py b/example/post_training_quantization/pytorch_yolo_series/post_quant.py new file mode 100644 index 0000000000000000000000000000000000000000..cac752a934b350c6fec81866c2e44add0edbfe65 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/post_quant.py @@ -0,0 +1,93 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +import paddle +from paddleslim.common import load_config, load_onnx_model +from paddleslim.quant import quant_post_static +from dataset import COCOTrainDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of post training quantization config.", + required=True) + parser.add_argument( + '--save_dir', + type=str, + default='ptq_out', + help="directory to save compressed model.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + parser.add_argument( + '--algo', type=str, default='KL', help="post quant algo.") + + return parser + + +def main(): + global config + config = load_config(FLAGS.config_path) + + dataset = COCOTrainDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + train_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + # since the model converted from PyTorch is in ONNX format, + # call load_onnx_model first and rename the model_dir + load_onnx_model(config["model_dir"]) + inference_model_path = config["model_dir"].rstrip().rstrip( + '.onnx') + '_infer' + + quant_post_static( + executor=exe, + model_dir=inference_model_path, + quantize_model_path=FLAGS.save_dir, + data_loader=train_loader, + model_filename='model.pdmodel', + params_filename='model.pdiparams', + batch_size=32, + batch_nums=10, + algo=FLAGS.algo, + hist_percent=0.999, + is_full_quantize=False, + bias_correction=False, + onnx_format=True) + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser()
+ FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/paddleslim/auto_compression/__init__.py b/paddleslim/auto_compression/__init__.py index 990ad37b71e76cb8044a30e64a7c4377dec274a1..cfc26259d2fa85cd80710eb61972483681525f6b 100644 --- a/paddleslim/auto_compression/__init__.py +++ b/paddleslim/auto_compression/__init__.py @@ -19,8 +19,14 @@ from .config_helpers import * from .utils import * __all__ = [ - "AutoCompression", "Quantization", "Distillation", - "MultiTeacherDistillation", "HyperParameterOptimization", "Prune", - "UnstructurePrune", "ProgramInfo", "TrainConfig", "save_config", - "load_config", "predict_compressed_model" + "AutoCompression", + "Quantization", + "Distillation", + "MultiTeacherDistillation", + "HyperParameterOptimization", + "Prune", + "UnstructurePrune", + "ProgramInfo", + "TrainConfig", + "predict_compressed_model", ] diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py index b64152b9c3138bb87b062df8592c52b95dbb3694..a907e0c9b7d6a997d04c9b7ec500f6093da3c2d4 100644 --- a/paddleslim/auto_compression/compressor.py +++ b/paddleslim/auto_compression/compressor.py @@ -29,13 +29,15 @@ from ..quant.quanter import convert, quant_post from ..common.recover_program import recover_inference_program from ..common import get_logger from ..common.patterns import get_patterns +from ..common.load_model import load_inference_model, get_model_dir +from ..common.dataloader import wrap_dataloader, get_feed_vars +from ..common.config_helper import load_config from ..analysis import TableLatencyPredictor from .create_compressed_program import build_distill_program, build_quant_program, build_prune_program, remove_unused_var_nodes from .strategy_config import TrainConfig, ProgramInfo, merge_config from .auto_strategy import prepare_strategy, get_final_quant_config, create_strategy_config, create_train_config -from .config_helpers import load_config, extract_strategy_config, extract_train_config +from .config_helpers import extract_strategy_config, extract_train_config from .utils.predict import with_variable_shape -from .utils import get_feed_vars, wrap_dataloader, load_inference_model, get_model_dir _logger = get_logger(__name__, level=logging.INFO) diff --git a/paddleslim/auto_compression/config_helpers.py b/paddleslim/auto_compression/config_helpers.py index ebc5b45c83bb393e48cb005541e9f8733499f790..b1e426cc246b327a54f35ea0f0df7f5233391492 100644 --- a/paddleslim/auto_compression/config_helpers.py +++ b/paddleslim/auto_compression/config_helpers.py @@ -14,42 +14,7 @@ import yaml import os from paddleslim.auto_compression.strategy_config import * - -__all__ = ['save_config', 'load_config'] - - -def print_arguments(args, level=0): - if level == 0: - print('----------- Running Arguments -----------') - for arg, value in sorted(args.items()): - if isinstance(value, dict): - print('\t' * level, '%s:' % arg) - print_arguments(value, level + 1) - else: - print('\t' * level, '%s: %s' % (arg, value)) - if level == 0: - print('------------------------------------------') - - -def load_config(config): - """Load configurations from yaml file into dict. - Fields validation is skipped for loading some custom information. - Args: - config(str): The path of configuration file. - Returns: - dict: A dict storing configuration information. 
- """ - if config is None: - return None - assert isinstance( - config, - str), f"config should be str but got type(config)={type(config)}" - assert os.path.exists(config) and os.path.isfile( - config), f"{config} not found or it is not a file." - with open(config) as f: - cfg = yaml.load(f, Loader=yaml.FullLoader) - print_arguments(cfg) - return cfg +from ..common.config_helper import load_config def extract_strategy_config(config): @@ -101,12 +66,3 @@ def extract_train_config(config): **value) if value is not None else TrainConfig() # return default training config when it is not set return TrainConfig() - - -def save_config(config, config_path): - """ - convert dict config to yaml. - """ - f = open(config_path, "w") - yaml.dump(config, f) - f.close() diff --git a/paddleslim/auto_compression/create_compressed_program.py b/paddleslim/auto_compression/create_compressed_program.py index 30276bbf64b2e2e23d9e4adc19ee435c2c72ee19..8a6c7db2f5b691a2fc81643830e53bef231350ee 100644 --- a/paddleslim/auto_compression/create_compressed_program.py +++ b/paddleslim/auto_compression/create_compressed_program.py @@ -23,7 +23,7 @@ from ..dist import * from ..common.recover_program import recover_inference_program, _remove_fetch_node from ..common import get_logger from .strategy_config import ProgramInfo -from .utils import load_inference_model +from ..common.load_model import load_inference_model _logger = get_logger(__name__, level=logging.INFO) __all__ = [ @@ -52,7 +52,8 @@ def _create_optimizer(train_config): optimizer_builder = train_config['optimizer_builder'] assert isinstance( optimizer_builder, dict - ), f"Value of 'optimizer_builder' in train_config should be dict but got {type(optimizer_builder)}" + ), "Value of 'optimizer_builder' in train_config should be dict but got {}".format( + type(optimizer_builder)) if 'grad_clip' in optimizer_builder: g_clip_params = optimizer_builder['grad_clip'] g_clip_type = g_clip_params.pop('type') @@ -444,9 +445,8 @@ def build_prune_program(executor, "####################channel pruning##########################") for param in pruned_program.global_block().all_parameters(): if param.name in original_shapes: - _logger.info( - f"{param.name}, from {original_shapes[param.name]} to {param.shape}" - ) + _logger.info("{}, from {} to {}".format( + param.name, original_shapes[param.name], param.shape)) _logger.info( "####################channel pruning end##########################") train_program_info.program = pruned_program diff --git a/paddleslim/auto_compression/utils/__init__.py b/paddleslim/auto_compression/utils/__init__.py index aa4f3ec07ac02436b5eaed00c781b9a6e34e70f8..e3c3a49d71823f432c810d9dccee0205d548f7ed 100644 --- a/paddleslim/auto_compression/utils/__init__.py +++ b/paddleslim/auto_compression/utils/__init__.py @@ -14,11 +14,5 @@ from __future__ import absolute_import from .predict import predict_compressed_model -from .dataloader import * -from . import dataloader -from .load_model import * -from . import load_model __all__ = ["predict_compressed_model"] -__all__ += dataloader.__all__ -__all__ += load_model.__all__ diff --git a/paddleslim/auto_compression/utils/fake_ptq.py b/paddleslim/auto_compression/utils/fake_ptq.py index fbecc224f663c39403f4741aa903a3cbaf5e9188..e86dd84860b869bf50903bbbf9e4126e6492084a 100644 --- a/paddleslim/auto_compression/utils/fake_ptq.py +++ b/paddleslim/auto_compression/utils/fake_ptq.py @@ -12,7 +12,7 @@ except: TRANSFORM_PASS_OP_TYPES = QuantizationTransformPass._supported_quantizable_op_type QUANT_DEQUANT_PASS_OP_TYPES = AddQuantDequantPass._supported_quantizable_op_type -from .load_model import load_inference_model +from ...common.load_model import load_inference_model def post_quant_fake(executor, diff --git a/paddleslim/auto_compression/utils/load_model.py b/paddleslim/auto_compression/utils/load_model.py deleted file mode 100644 index 637e808ace099b70a210e3ead7f7cd285a8c46ad..0000000000000000000000000000000000000000 --- a/paddleslim/auto_compression/utils/load_model.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import paddle -from ...common import load_onnx_model - -__all__ = ['load_inference_model', 'get_model_dir'] - - -def load_inference_model(path_prefix, - executor, - model_filename=None, - params_filename=None): - # Load onnx model to Inference model. - if path_prefix.endswith('.onnx'): - inference_program, feed_target_names, fetch_targets = load_onnx_model( - path_prefix) - return [inference_program, feed_target_names, fetch_targets] - # Load Inference model.
- # TODO: clean code - if model_filename is not None and model_filename.endswith('.pdmodel'): - model_name = '.'.join(model_filename.split('.')[:-1]) - assert os.path.exists( - os.path.join(path_prefix, model_name + '.pdmodel') - ), 'Please check {}, or fix model_filename parameter.'.format( - os.path.join(path_prefix, model_name + '.pdmodel')) - assert os.path.exists( - os.path.join(path_prefix, model_name + '.pdiparams') - ), 'Please check {}, or fix params_filename parameter.'.format( - os.path.join(path_prefix, model_name + '.pdiparams')) - model_path_prefix = os.path.join(path_prefix, model_name) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=model_path_prefix, executor=executor)) - elif model_filename is not None and params_filename is not None: - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=path_prefix, - executor=executor, - model_filename=model_filename, - params_filename=params_filename)) - else: - model_name = '.'.join(model_filename.split('.') - [:-1]) if model_filename is not None else 'model' - if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')): - model_path_prefix = os.path.join(path_prefix, model_name) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=model_path_prefix, executor=executor)) - else: - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=path_prefix, executor=executor)) - - return [inference_program, feed_target_names, fetch_targets] - - -def get_model_dir(model_dir, model_filename, params_filename): - if model_dir.endswith('.onnx'): - updated_model_dir = model_dir.rstrip().rstrip('.onnx') + '_infer' - else: - updated_model_dir = model_dir.rstrip('/') - - if model_filename == None: - updated_model_filename = 'model.pdmodel' - else: - updated_model_filename = model_filename - - if params_filename == None: - updated_params_filename = 'model.pdiparams' - else: - updated_params_filename = params_filename - - if params_filename is None and model_filename is not None: - raise NotImplementedError( - "NOT SUPPORT parameters saved in separate files. Please convert it to single binary file first." - ) - return updated_model_dir, updated_model_filename, updated_params_filename diff --git a/paddleslim/auto_compression/utils/predict.py b/paddleslim/auto_compression/utils/predict.py index 01ef6a90b17280a74ef7ca54853cf20891392aad..5b8c6adb1850fd086317423bcb9e2fe97c34d4a8 100644 --- a/paddleslim/auto_compression/utils/predict.py +++ b/paddleslim/auto_compression/utils/predict.py @@ -4,7 +4,7 @@ import paddle from ...analysis import TableLatencyPredictor from .prune_model import get_sparse_model, get_prune_model from .fake_ptq import post_quant_fake -from .load_model import load_inference_model +from ...common.load_model import load_inference_model def with_variable_shape(model_dir, model_filename=None, params_filename=None): @@ -53,7 +53,7 @@ def predict_compressed_model(executor, latency_dict(dict): The latency latency of the model under various compression strategies. 
""" local_rank = paddle.distributed.get_rank() - quant_model_path = f'quant_model_rank_{local_rank}_tmp' + quant_model_path = 'quant_model_rank_{}_tmp'.format(local_rank) prune_model_path = f'prune_model_rank_{local_rank}_tmp' sparse_model_path = f'sparse_model_rank_{local_rank}_tmp' diff --git a/paddleslim/auto_compression/utils/prune_model.py b/paddleslim/auto_compression/utils/prune_model.py index 426a1859c4419fd4bb0d4db3f8f097d5894c223b..c0da14ca9693112cf6919294f21136b86a5ea1d5 100644 --- a/paddleslim/auto_compression/utils/prune_model.py +++ b/paddleslim/auto_compression/utils/prune_model.py @@ -5,7 +5,7 @@ import paddle import paddle.static as static from ...prune import Pruner from ...core import GraphWrapper -from .load_model import load_inference_model +from ...common.load_model import load_inference_model __all__ = ["get_sparse_model", "get_prune_model"] @@ -19,9 +19,10 @@ def get_sparse_model(executor, places, model_file, param_file, ratio, ratio(float): The ratio to prune the model. save_path(str): The save path of pruned model. """ - assert os.path.exists(model_file), f'{model_file} does not exist.' + assert os.path.exists(model_file), '{} does not exist.'.format(model_file) assert os.path.exists( - param_file) or param_file is None, f'{param_file} does not exist.' + param_file) or param_file is None, '{} does not exist.'.format( + param_file) paddle.enable_static() SKIP = ['image', 'feed', 'pool2d_0.tmp_0'] diff --git a/paddleslim/common/__init__.py b/paddleslim/common/__init__.py index e866790d7fa1be1a4d9ad44ba3aee63c37d7859b..03825c8a6bb98a39ca1f1026b3a86e484576b790 100644 --- a/paddleslim/common/__init__.py +++ b/paddleslim/common/__init__.py @@ -25,12 +25,16 @@ from .analyze_helper import VarCollector from . import wrapper_function from . import recover_program from . import patterns -from .convert_model import load_onnx_model +from .load_model import load_inference_model, get_model_dir, load_onnx_model +from .dataloader import wrap_dataloader, get_feed_vars +from .config_helper import load_config, save_config __all__ = [ 'EvolutionaryController', 'SAController', 'get_logger', 'ControllerServer', 'ControllerClient', 'lock', 'unlock', 'cached_reader', 'AvgrageMeter', - 'Server', 'Client', 'RLBaseController', 'VarCollector', 'load_onnx_model' + 'Server', 'Client', 'RLBaseController', 'VarCollector', 'load_onnx_model', + 'load_inference_model', 'get_model_dir', 'wrap_dataloader', 'get_feed_vars', + 'load_config', 'save_config' ] __all__ += wrapper_function.__all__ diff --git a/paddleslim/common/config_helper.py b/paddleslim/common/config_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..486fa9b449789f3c3d60d5e50cd3364406a9c908 --- /dev/null +++ b/paddleslim/common/config_helper.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import yaml
+import os
+
+__all__ = ['load_config', 'save_config']
+
+
+def print_arguments(args, level=0):
+    if level == 0:
+        print('----------- Running Arguments -----------')
+    for arg, value in sorted(args.items()):
+        if isinstance(value, dict):
+            print('\t' * level, '%s:' % arg)
+            print_arguments(value, level + 1)
+        else:
+            print('\t' * level, '%s: %s' % (arg, value))
+    if level == 0:
+        print('------------------------------------------')
+
+
+def load_config(config):
+    """Load configurations from a yaml file into a dict.
+    Field validation is skipped so that custom information can be loaded.
+    Args:
+        config(str): The path of the configuration file.
+    Returns:
+        dict: A dict storing the configuration information.
+    """
+    if config is None:
+        return None
+    assert isinstance(config, str), \
+        "config should be str but got type(config)={}".format(type(config))
+    assert os.path.exists(config) and os.path.isfile(
+        config), "{} not found or it is not a file.".format(config)
+    with open(config) as f:
+        cfg = yaml.load(f, Loader=yaml.FullLoader)
+    print_arguments(cfg)
+    return cfg
+
+
+def save_config(config, config_path):
+    """
+    Save a dict of configurations to a yaml file.
+    """
+    with open(config_path, "w") as f:
+        yaml.dump(config, f)
diff --git a/paddleslim/auto_compression/utils/dataloader.py b/paddleslim/common/dataloader.py
similarity index 100%
rename from paddleslim/auto_compression/utils/dataloader.py
rename to paddleslim/common/dataloader.py
diff --git a/paddleslim/common/convert_model.py b/paddleslim/common/load_model.py
similarity index 58%
rename from paddleslim/common/convert_model.py
rename to paddleslim/common/load_model.py
index 00e0182017c4d1e60b2a6c28e8db1d109be28666..81d50f7311b41eb1600263915a48bb1f44640519 100644
--- a/paddleslim/common/convert_model.py
+++ b/paddleslim/common/load_model.py
@@ -17,7 +17,6 @@ import logging
 import os
 import shutil
 import sys
-
 import paddle
 from x2paddle.decoder.onnx_decoder import ONNXDecoder
 from x2paddle.op_mapper.onnx2paddle.onnx_op_mapper import ONNXOpMapper
@@ -27,7 +26,78 @@ from x2paddle.utils import ConverterCheck
 from . import get_logger
 _logger = get_logger(__name__, level=logging.INFO)
 
-__all__ = ['load_onnx_model']
+__all__ = ['load_inference_model', 'get_model_dir', 'load_onnx_model']
+
+
+def load_inference_model(path_prefix,
+                         executor,
+                         model_filename=None,
+                         params_filename=None):
+    # Load an ONNX model as an Inference model.
+    if path_prefix.endswith('.onnx'):
+        inference_program, feed_target_names, fetch_targets = load_onnx_model(
+            path_prefix)
+        return [inference_program, feed_target_names, fetch_targets]
+    # Load an Inference model.
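+    # The branches below cover three cases: (1) model_filename explicitly
+    # names a '.pdmodel' file inside path_prefix, (2) an explicit
+    # model/params filename pair is given, and (3) only a directory is
+    # given, which falls back to the default 'model' prefix when present.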
+    # TODO: clean code
+    if model_filename is not None and model_filename.endswith('.pdmodel'):
+        model_name = '.'.join(model_filename.split('.')[:-1])
+        assert os.path.exists(
+            os.path.join(path_prefix, model_name + '.pdmodel')
+        ), 'Please check {}, or fix model_filename parameter.'.format(
+            os.path.join(path_prefix, model_name + '.pdmodel'))
+        assert os.path.exists(
+            os.path.join(path_prefix, model_name + '.pdiparams')
+        ), 'Please check {}, or fix params_filename parameter.'.format(
+            os.path.join(path_prefix, model_name + '.pdiparams'))
+        model_path_prefix = os.path.join(path_prefix, model_name)
+        [inference_program, feed_target_names, fetch_targets] = (
+            paddle.static.load_inference_model(
+                path_prefix=model_path_prefix, executor=executor))
+    elif model_filename is not None and params_filename is not None:
+        [inference_program, feed_target_names, fetch_targets] = (
+            paddle.static.load_inference_model(
+                path_prefix=path_prefix,
+                executor=executor,
+                model_filename=model_filename,
+                params_filename=params_filename))
+    else:
+        model_name = '.'.join(model_filename.split('.')
+                              [:-1]) if model_filename is not None else 'model'
+        if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')):
+            model_path_prefix = os.path.join(path_prefix, model_name)
+            [inference_program, feed_target_names, fetch_targets] = (
+                paddle.static.load_inference_model(
+                    path_prefix=model_path_prefix, executor=executor))
+        else:
+            [inference_program, feed_target_names, fetch_targets] = (
+                paddle.static.load_inference_model(
+                    path_prefix=path_prefix, executor=executor))
+
+    return [inference_program, feed_target_names, fetch_targets]
+
+
+def get_model_dir(model_dir, model_filename, params_filename):
+    if model_dir.endswith('.onnx'):
+        # Strip the '.onnx' suffix exactly; rstrip('.onnx') would strip any
+        # trailing '.', 'o', 'n', 'x' characters and corrupt the name.
+        updated_model_dir = model_dir.rstrip()[:-len('.onnx')] + '_infer'
+    else:
+        updated_model_dir = model_dir.rstrip('/')
+
+    if model_filename is None:
+        updated_model_filename = 'model.pdmodel'
+    else:
+        updated_model_filename = model_filename
+
+    if params_filename is None:
+        updated_params_filename = 'model.pdiparams'
+    else:
+        updated_params_filename = params_filename
+
+    if params_filename is None and model_filename is not None:
+        raise NotImplementedError(
+            "Parameters saved in separate files are not supported. Please convert them to a single binary file first."
+        )
+    return updated_model_dir, updated_model_filename, updated_params_filename
 
 
 def load_onnx_model(model_path, disable_feedback=False):
@@ -112,4 +182,4 @@ def load_onnx_model(model_path, disable_feedback=False):
         shutil.rmtree(
             os.path.join(inference_model_path, 'onnx2paddle_{}'.format(
                 model_idx)))
-    return val_program, feed_target_names, fetch_targets
+    return val_program, feed_target_names, fetch_targets
\ No newline at end of file
diff --git a/paddleslim/quant/analysis.py b/paddleslim/quant/analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..1013c392056a115bd08d8ec1a6a38041ab967cf5
--- /dev/null
+++ b/paddleslim/quant/analysis.py
@@ -0,0 +1,331 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import pickle
+import copy
+import logging
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+
+import paddle
+from paddle.fluid import core
+from paddle.fluid import framework
+from paddle.fluid.framework import IrGraph
+from paddle.fluid.executor import global_scope
+from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
+from paddle.fluid.contrib.slim.quantization.utils import _get_op_input_var_names, load_variable_data
+from .quanter import quant_post
+from ..core import GraphWrapper
+from ..common import get_logger
+from ..common import get_feed_vars, wrap_dataloader, load_inference_model, get_model_dir
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+__all__ = ["AnalysisQuant"]
+
+
+class AnalysisQuant(object):
+    def __init__(
+            self,
+            model_dir,
+            model_filename=None,
+            params_filename=None,
+            eval_function=None,
+            quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
+            weight_quantize_type='abs_max',
+            activation_quantize_type='moving_average_abs_max',
+            is_full_quantize=False,
+            batch_size=10,
+            batch_nums=10,
+            data_loader=None,
+            save_dir='analysis_results',
+            checkpoint_name='analysis_checkpoint.pkl',
+            num_histogram_plots=10, ):
+        """
+        AnalysisQuant analyzes the quantization sensitivity of each op in the model.
+
+        Args:
+            model_dir(str): the path of the fp32 model that will be quantized
+            model_filename(str): the model file name of the fp32 model
+            params_filename(str): the parameter file name of the fp32 model
+            eval_function(function): a user-defined evaluation function that
+                returns the metric of the inference program; it is used to
+                judge the metric of the quantized model
+                (TODO: make this argument optional)
+            quantizable_op_type(list, optional): op types that can be quantized
+            batch_size(int, optional): the batch size of the DataLoader, default is 10
+            batch_nums(int, optional): the number of calibration batches, default is 10
+            data_loader(Python Generator, paddle.io.DataLoader, optional): a
+                generator or dataloader that provides the calibration data and
+                returns one batch at a time
+            save_dir(str, optional): the output directory that stores the analyzed information
+            checkpoint_name(str, optional): the name of the checkpoint file that
+                saves the analyzed information, so that an interrupted analysis can resume
+            num_histogram_plots(int, optional): the number of histogram plots to
+                visualize; the plots are written to PDF files in save_dir
+        """
+        self.model_dir = model_dir
+        self.model_filename = model_filename
+        self.params_filename = params_filename
+        self.quantizable_op_type = quantizable_op_type
+        self.weight_quantize_type = weight_quantize_type
+        self.activation_quantize_type = activation_quantize_type
+        self.is_full_quantize = is_full_quantize
+        self.histogram_bins = 1000
+        self.save_dir = save_dir
+        self.eval_function = eval_function
+        self.quant_layer_names = []
+        self.checkpoint_name = os.path.join(save_dir, checkpoint_name)
+        self.quant_layer_metrics = {}
+        self.batch_size = batch_size
+        self.batch_nums = batch_nums
+        self.num_histogram_plots = num_histogram_plots
+
+        if not os.path.exists(self.save_dir):
+            os.makedirs(self.save_dir)
+
+        devices = paddle.device.get_device().split(':')[0]
+        self.places = paddle.device._convert_to_place(devices)
+        executor = paddle.static.Executor(self.places)
+
+        # load model
+        [program, self.feed_list, self.fetch_list] = load_inference_model(
+            model_dir,
+            executor=executor,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+        # create data_loader
+        self.data_loader = wrap_dataloader(data_loader, self.feed_list)
+
+        # evaluate before quantization
+        # TODO: support self.eval_function being None
+        if self.eval_function is not None:
+            self.base_metric = self.eval_function(
+                executor, program, self.feed_list, self.fetch_list)
+            _logger.info('before quantization, the accuracy of the model is: {}'.
+                         format(self.base_metric))
+        # quantize all layers and evaluate the result (skip_tensor_list=None)
+        post_training_quantization = PostTrainingQuantization(
+            executor=executor,
+            data_loader=self.data_loader,
+            model_dir=self.model_dir,
+            model_filename=self.model_filename,
+            params_filename=self.params_filename,
+            batch_size=self.batch_size,
+            batch_nums=self.batch_nums,
+            algo='avg',  # fastest
+            quantizable_op_type=self.quantizable_op_type,
+            weight_quantize_type=self.weight_quantize_type,
+            activation_quantize_type=self.activation_quantize_type,
+            is_full_quantize=self.is_full_quantize,
+            skip_tensor_list=None, )
+        program = post_training_quantization.quantize()
+        self.quant_metric = self.eval_function(executor, program,
+                                               self.feed_list, self.fetch_list)
+        _logger.info('after quantization, the accuracy of the model is: {}'.
+                     format(self.quant_metric))
+
+        # get the names of the quantized weight and act vars
+        self.quantized_weight_var_name = post_training_quantization._quantized_weight_var_name
+        self.quantized_act_var_name = post_training_quantization._quantized_act_var_name
+        executor.close()
+
+        # load already-analyzed layers from the checkpoint, if one exists
+        self.load_checkpoint()
+        self.tobe_analyzed_layers = self.quantized_weight_var_name - set(
+            list(self.quant_layer_metrics.keys()))
+        self.tobe_analyzed_layers = sorted(list(self.tobe_analyzed_layers))
+
+    def analysis(self):
+        self.compute_quant_sensitivity()
+        self.sensitivity_ranklist = sorted(
+            self.quant_layer_metrics,
+            key=self.quant_layer_metrics.get,
+            reverse=False)
+
+        _logger.info('Finished computing the sensitivity of the model.')
+        for name in self.sensitivity_ranklist:
+            _logger.info("quant layer name: {}, eval metric: {}".format(
+                name, self.quant_layer_metrics[name]))
+
+        analysis_file = os.path.join(self.save_dir, "analysis.txt")
+        with open(analysis_file, "w") as analysis_ret_f:
+            for name in self.sensitivity_ranklist:
+                analysis_ret_f.write(
+                    "quant layer name: {}, eval metric: {}\n".format(
+                        name, self.quant_layer_metrics[name]))
+        _logger.info('Analysis file is saved in {}'.format(analysis_file))
+        self.calculate_histogram()
+        self.draw_pdf()
+
+    def save_checkpoint(self):
+        if not os.path.exists(self.save_dir):
+            os.makedirs(self.save_dir)
+        with open(self.checkpoint_name, 'wb') as f:
+            pickle.dump(self.quant_layer_metrics, f)
+        _logger.info('save checkpoint to {}'.format(self.checkpoint_name))
+
+    def load_checkpoint(self):
+        if not os.path.exists(self.checkpoint_name):
+            return False
+        with open(self.checkpoint_name, 'rb') as f:
+            self.quant_layer_metrics = pickle.load(f)
+        _logger.info('load checkpoint from {}'.format(self.checkpoint_name))
+        return True
+
+    def compute_quant_sensitivity(self):
+        '''
+        For each layer, quantize only that layer's weight op and evaluate the quantized model.
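+        The drop from base_metric to the metric measured with only this layer
+        quantized is taken as the layer's quantization sensitivity.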
+ ''' + for i, layer_name in enumerate(self.tobe_analyized_layer): + _logger.info('checking {}/{} quant model: quant layer {}'.format( + i + 1, len(self.tobe_analyized_layer), layer_name)) + skip_list = copy.copy(list(self.quantized_weight_var_name)) + skip_list.remove(layer_name) + + executor = paddle.static.Executor(self.places) + post_training_quantization = PostTrainingQuantization( + executor=executor, + data_loader=self.data_loader, + model_dir=self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + batch_size=self.batch_size, + batch_nums=self.batch_nums, + algo='avg', # fastest + quantizable_op_type=self.quantizable_op_type, + weight_quantize_type=self.weight_quantize_type, + activation_quantize_type=self.activation_quantize_type, + is_full_quantize=self.is_full_quantize, + skip_tensor_list=skip_list, ) + program = post_training_quantization.quantize() + + _logger.info('Evaluating...') + quant_metric = self.eval_function(executor, program, self.feed_list, + self.fetch_list) + executor.close() + _logger.info( + "quant layer name: {}, eval metric: {}, the loss caused by this layer: {}". + format(layer_name, quant_metric, self.base_metric - + quant_metric)) + self.quant_layer_metrics[layer_name] = quant_metric + self.save_checkpoint() + + def get_sensitive_ops_name(self, graph, program): + sensitive_weight_ops = self.sensitivity_ranklist[:self. + num_histogram_plots] + sensitive_act_ops = [] + persistable_var_names = [] + persistable_var_names = [] + for var in program.list_vars(): + if var.persistable: + persistable_var_names.append(var.name) + for op_name in sensitive_weight_ops: + for block_id in range(len(program.blocks)): + for op in program.blocks[block_id].ops: + var_name_list = _get_op_input_var_names(op) + if op_name in var_name_list: + for var_name in var_name_list: + if var_name not in persistable_var_names: + sensitive_act_ops.append(var_name) + return sensitive_act_ops, sensitive_weight_ops + + def calculate_histogram(self): + ''' + Sample histograms for the weight and corresponding act tensors + ''' + devices = paddle.device.get_device().split(':')[0] + places = paddle.device._convert_to_place(devices) + executor = paddle.static.Executor(places) + + [program, feed_list, fetch_list]= load_inference_model( \ + self.model_dir, \ + executor=executor, \ + model_filename=self.model_filename, \ + params_filename=self.params_filename) + + scope = global_scope() + + graph = IrGraph(core.Graph(program.desc), for_test=False) + self.sensitive_act_ops, self.sensitive_weight_ops = self.get_sensitive_ops_name( + graph, program) + + for var in program.list_vars(): + if var.name in self.quantized_act_var_name: + var.persistable = True + + batch_id = 0 + for data in self.data_loader(): + executor.run(program=program, + feed=data, + fetch_list=fetch_list, + return_numpy=False, + scope=scope) + batch_id += 1 + if batch_id >= self.batch_nums: + break + + self.weight_histogram = {} + self.act_histogram = {} + for var_name in self.sensitive_act_ops: + var_tensor = load_variable_data(scope, var_name) + var_tensor = np.array(var_tensor) + min_v = float(np.min(var_tensor)) + max_v = float(np.max(var_tensor)) + var_tensor = var_tensor.flatten() + _, hist_edges = np.histogram( + var_tensor.copy(), + bins=self.histogram_bins, + range=(min_v, max_v)) + self.act_histogram[var_name] = [var_tensor, hist_edges] + + for var_name in self.sensitive_weight_ops: + var_tensor = load_variable_data(scope, var_name) + var_tensor = np.array(var_tensor) + min_v = 
+            max_v = float(np.max(var_tensor))
+            var_tensor = var_tensor.flatten()
+            _, hist_edges = np.histogram(
+                var_tensor.copy(),
+                bins=self.histogram_bins,
+                range=(min_v, max_v))
+            self.weight_histogram[var_name] = [var_tensor, hist_edges]
+
+    def draw_pdf(self):
+        pdf_path_a = os.path.join(self.save_dir, 'act_hist_result.pdf')
+        pdf_path_w = os.path.join(self.save_dir, 'weight_hist_result.pdf')
+        with PdfPages(pdf_path_a) as pdf:
+            for name in self.act_histogram:
+                plt.hist(
+                    self.act_histogram[name][0],
+                    bins=self.act_histogram[name][1])
+                plt.xlabel(name)
+                plt.ylabel("frequency")
+                plt.title("Hist of variable {}".format(name))
+                pdf.savefig()
+                plt.close()
+        with PdfPages(pdf_path_w) as pdf:
+            for name in self.weight_histogram:
+                plt.hist(
+                    self.weight_histogram[name][0],
+                    bins=self.weight_histogram[name][1])
+                plt.xlabel(name)
+                plt.ylabel("frequency")
+                plt.title("Hist of variable {}".format(name))
+                pdf.savefig()
+                plt.close()
+        _logger.info('Histogram plots are saved in {} and {}'.format(
+            pdf_path_a, pdf_path_w))
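+
+# A minimal usage sketch (hypothetical paths and eval_func; eval_func must
+# accept (executor, program, feed_list, fetch_list) and return a scalar
+# metric, matching how it is called above):
+#
+#     analyzer = AnalysisQuant(
+#         model_dir='./mobilenetv1_infer',
+#         model_filename='model.pdmodel',
+#         params_filename='model.pdiparams',
+#         eval_function=eval_func,
+#         data_loader=calib_loader)
+#     analyzer.analysis()  # writes analysis.txt and histogram PDFs to save_dir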