diff --git a/example/auto_compression/detection/eval.py b/example/auto_compression/detection/eval.py index d80f3cfe8af1a0461cc25a66666f40cae17a9343..fc0c09ae46c644fea8ca6218d0f0da3544d59161 100644 --- a/example/auto_compression/detection/eval.py +++ b/example/auto_compression/detection/eval.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from keypoint_utils import keypoint_post_process from post_process import PPYOLOEPostProcess diff --git a/example/auto_compression/detection/post_quant.py b/example/auto_compression/detection/post_quant.py index b3a709000e7a28d950e4aaf9931b7f990a81df9d..edc7d2fea66dfb16e51b8ad16a5e61b75294b895 100644 --- a/example/auto_compression/detection/post_quant.py +++ b/example/auto_compression/detection/post_quant.py @@ -19,7 +19,7 @@ import argparse import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.quant import quant_post_static diff --git a/example/auto_compression/detection/run.py b/example/auto_compression/detection/run.py index a3c46d4753440a54b96c264f197f0664b6bb4f10..6a4838cad67ed633f190b4284efb8045e6e1f242 100644 --- a/example/auto_compression/detection/run.py +++ b/example/auto_compression/detection/run.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from keypoint_utils import keypoint_post_process from post_process import PPYOLOEPostProcess @@ -126,7 +126,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] reader_cfg = load_config(global_config['reader_config']) diff --git a/example/auto_compression/image_classification/eval.py b/example/auto_compression/image_classification/eval.py index d0e0c3d17ed845e4da649e3867f2f8ac1de6e8e3..9cd9b4a3b4a2bff7e97c00be88dc50fe802779e6 100644 --- a/example/auto_compression/image_classification/eval.py +++ b/example/auto_compression/image_classification/eval.py @@ -23,7 +23,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): diff --git a/example/auto_compression/image_classification/infer.py b/example/auto_compression/image_classification/infer.py index 5060115c60e9f16dfeb4d417c4a2f49a6f517602..46eb7115a948e10fa756e89c3ea83dce8d6ba7cc 100644 --- a/example/auto_compression/image_classification/infer.py +++ b/example/auto_compression/image_classification/infer.py @@ -22,7 +22,7 @@ import yaml from utils import preprocess, postprocess import paddle from paddle.inference import create_predictor -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config def argsparser(): diff --git a/example/auto_compression/image_classification/run.py b/example/auto_compression/image_classification/run.py index d8da1a9f419b7f19c03b7fb004ea0725724f2803..7d660431391f6338c97de14582cb999b8177c3c2 100644 --- a/example/auto_compression/image_classification/run.py +++ b/example/auto_compression/image_classification/run.py @@ -24,7 +24,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py index 769f58ef7580b950521e4e8b841d64ea20d94f34..c70a3a344b2ce5ca626551120c782f8c56ec708e 100644 --- a/example/auto_compression/nlp/run.py +++ b/example/auto_compression/nlp/run.py @@ -15,7 +15,7 @@ from paddlenlp.datasets import load_dataset from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.data.sampler import SamplerHelper from paddlenlp.metrics import Mcc, PearsonAndSpearman -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.auto_compression.compressor import AutoCompression diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py index 4da4e703fb8b3177f17d567564159f0b6a41f483..0c730dffa6823f5e4b6a0879be2a5b120ada320b 100644 --- a/example/auto_compression/pytorch_huggingface/run.py +++ b/example/auto_compression/pytorch_huggingface/run.py @@ -27,7 +27,7 @@ from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenize from paddlenlp.datasets import load_dataset from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression.compressor import AutoCompression
diff --git a/example/auto_compression/pytorch_yolov5/eval.py b/example/auto_compression/pytorch_yolov5/eval.py index 42f2e121205b22fafd277ac8868f9303976f25ec..68461c995b20537ce2c1f396d1572178f65be16e 100644 --- a/example/auto_compression/pytorch_yolov5/eval.py +++ b/example/auto_compression/pytorch_yolov5/eval.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.common import load_onnx_model from post_process import YOLOv5PostProcess, coco_metric from dataset import COCOValDataset diff --git a/example/auto_compression/pytorch_yolov5/post_quant.py b/example/auto_compression/pytorch_yolov5/post_quant.py index 84db4f989f41ad76c1aa2a2cbd49656b456f8750..97f467411b02ef559ba978ef48630eb74e844bf1 100644 --- a/example/auto_compression/pytorch_yolov5/post_quant.py +++ b/example/auto_compression/pytorch_yolov5/post_quant.py @@ -17,11 +17,12 @@ import sys import numpy as np import argparse import paddle -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.common import load_onnx_model from paddleslim.quant import quant_post_static from dataset import COCOTrainDataset + def argsparser(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( diff --git a/example/auto_compression/pytorch_yolov5/run.py b/example/auto_compression/pytorch_yolov5/run.py index 9f505535b55f1b92d583dc3099aa8e887040fef6..b1ca6bceee28c3f23e0b70158c518a1e9820ab13 100644 --- a/example/auto_compression/pytorch_yolov5/run.py +++ b/example/auto_compression/pytorch_yolov5/run.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from dataset import COCOValDataset, COCOTrainDataset from post_process import YOLOv5PostProcess, coco_metric @@ -75,7 +75,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] dataset = COCOTrainDataset( diff --git a/example/auto_compression/pytorch_yolov6/eval.py b/example/auto_compression/pytorch_yolov6/eval.py index 1d28466ad6953244b9dcf809628a6288c3c5d4d4..038a1f8bc224bf90a57c2b88741ce3db6f07d45e 100644 --- a/example/auto_compression/pytorch_yolov6/eval.py +++ b/example/auto_compression/pytorch_yolov6/eval.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.common import load_onnx_model from post_process import YOLOv6PostProcess, coco_metric from dataset import COCOValDataset diff --git a/example/auto_compression/pytorch_yolov6/post_quant.py b/example/auto_compression/pytorch_yolov6/post_quant.py index 84db4f989f41ad76c1aa2a2cbd49656b456f8750..97f467411b02ef559ba978ef48630eb74e844bf1 100644 --- a/example/auto_compression/pytorch_yolov6/post_quant.py +++ b/example/auto_compression/pytorch_yolov6/post_quant.py @@ -17,11 +17,12 @@ import sys import numpy as np import argparse import paddle -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.common import load_onnx_model from paddleslim.quant import quant_post_static from dataset import COCOTrainDataset + def argsparser(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( diff --git a/example/auto_compression/pytorch_yolov6/run.py b/example/auto_compression/pytorch_yolov6/run.py index 7e28e1f6bdf64012cbe1b1ad08e5909c7e6a187e..8676e7b3b51c5334b7a11719d6dd29043041c869 100644 --- a/example/auto_compression/pytorch_yolov6/run.py +++ b/example/auto_compression/pytorch_yolov6/run.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from dataset import COCOValDataset, COCOTrainDataset from post_process import YOLOv6PostProcess, coco_metric @@ -75,7 +75,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] dataset = COCOTrainDataset( diff --git a/example/auto_compression/pytorch_yolov7/eval.py b/example/auto_compression/pytorch_yolov7/eval.py index 451301f1056c0233eefa49b13ffa7265f5d6bef2..f758f8f97495eed3d2281e25d1ca675ee6a9ab42 100644 --- a/example/auto_compression/pytorch_yolov7/eval.py +++ b/example/auto_compression/pytorch_yolov7/eval.py @@ -18,8 +18,8 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config -from paddleslim.auto_compression.utils import load_inference_model +from paddleslim.common import load_config as load_slim_config +from paddleslim.common import load_inference_model from post_process import YOLOv7PostProcess, coco_metric from dataset import COCOValDataset diff --git a/example/auto_compression/pytorch_yolov7/post_quant.py b/example/auto_compression/pytorch_yolov7/post_quant.py index a253e671f8fd16f0a8ab3d13dbf1413de6f56d14..97f467411b02ef559ba978ef48630eb74e844bf1 100644 --- a/example/auto_compression/pytorch_yolov7/post_quant.py +++ b/example/auto_compression/pytorch_yolov7/post_quant.py @@ -17,7 +17,7 @@ import sys import numpy as np import argparse import paddle -from paddleslim.auto_compression.config_helpers import load_config +from paddleslim.common import load_config from paddleslim.common import load_onnx_model from paddleslim.quant import quant_post_static from dataset import COCOTrainDataset diff --git a/example/auto_compression/pytorch_yolov7/run.py b/example/auto_compression/pytorch_yolov7/run.py index b3df96397f10dbe815304660518400443569ccc9..f6ab75334c2ee501e6dbf69bd754b9be55e8c5cd 100644 --- a/example/auto_compression/pytorch_yolov7/run.py +++ b/example/auto_compression/pytorch_yolov7/run.py @@ -18,7 +18,7 @@ import numpy as np import argparse from tqdm import tqdm import paddle -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression from dataset import COCOValDataset, COCOTrainDataset from post_process import YOLOv7PostProcess, coco_metric @@ -75,7 +75,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(FLAGS.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] dataset = COCOTrainDataset( diff --git a/example/auto_compression/semantic_segmentation/run.py b/example/auto_compression/semantic_segmentation/run.py index 4f4d4c56fb5f4653ceb46f3a94b5ae2d7e98d3cf..6bc7d75293e21741c1b88d0c399b5873bd17c9a1 100644 --- a/example/auto_compression/semantic_segmentation/run.py +++ b/example/auto_compression/semantic_segmentation/run.py @@ -21,7 +21,7 @@ from paddleseg.cvlibs import Config as PaddleSegDataConfig from paddleseg.utils import worker_init_fn from paddleslim.auto_compression import AutoCompression -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleseg.core.infer import reverse_transform from paddleseg.utils import metrics diff --git a/example/auto_compression/tensorflow_mobilenet/eval.py b/example/auto_compression/tensorflow_mobilenet/eval.py index 85e5fdaf504ce30c7ddb8d4013371da906006b7b..bf0987e3538c86b9ca278348db3a5b444cb00773 100644 --- a/example/auto_compression/tensorflow_mobilenet/eval.py +++ b/example/auto_compression/tensorflow_mobilenet/eval.py @@ -23,7 +23,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): @@ -93,7 +93,8 @@ def eval(): def main(): global global_config all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] global data_dir data_dir = global_config['data_dir'] diff --git a/example/auto_compression/tensorflow_mobilenet/run.py b/example/auto_compression/tensorflow_mobilenet/run.py index 86345ec2071a95b3e630120f274cb1e6e4b99ba6..aefd2941f637d6e933bd978933e5829b211180f6 100644 --- a/example/auto_compression/tensorflow_mobilenet/run.py +++ b/example/auto_compression/tensorflow_mobilenet/run.py @@ -23,7 +23,7 @@ import paddle import paddle.nn as nn from paddle.io import DataLoader from imagenet_reader import ImageNetDataset -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression @@ -107,7 +107,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): def main(): global global_config all_config = load_slim_config(args.config_path) - assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}" + assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format( + all_config) global_config = all_config["Global"] global data_dir data_dir = global_config['data_dir'] diff --git a/example/full_quantization/detection/eval.py b/example/full_quantization/detection/eval.py index 81a169d164744eaba7eb650ec6a2cabdb9ae4433..d6c7d49daf8ccc43ad914eb56dd7727ae3e1f00b 100644 --- a/example/full_quantization/detection/eval.py +++ b/example/full_quantization/detection/eval.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config def argsparser(): diff --git a/example/full_quantization/detection/run.py b/example/full_quantization/detection/run.py index aca12b2610dbef526cd3c58bb7a8ebdb54c61182..fb0b9ad05cafe33399a76740e54ad46d3c204294 100644 --- a/example/full_quantization/detection/run.py +++ b/example/full_quantization/detection/run.py @@ -20,7 +20,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.core.workspace import create from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval -from paddleslim.auto_compression.config_helpers import load_config as load_slim_config +from paddleslim.common import load_config as load_slim_config from paddleslim.auto_compression import AutoCompression diff --git a/example/post_training_quantization/pytorch_yolo_series/README.md b/example/post_training_quantization/pytorch_yolo_series/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c83bf76a2bb0d119590e5f703fe87c8e4f9e310e --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/README.md @@ -0,0 +1,138 @@
+# YOLO Series Post-Training Quantization Example
+
+Contents:
+- [1. Introduction](#1-introduction)
+- [2. Benchmark](#2-benchmark)
+- [3. Post-Training Quantization Workflow](#3-post-training-quantization-workflow)
+  - [3.1 Prepare the Environment](#31-prepare-the-environment)
+  - [3.2 Prepare the Dataset](#32-prepare-the-dataset)
+  - [3.3 Prepare the Inference Model](#33-prepare-the-inference-model)
+  - [3.4 Run Post-Training Quantization](#34-run-post-training-quantization)
+  - [3.5 Evaluate Model Accuracy](#35-evaluate-model-accuracy)
+  - [3.6 Improve Post-Training Quantization Accuracy](#36-improve-post-training-quantization-accuracy)
+- [4. Deployment](#4-deployment)
+- [5. FAQ](#5-faq)
+
+## 1. Introduction
+
+This example takes the YOLO-series object detectors [ultralytics/yolov5](https://github.com/ultralytics/yolov5), [meituan/YOLOv6](https://github.com/meituan/YOLOv6) and [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7) as examples: the inference models exported from PyTorch are converted into Paddle inference models (see the conversion sketch below), compressed with post-training quantization (PTQ), and the sensitivity-analysis tool is used to improve PTQ accuracy. The quantized models can be deployed with Paddle Inference, or exported to ONNX format and deployed with TensorRT.
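+The conversion step mentioned above is handled by PaddleSlim's `load_onnx_model` (X2Paddle under the hood). Below is a minimal sketch of what the scripts in this example do with it; the `./yolov5s.onnx` path is illustrative:
+
+```python
+import paddle
+from paddleslim.common import load_onnx_model
+
+paddle.enable_static()
+# Converts the ONNX file into a Paddle inference model on disk; the example
+# scripts then read it back from the '<name>_infer' directory.
+load_onnx_model('./yolov5s.onnx')
+inference_model_path = './yolov5s.onnx'.rstrip().rstrip('.onnx') + '_infer'
+```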
+## 2. Benchmark
+| Model | Strategy | Input Size | mAP<sup>val<br>0.5:0.95</sup> | FP32 Latency<br>(ms) | FP16 Latency<br>(ms) | INT8 Latency<br>(ms) | Config File | Inference Model |
+| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: |
+| YOLOv5s | Base model | 640*640 | 37.4 | 5.95ms | 2.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) |
+| YOLOv5s | KL offline quantization | 640*640 | 36.0 | - | - | 1.87ms | - | - |
+| | | | | | | | | |
+| YOLOv6s | Base model | 640*640 | 42.4 | 9.06ms | 2.90ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) |
+| YOLOv6s | KL offline quantization (before analysis) | 640*640 | 30.3 | - | - | 1.83ms | - | - |
+| YOLOv6s | KL offline quantization (after analysis) | 640*640 | 39.7 | - | - | - | - | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_analyzed_ptq.tar) |
+| | | | | | | | | |
+| YOLOv7 | Base model | 640*640 | 51.1 | 26.84ms | 7.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) |
+| YOLOv7 | KL offline quantization | 640*640 | 50.2 | - | - | 4.55ms | - | - |
+
+Notes:
+- All mAP values are evaluated on the COCO val2017 dataset.
+
+## 3. Post-Training Quantization Workflow
+
+#### 3.1 Prepare the Environment
+- PaddlePaddle >= 2.3 (install from the [PaddlePaddle official site](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
+- PaddleSlim > 2.3
+- opencv-python
+
+(1) Install PaddlePaddle:
+```shell
+# CPU
+pip install paddlepaddle
+# GPU
+pip install paddlepaddle-gpu
+```
+
+(2) Install PaddleSlim:
+```shell
+pip install paddleslim
+```
+
+#### 3.2 Prepare the Dataset
+By default this example runs the post-training quantization experiments on the COCO dataset. Download [Train](http://images.cocodataset.org/zips/train2017.zip), [Val](http://images.cocodataset.org/zips/val2017.zip) and [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip) from the [MS COCO website](https://cocodataset.org); the sketch after the directory layout shows how these paths are consumed.
+
+The expected directory layout:
+```
+dataset/coco/
+├── annotations
+│   ├── instances_train2017.json
+│   ├── instances_val2017.json
+│   |   ...
+├── train2017
+│   ├── 000000000009.jpg
+│   ├── 000000580008.jpg
+│   |   ...
+├── val2017
+│   ├── 000000000139.jpg
+│   ├── 000000000285.jpg
+```
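+For reference, the eval and analysis scripts consume these dataset paths through the YAML configs. A minimal sketch, assuming the `dataset.py` and `configs/yolov5s_ptq.yaml` added in this example:
+
+```python
+import paddle
+from paddleslim.common import load_config
+from dataset import COCOValDataset  # dataset.py from this example
+
+config = load_config('./configs/yolov5s_ptq.yaml')
+val_dataset = COCOValDataset(
+    dataset_dir=config['dataset_dir'],  # e.g. /dataset/coco/
+    image_dir=config['val_image_dir'],  # val2017
+    anno_path=config['val_anno_path'])  # annotations/instances_val2017.json
+val_loader = paddle.io.DataLoader(val_dataset, batch_size=1)
+```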
+#### 3.3 Prepare the Inference Model
+(1) Prepare the ONNX model:
+
+**yolov5**: export an ONNX model following the official [export tutorial](https://github.com/ultralytics/yolov5/issues/251) of [ultralytics/yolov5](https://github.com/ultralytics/yolov5), or download the prepared [yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx).
+
+**yolov6**: export an ONNX model following the official [export tutorial](https://github.com/meituan/YOLOv6/blob/main/deploy/ONNX/README.md) of [meituan/YOLOv6](https://github.com/meituan/YOLOv6), or download the prepared [yolov6s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx).
+
+**yolov7**: export an ONNX model with the export script of [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7), or download the prepared [yolov7.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx).
+
+#### 3.4 Run Post-Training Quantization
+Post-training quantization is launched through the post_quant.py script, which quantizes the model with the `paddleslim.quant.quant_post_static` API. Set the model path, data path and quantization parameters in the config file, then run:
+- yolov5
+
+```shell
+python post_quant.py --config_path=./configs/yolov5s_ptq.yaml --save_dir=./yolov5s_ptq_out
+```
+
+- yolov6
+
+```shell
+python post_quant.py --config_path=./configs/yolov6s_ptq.yaml --save_dir=./yolov6s_ptq_out
+```
+
+- yolov7
+
+```shell
+python post_quant.py --config_path=./configs/yolov7s_ptq.yaml --save_dir=./yolov7s_ptq_out
+```
+
+#### 3.5 Evaluate Model Accuracy
+
+Set the `model_dir` field in [yolov5s_ptq.yaml](./configs/yolov5s_ptq.yaml) to the directory of the model to evaluate, then obtain its mAP with the eval.py script:
+```shell
+export CUDA_VISIBLE_DEVICES=0
+python eval.py --config_path=./configs/yolov5s_ptq.yaml
+```
+
+#### 3.6 Improve Post-Training Quantization Accuracy
+This section shows how to improve PTQ accuracy with the quantization analysis tool. PTQ needs only a small amount of data, is simple to use and produces a quantized model quickly, but it often causes a noticeable accuracy drop. PaddleSlim provides an analysis tool, built on the `paddleslim.quant.AnalysisQuant` API, that visualizes which layers are unsuitable for quantization; skipping those layers improves the accuracy of the quantized model. Since PTQ degrades yolov6 the most, take yolov6 as the example (a usage sketch of the API follows this section):
+
+```shell
+python analysis.py --config_path=./configs/yolov6s_analysis.yaml
+```
+
+The analysis reports the quantized accuracy of every layer, together with weight and activation histograms of the worst layers. The layers that cause the largest accuracy drops can then be skipped during PTQ. For yolov6, the analysis suggests skipping `conv2d_2.w_0`, `conv2d_11.w_0`, `conv2d_15.w_0`, `conv2d_46.w_0` and `conv2d_49.w_0`; use [yolov6s_analyzed_ptq.yaml](./configs/yolov6s_analyzed_ptq.yaml) and run PTQ again. Skipping these five layers raises the PTQ mAP by 9.4 points (30.3 -> 39.7):
+
+```shell
+python post_quant.py --config_path=./configs/yolov6s_analyzed_ptq.yaml --save_dir=./yolov6s_analyzed_ptq_out
+```
+
+Notes:
+- The per-layer quantization accuracy is saved to `./analysis_results/analysis.txt` by default, and the histograms to `./analysis_results/act_hist_result.pdf` and `./analysis_results/weight_hist_result.pdf`.
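+The analysis.py script below is a thin wrapper around this API. A usage sketch with the argument values from `configs/yolov6s_analysis.yaml`; `eval_function` and `data_loader` stand for the COCO mAP evaluator and the calibration loader defined in analysis.py:
+
+```python
+from paddleslim.quant.analysis import AnalysisQuant
+
+# model_dir points at the Paddle inference model converted from yolov6s.onnx
+analyzer = AnalysisQuant(
+    model_dir='./yolov6s_infer',
+    model_filename='model.pdmodel',
+    params_filename='model.pdiparams',
+    eval_function=eval_function,  # returns mAP on COCO val2017
+    quantizable_op_type=['conv2d', 'depthwise_conv2d'],
+    weight_quantize_type='channel_wise_abs_max',
+    activation_quantize_type='moving_average_abs_max',
+    is_full_quantize=False,
+    data_loader=data_loader,  # calibration data
+    batch_size=10,
+    save_dir='./analysis_results')
+analyzer.analysis()  # writes analysis.txt and the histogram PDFs
+```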

+<p align="center">
+  <img src="./images/sensitivity_rank.png" />
+</p>
+
+
+## 4. Deployment
+
+## 5. FAQ
diff --git a/example/post_training_quantization/pytorch_yolo_series/analysis.py b/example/post_training_quantization/pytorch_yolo_series/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..118a7227e9c34aefe6d5a474da8594c76785f9ca --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/analysis.py @@ -0,0 +1,115 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +import paddle +from tqdm import tqdm +from post_process import YOLOv6PostProcess, coco_metric +from dataset import COCOValDataset, COCOTrainDataset +from paddleslim.common import load_config, load_onnx_model +from paddleslim.quant.analysis import AnalysisQuant + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of analysis config.", + required=True) + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + return parser + + +def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(compiled_test_program, + feed={test_feed_names[0]: data_all['image']}, + fetch_list=test_fetch_list, + return_numpy=False) + res = {} + postprocess = YOLOv6PostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + map_res = coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + return map_res[0] + + +def main(): + + global config + config = load_config(FLAGS.config_path) + + dataset = COCOTrainDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + data_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + global val_loader + dataset = COCOValDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=False, drop_last=False, num_workers=0) + + load_onnx_model(config["model_dir"]) + inference_model_path = config["model_dir"].rstrip().rstrip( + '.onnx') + '_infer' + analyzer = AnalysisQuant( + model_dir=inference_model_path, + model_filename='model.pdmodel', + params_filename='model.pdiparams', + eval_function=eval_function, + quantizable_op_type=config['quantizable_op_type'],
+ weight_quantize_type=config['weight_quantize_type'], + activation_quantize_type=config['activation_quantize_type'], + is_full_quantize=config['is_full_quantize'], + data_loader=data_loader, + batch_size=config['batch_size'], + save_dir=config['save_dir'], ) + analyzer.analysis() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fb861ec439f76538bc1e19a3fb26821c1aa5896 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml @@ -0,0 +1,7 @@ +model_dir: ./yolov5s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensors: None # you can set it after analysis diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d1e726b4a52c1ff764bc7a84896267f566a7e86 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml @@ -0,0 +1,10 @@ +model_dir: ./yolov6s.onnx +save_dir: ./analysis_results +quantizable_op_type: ["conv2d", "depthwise_conv2d"] +weight_quantize_type: 'channel_wise_abs_max' +activation_quantize_type: 'moving_average_abs_max' +is_full_quantize: False +dataset_dir: /dataset/coco/ +val_image_dir: val2017 +val_anno_path: annotations/instances_val2017.json +batch_size: 10 diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c857032896167819fb6febf56822ff7cc25e00f7 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml @@ -0,0 +1,7 @@ +model_dir: ./yolov6s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensor_list: ['conv2d_2.w_0', 'conv2d_15.w_0', 'conv2d_46.w_0', 'conv2d_11.w_0', 'conv2d_49.w_0'] diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab67a9df8a37364098f826657634dde3139d1cf0 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml @@ -0,0 +1,7 @@ +model_dir: ./yolov6s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json +skip_tensor_list: None diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0ad89a207b65c4849cb284881c687934b9a0f59b --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml @@ -0,0 +1,6 @@ +model_dir: ./yolov7s.onnx +dataset_dir: /dataset/coco/ +train_image_dir: train2017 +val_image_dir: val2017 +train_anno_path: annotations/instances_train2017.json +val_anno_path: annotations/instances_val2017.json diff --git a/example/post_training_quantization/pytorch_yolo_series/dataset.py b/example/post_training_quantization/pytorch_yolo_series/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7ddec29d0d7b2b4380d6680103dc93bec04c08e4 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/dataset.py @@ -0,0 +1,113 @@ +from pycocotools.coco import COCO +import cv2 +import os +import numpy as np +import paddle + + +class COCOValDataset(paddle.io.Dataset): + def __init__(self, + dataset_dir=None, + image_dir=None, + anno_path=None, + img_size=[640, 640]): + self.dataset_dir = dataset_dir + self.image_dir = image_dir + self.img_size = img_size + self.ann_file = os.path.join(dataset_dir, anno_path) + self.coco = COCO(self.ann_file) + ori_ids = list(sorted(self.coco.imgs.keys())) + # check gt bbox + clean_ids = [] + for idx in ori_ids: + ins_anno_ids = self.coco.getAnnIds(imgIds=[idx], iscrowd=False) + instances = self.coco.loadAnns(ins_anno_ids) + num_bbox = 0 + for inst in instances: + if inst.get('ignore', False): + continue + if 'bbox' not in inst.keys(): + continue + elif not any(np.array(inst['bbox'])): + continue + else: + num_bbox += 1 + if num_bbox > 0: + clean_ids.append(idx) + self.ids = clean_ids + + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return { + 'image': img, + 'im_id': np.array([img_id]), + 'scale_factor': scale_factor + } + + def __len__(self): + return len(self.ids) + + def _get_img_data_from_img_id(self, img_id): + img_info = self.coco.loadImgs(img_id)[0] + img_path = os.path.join(self.dataset_dir, self.image_dir, + img_info['file_name']) + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img + + def _generate_scale(self, im, target_shape, keep_ratio=True): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + if keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(target_shape) + target_size_max = np.max(target_shape) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = target_shape + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + def image_preprocess(self, img, target_shape): + # Resize image + im_scale_y, im_scale_x = self._generate_scale(img, target_shape) + img = cv2.resize( + img, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=cv2.INTER_LINEAR) + # Pad + im_h, im_w = img.shape[:2] + h, w = target_shape[:] + if h != im_h or w != im_w: + canvas = np.ones((h, w, 3), dtype=np.float32) + canvas *= np.array([114.0, 114.0, 114.0], dtype=np.float32) + canvas[0:im_h, 0:im_w, :] = img.astype(np.float32) + 
img = canvas + img = np.transpose(img / 255, [2, 0, 1]) + scale_factor = np.array([im_scale_y, im_scale_x]) + return img.astype(np.float32), scale_factor + + +class COCOTrainDataset(COCOValDataset): + def __getitem__(self, idx): + img_id = self.ids[idx] + img = self._get_img_data_from_img_id(img_id) + img, scale_factor = self.image_preprocess(img, self.img_size) + return {'x2paddle_image_arrays': img} diff --git a/example/post_training_quantization/pytorch_yolo_series/eval.py b/example/post_training_quantization/pytorch_yolo_series/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..6705104b69ac3db586bd0954e42e76010ad88426 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/eval.py @@ -0,0 +1,99 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +from tqdm import tqdm +import paddle +from paddleslim.common import load_config as load_slim_config +from paddleslim.common import load_inference_model +from post_process import YOLOv6PostProcess, coco_metric +from dataset import COCOValDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of compression strategy config.", + required=True) + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + + return parser + + +def eval(): + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + val_program, feed_target_names, fetch_targets = load_inference_model( + config["model_dir"], exe, "model.pdmodel", "model.pdiparams") + + bboxes_list, bbox_nums_list, image_id_list = [], [], [] + with tqdm( + total=len(val_loader), + bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}', + ncols=80) as t: + for data in val_loader: + data_all = {k: np.array(v) for k, v in data.items()} + outs = exe.run(val_program, + feed={feed_target_names[0]: data_all['image']}, + fetch_list=fetch_targets, + return_numpy=False) + res = {} + postprocess = YOLOv6PostProcess( + score_threshold=0.001, nms_threshold=0.65, multi_label=True) + res = postprocess(np.array(outs[0]), data_all['scale_factor']) + bboxes_list.append(res['bbox']) + bbox_nums_list.append(res['bbox_num']) + image_id_list.append(np.array(data_all['im_id'])) + t.update() + + coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list) + + +def main(): + global config + config = load_slim_config(FLAGS.config_path) + + global val_loader + dataset = COCOValDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + global anno_file + anno_file = dataset.ann_file + val_loader = paddle.io.DataLoader(dataset, batch_size=1) + + eval() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + 
+ assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png b/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png new file mode 100644 index 0000000000000000000000000000000000000000..1eab297a1118f48e56d4ef496fcf5d18948016eb Binary files /dev/null and b/example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png differ diff --git a/example/post_training_quantization/pytorch_yolo_series/post_process.py b/example/post_training_quantization/pytorch_yolo_series/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..3232fe3f56ede4c77975f3e9331cdabdf12ea66c --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/post_process.py @@ -0,0 +1,231 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cv2 +import json +import sys + + +def box_area(boxes): + """ + Args: + boxes(np.ndarray): [N, 4] + return: [N] + """ + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +def box_iou(box1, box2): + """ + Args: + box1(np.ndarray): [N, 4] + box2(np.ndarray): [M, 4] + return: [N, M] + """ + area1 = box_area(box1) + area2 = box_area(box2) + lt = np.maximum(box1[:, np.newaxis, :2], box2[:, :2]) + rb = np.minimum(box1[:, np.newaxis, 2:], box2[:, 2:]) + wh = rb - lt + wh = np.maximum(0, wh) + inter = wh[:, :, 0] * wh[:, :, 1] + iou = inter / (area1[:, np.newaxis] + area2 - inter) + return iou + + +def nms(boxes, scores, iou_threshold): + """ + Non Max Suppression numpy implementation. + args: + boxes(np.ndarray): [N, 4] + scores(np.ndarray): [N, 1] + iou_threshold(float): Threshold of IoU. + """ + idxs = scores.argsort() + keep = [] + while idxs.size > 0: + max_score_index = idxs[-1] + max_score_box = boxes[max_score_index][None, :] + keep.append(max_score_index) + if idxs.size == 1: + break + idxs = idxs[:-1] + other_boxes = boxes[idxs] + ious = box_iou(max_score_box, other_boxes) + idxs = idxs[ious[0] <= iou_threshold] + + keep = np.array(keep) + return keep + + +class YOLOv6PostProcess(object): + """ + Post process of YOLOv6 network. + args: + score_threshold(float): Threshold to filter out bounding boxes with low + confidence score. If not provided, consider all boxes. + nms_threshold(float): The threshold to be used in NMS. + multi_label(bool): Whether keep multi label in boxes. + keep_top_k(int): Number of total bboxes to be kept per image after NMS + step. -1 means keeping all bboxes after NMS step. 
+ """ + + def __init__(self, + score_threshold=0.25, + nms_threshold=0.5, + multi_label=False, + keep_top_k=300): + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + self.multi_label = multi_label + self.keep_top_k = keep_top_k + + def _xywh2xyxy(self, x): + # Convert from [x, y, w, h] to [x1, y1, x2, y2] + y = np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + def _non_max_suppression(self, prediction): + max_wh = 4096 # (pixels) minimum and maximum box width and height + nms_top_k = 30000 + + cand_boxes = prediction[..., 4] > self.score_threshold # candidates + output = [np.zeros((0, 6))] * prediction.shape[0] + + for batch_id, boxes in enumerate(prediction): + # Apply constraints + boxes = boxes[cand_boxes[batch_id]] + if not boxes.shape[0]: + continue + # Compute conf (conf = obj_conf * cls_conf) + boxes[:, 5:] *= boxes[:, 4:5] + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + convert_box = self._xywh2xyxy(boxes[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if self.multi_label: + i, j = (boxes[:, 5:] > self.score_threshold).nonzero() + boxes = np.concatenate( + (convert_box[i], boxes[i, j + 5, None], + j[:, None].astype(np.float32)), + axis=1) + else: + conf = np.max(boxes[:, 5:], axis=1) + j = np.argmax(boxes[:, 5:], axis=1) + re = np.array(conf.reshape(-1) > self.score_threshold) + conf = conf.reshape(-1, 1) + j = j.reshape(-1, 1) + boxes = np.concatenate((convert_box, conf, j), axis=1)[re] + + num_box = boxes.shape[0] + if not num_box: + continue + elif num_box > nms_top_k: + boxes = boxes[boxes[:, 4].argsort()[::-1][:nms_top_k]] + + # Batched NMS + c = boxes[:, 5:6] * max_wh + clean_boxes, scores = boxes[:, :4] + c, boxes[:, 4] + keep = nms(clean_boxes, scores, self.nms_threshold) + # limit detection box num + if keep.shape[0] > self.keep_top_k: + keep = keep[:self.keep_top_k] + output[batch_id] = boxes[keep] + return output + + def __call__(self, outs, scale_factor): + preds = self._non_max_suppression(outs) + bboxs, box_nums = [], [] + for i, pred in enumerate(preds): + if len(pred.shape) > 2: + pred = np.squeeze(pred) + if len(pred.shape) == 1: + pred = pred[np.newaxis, :] + pred_bboxes = pred[:, :4] + scale_factor = np.tile(scale_factor[i][::-1], (1, 2)) + pred_bboxes /= scale_factor + bbox = np.concatenate( + [ + pred[:, -1][:, np.newaxis], pred[:, -2][:, np.newaxis], + pred_bboxes + ], + axis=-1) + bboxs.append(bbox) + box_num = bbox.shape[0] + box_nums.append(box_num) + bboxs = np.concatenate(bboxs, axis=0) + box_nums = np.array(box_nums) + return {'bbox': bboxs, 'bbox_num': box_nums} + + +def coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list): + try: + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + except: + print( + "[ERROR] Not found pycocotools, please install by `pip install pycocotools`" + ) + sys.exit(1) + + coco_gt = COCO(anno_file) + cats = coco_gt.loadCats(coco_gt.getCatIds()) + clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)} + results = [] + for bboxes, bbox_nums, image_id in zip(bboxes_list, bbox_nums_list, + image_id_list): + results += _get_det_res(bboxes, bbox_nums, image_id, clsid2catid) + + output = "bbox.json" + with open(output, 'w') as f: + json.dump(results, f) + + coco_dt = coco_gt.loadRes(output) + coco_eval = COCOeval(coco_gt, coco_dt, 'bbox') + coco_eval.evaluate() + 
coco_eval.accumulate() + coco_eval.summarize() + return coco_eval.stats + + +def _get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map): + det_res = [] + k = 0 + for i in range(len(bbox_nums)): + cur_image_id = int(image_id[i][0]) + det_nums = bbox_nums[i] + for j in range(det_nums): + dt = bboxes[k] + k = k + 1 + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] + w = xmax - xmin + h = ymax - ymin + bbox = [xmin, ymin, w, h] + dt_res = { + 'image_id': cur_image_id, + 'category_id': category_id, + 'bbox': bbox, + 'score': score + } + det_res.append(dt_res) + return det_res diff --git a/example/post_training_quantization/pytorch_yolo_series/post_quant.py b/example/post_training_quantization/pytorch_yolo_series/post_quant.py new file mode 100644 index 0000000000000000000000000000000000000000..cac752a934b350c6fec81866c2e44add0edbfe65 --- /dev/null +++ b/example/post_training_quantization/pytorch_yolo_series/post_quant.py @@ -0,0 +1,93 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import argparse +import paddle +from paddleslim.common import load_config, load_onnx_model +from paddleslim.quant import quant_post_static +from dataset import COCOTrainDataset + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--config_path', + type=str, + default=None, + help="path of post training quantization config.", + required=True) + parser.add_argument( + '--save_dir', + type=str, + default='ptq_out', + help="directory to save compressed model.") + parser.add_argument( + '--devices', + type=str, + default='gpu', + help="which device used to compress.") + parser.add_argument( + '--algo', type=str, default='KL', help="post quant algo.") + + return parser + + +def main(): + global config + config = load_config(FLAGS.config_path) + + dataset = COCOTrainDataset( + dataset_dir=config['dataset_dir'], + image_dir=config['val_image_dir'], + anno_path=config['val_anno_path']) + train_loader = paddle.io.DataLoader( + dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0) + + place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace() + exe = paddle.static.Executor(place) + + # since the model converted from PyTorch is in ONNX format, + # call load_onnx_model first and rename the model_dir + load_onnx_model(config["model_dir"]) + inference_model_path = config["model_dir"].rstrip().rstrip( + '.onnx') + '_infer' + + quant_post_static( + executor=exe, + model_dir=inference_model_path, + quantize_model_path=FLAGS.save_dir, + data_loader=train_loader, + model_filename='model.pdmodel', + params_filename='model.pdiparams', + batch_size=32, + batch_nums=10, + algo=FLAGS.algo, + hist_percent=0.999, + is_full_quantize=False, + bias_correction=False, + onnx_format=True) + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser()
+ FLAGS = parser.parse_args() + + assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu'] + paddle.set_device(FLAGS.devices) + + main() diff --git a/paddleslim/auto_compression/__init__.py b/paddleslim/auto_compression/__init__.py index 990ad37b71e76cb8044a30e64a7c4377dec274a1..cfc26259d2fa85cd80710eb61972483681525f6b 100644 --- a/paddleslim/auto_compression/__init__.py +++ b/paddleslim/auto_compression/__init__.py @@ -19,8 +19,14 @@ from .config_helpers import * from .utils import * __all__ = [ - "AutoCompression", "Quantization", "Distillation", - "MultiTeacherDistillation", "HyperParameterOptimization", "Prune", - "UnstructurePrune", "ProgramInfo", "TrainConfig", "save_config", - "load_config", "predict_compressed_model" + "AutoCompression", + "Quantization", + "Distillation", + "MultiTeacherDistillation", + "HyperParameterOptimization", + "Prune", + "UnstructurePrune", + "ProgramInfo", + "TrainConfig", + "predict_compressed_model", ] diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py index b64152b9c3138bb87b062df8592c52b95dbb3694..a907e0c9b7d6a997d04c9b7ec500f6093da3c2d4 100644 --- a/paddleslim/auto_compression/compressor.py +++ b/paddleslim/auto_compression/compressor.py @@ -29,13 +29,15 @@ from ..quant.quanter import convert, quant_post from ..common.recover_program import recover_inference_program from ..common import get_logger from ..common.patterns import get_patterns +from ..common.load_model import load_inference_model, get_model_dir +from ..common.dataloader import wrap_dataloader, get_feed_vars +from ..common.config_helper import load_config from ..analysis import TableLatencyPredictor from .create_compressed_program import build_distill_program, build_quant_program, build_prune_program, remove_unused_var_nodes from .strategy_config import TrainConfig, ProgramInfo, merge_config from .auto_strategy import prepare_strategy, get_final_quant_config, create_strategy_config, create_train_config -from .config_helpers import load_config, extract_strategy_config, extract_train_config +from .config_helpers import extract_strategy_config, extract_train_config from .utils.predict import with_variable_shape -from .utils import get_feed_vars, wrap_dataloader, load_inference_model, get_model_dir _logger = get_logger(__name__, level=logging.INFO) diff --git a/paddleslim/auto_compression/config_helpers.py b/paddleslim/auto_compression/config_helpers.py index ebc5b45c83bb393e48cb005541e9f8733499f790..b1e426cc246b327a54f35ea0f0df7f5233391492 100644 --- a/paddleslim/auto_compression/config_helpers.py +++ b/paddleslim/auto_compression/config_helpers.py @@ -14,42 +14,7 @@ import yaml import os from paddleslim.auto_compression.strategy_config import * - -__all__ = ['save_config', 'load_config'] - - -def print_arguments(args, level=0): - if level == 0: - print('----------- Running Arguments -----------') - for arg, value in sorted(args.items()): - if isinstance(value, dict): - print('\t' * level, '%s:' % arg) - print_arguments(value, level + 1) - else: - print('\t' * level, '%s: %s' % (arg, value)) - if level == 0: - print('------------------------------------------') - - -def load_config(config): - """Load configurations from yaml file into dict. - Fields validation is skipped for loading some custom information. - Args: - config(str): The path of configuration file. - Returns: - dict: A dict storing configuration information. 
- """ - if config is None: - return None - assert isinstance( - config, - str), f"config should be str but got type(config)={type(config)}" - assert os.path.exists(config) and os.path.isfile( - config), f"{config} not found or it is not a file." - with open(config) as f: - cfg = yaml.load(f, Loader=yaml.FullLoader) - print_arguments(cfg) - return cfg +from ..common.config_helper import load_config def extract_strategy_config(config): @@ -101,12 +66,3 @@ def extract_train_config(config): **value) if value is not None else TrainConfig() # return default training config when it is not set return TrainConfig() - - -def save_config(config, config_path): - """ - convert dict config to yaml. - """ - f = open(config_path, "w") - yaml.dump(config, f) - f.close() diff --git a/paddleslim/auto_compression/create_compressed_program.py b/paddleslim/auto_compression/create_compressed_program.py index 30276bbf64b2e2e23d9e4adc19ee435c2c72ee19..8a6c7db2f5b691a2fc81643830e53bef231350ee 100644 --- a/paddleslim/auto_compression/create_compressed_program.py +++ b/paddleslim/auto_compression/create_compressed_program.py @@ -23,7 +23,7 @@ from ..dist import * from ..common.recover_program import recover_inference_program, _remove_fetch_node from ..common import get_logger from .strategy_config import ProgramInfo -from .utils import load_inference_model +from ..common.load_model import load_inference_model _logger = get_logger(__name__, level=logging.INFO) __all__ = [ @@ -52,7 +52,8 @@ def _create_optimizer(train_config): optimizer_builder = train_config['optimizer_builder'] assert isinstance( optimizer_builder, dict - ), f"Value of 'optimizer_builder' in train_config should be dict but got {type(optimizer_builder)}" + ), "Value of 'optimizer_builder' in train_config should be dict but got {}".format( + type(optimizer_builder)) if 'grad_clip' in optimizer_builder: g_clip_params = optimizer_builder['grad_clip'] g_clip_type = g_clip_params.pop('type') @@ -444,9 +445,8 @@ def build_prune_program(executor, "####################channel pruning##########################") for param in pruned_program.global_block().all_parameters(): if param.name in original_shapes: - _logger.info( - f"{param.name}, from {original_shapes[param.name]} to {param.shape}" - ) + _logger.info("{}, from {} to {}".format( + param.name, original_shapes[param.name], param.shape)) _logger.info( "####################channel pruning end##########################") train_program_info.program = pruned_program diff --git a/paddleslim/auto_compression/utils/__init__.py b/paddleslim/auto_compression/utils/__init__.py index aa4f3ec07ac02436b5eaed00c781b9a6e34e70f8..e3c3a49d71823f432c810d9dccee0205d548f7ed 100644 --- a/paddleslim/auto_compression/utils/__init__.py +++ b/paddleslim/auto_compression/utils/__init__.py @@ -14,11 +14,5 @@ from __future__ import absolute_import from .predict import predict_compressed_model -from .dataloader import * -from . import dataloader -from .load_model import * -from . import load_model __all__ = ["predict_compressed_model"] -__all__ += dataloader.__all__ -__all__ += load_model.__all__ diff --git a/paddleslim/auto_compression/utils/fake_ptq.py b/paddleslim/auto_compression/utils/fake_ptq.py index fbecc224f663c39403f4741aa903a3cbaf5e9188..e86dd84860b869bf50903bbbf9e4126e6492084a 100644 --- a/paddleslim/auto_compression/utils/fake_ptq.py +++ b/paddleslim/auto_compression/utils/fake_ptq.py @@ -12,7 +12,7 @@ except: TRANSFORM_PASS_OP_TYPES = QuantizationTransformPass._supported_quantizable_op_type QUANT_DEQUANT_PASS_OP_TYPES = AddQuantDequantPass._supported_quantizable_op_type -from .load_model import load_inference_model +from ...common.load_model import load_inference_model def post_quant_fake(executor, diff --git a/paddleslim/auto_compression/utils/load_model.py b/paddleslim/auto_compression/utils/load_model.py deleted file mode 100644 index 637e808ace099b70a210e3ead7f7cd285a8c46ad..0000000000000000000000000000000000000000 --- a/paddleslim/auto_compression/utils/load_model.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import paddle -from ...common import load_onnx_model - -__all__ = ['load_inference_model', 'get_model_dir'] - - -def load_inference_model(path_prefix, - executor, - model_filename=None, - params_filename=None): - # Load onnx model to Inference model. - if path_prefix.endswith('.onnx'): - inference_program, feed_target_names, fetch_targets = load_onnx_model( - path_prefix) - return [inference_program, feed_target_names, fetch_targets] - # Load Inference model.
- # TODO: clean code - if model_filename is not None and model_filename.endswith('.pdmodel'): - model_name = '.'.join(model_filename.split('.')[:-1]) - assert os.path.exists( - os.path.join(path_prefix, model_name + '.pdmodel') - ), 'Please check {}, or fix model_filename parameter.'.format( - os.path.join(path_prefix, model_name + '.pdmodel')) - assert os.path.exists( - os.path.join(path_prefix, model_name + '.pdiparams') - ), 'Please check {}, or fix params_filename parameter.'.format( - os.path.join(path_prefix, model_name + '.pdiparams')) - model_path_prefix = os.path.join(path_prefix, model_name) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=model_path_prefix, executor=executor)) - elif model_filename is not None and params_filename is not None: - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=path_prefix, - executor=executor, - model_filename=model_filename, - params_filename=params_filename)) - else: - model_name = '.'.join(model_filename.split('.') - [:-1]) if model_filename is not None else 'model' - if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')): - model_path_prefix = os.path.join(path_prefix, model_name) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=model_path_prefix, executor=executor)) - else: - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model( - path_prefix=path_prefix, executor=executor)) - - return [inference_program, feed_target_names, fetch_targets] - - -def get_model_dir(model_dir, model_filename, params_filename): - if model_dir.endswith('.onnx'): - updated_model_dir = model_dir.rstrip().rstrip('.onnx') + '_infer' - else: - updated_model_dir = model_dir.rstrip('/') - - if model_filename == None: - updated_model_filename = 'model.pdmodel' - else: - updated_model_filename = model_filename - - if params_filename == None: - updated_params_filename = 'model.pdiparams' - else: - updated_params_filename = params_filename - - if params_filename is None and model_filename is not None: - raise NotImplementedError( - "NOT SUPPORT parameters saved in separate files. Please convert it to single binary file first." - ) - return updated_model_dir, updated_model_filename, updated_params_filename diff --git a/paddleslim/auto_compression/utils/predict.py b/paddleslim/auto_compression/utils/predict.py index 01ef6a90b17280a74ef7ca54853cf20891392aad..5b8c6adb1850fd086317423bcb9e2fe97c34d4a8 100644 --- a/paddleslim/auto_compression/utils/predict.py +++ b/paddleslim/auto_compression/utils/predict.py @@ -4,7 +4,7 @@ import paddle from ...analysis import TableLatencyPredictor from .prune_model import get_sparse_model, get_prune_model from .fake_ptq import post_quant_fake -from .load_model import load_inference_model +from ...common.load_model import load_inference_model def with_variable_shape(model_dir, model_filename=None, params_filename=None): @@ -53,7 +53,7 @@ def predict_compressed_model(executor, latency_dict(dict): The latency latency of the model under various compression strategies. 
""" local_rank = paddle.distributed.get_rank() - quant_model_path = f'quant_model_rank_{local_rank}_tmp' + quant_model_path = 'quant_model_rank_{}_tmp'.format(local_rank) prune_model_path = f'prune_model_rank_{local_rank}_tmp' sparse_model_path = f'sparse_model_rank_{local_rank}_tmp' diff --git a/paddleslim/auto_compression/utils/prune_model.py b/paddleslim/auto_compression/utils/prune_model.py index 426a1859c4419fd4bb0d4db3f8f097d5894c223b..c0da14ca9693112cf6919294f21136b86a5ea1d5 100644 --- a/paddleslim/auto_compression/utils/prune_model.py +++ b/paddleslim/auto_compression/utils/prune_model.py @@ -5,7 +5,7 @@ import paddle import paddle.static as static from ...prune import Pruner from ...core import GraphWrapper -from .load_model import load_inference_model +from ...common.load_model import load_inference_model __all__ = ["get_sparse_model", "get_prune_model"] @@ -19,9 +19,10 @@ def get_sparse_model(executor, places, model_file, param_file, ratio, ratio(float): The ratio to prune the model. save_path(str): The save path of pruned model. """ - assert os.path.exists(model_file), f'{model_file} does not exist.' + assert os.path.exists(model_file), '{} does not exist.'.format(model_file) assert os.path.exists( - param_file) or param_file is None, f'{param_file} does not exist.' + param_file) or param_file is None, '{} does not exist.'.format( + param_file) paddle.enable_static() SKIP = ['image', 'feed', 'pool2d_0.tmp_0'] diff --git a/paddleslim/common/__init__.py b/paddleslim/common/__init__.py index e866790d7fa1be1a4d9ad44ba3aee63c37d7859b..03825c8a6bb98a39ca1f1026b3a86e484576b790 100644 --- a/paddleslim/common/__init__.py +++ b/paddleslim/common/__init__.py @@ -25,12 +25,16 @@ from .analyze_helper import VarCollector from . import wrapper_function from . import recover_program from . import patterns -from .convert_model import load_onnx_model +from .load_model import load_inference_model, get_model_dir, load_onnx_model +from .dataloader import wrap_dataloader, get_feed_vars +from .config_helper import load_config, save_config __all__ = [ 'EvolutionaryController', 'SAController', 'get_logger', 'ControllerServer', 'ControllerClient', 'lock', 'unlock', 'cached_reader', 'AvgrageMeter', - 'Server', 'Client', 'RLBaseController', 'VarCollector', 'load_onnx_model' + 'Server', 'Client', 'RLBaseController', 'VarCollector', 'load_onnx_model', + 'load_inference_model', 'get_model_dir', 'wrap_dataloader', 'get_feed_vars', + 'load_config', 'save_config' ] __all__ += wrapper_function.__all__ diff --git a/paddleslim/common/config_helper.py b/paddleslim/common/config_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..486fa9b449789f3c3d60d5e50cd3364406a9c908 --- /dev/null +++ b/paddleslim/common/config_helper.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import yaml
+import os
+
+__all__ = ['load_config', 'save_config']
+
+
+def print_arguments(args, level=0):
+    if level == 0:
+        print('----------- Running Arguments -----------')
+    for arg, value in sorted(args.items()):
+        if isinstance(value, dict):
+            print('\t' * level, '%s:' % arg)
+            print_arguments(value, level + 1)
+        else:
+            print('\t' * level, '%s: %s' % (arg, value))
+    if level == 0:
+        print('------------------------------------------')
+
+
+def load_config(config):
+    """Load configurations from a yaml file into a dict.
+    Field validation is skipped so that custom information can be loaded.
+    Args:
+        config(str): The path of the configuration file.
+    Returns:
+        dict: A dict storing the configuration information.
+    """
+    if config is None:
+        return None
+    assert isinstance(config, str), \
+        "config should be str but got type(config)={}".format(type(config))
+    assert os.path.exists(config) and os.path.isfile(
+        config), "{} not found or it is not a file.".format(config)
+    with open(config) as f:
+        cfg = yaml.load(f, Loader=yaml.FullLoader)
+    print_arguments(cfg)
+    return cfg
+
+
+def save_config(config, config_path):
+    """
+    Save a dict of configurations to a yaml file.
+    """
+    with open(config_path, "w") as f:
+        yaml.dump(config, f)
diff --git a/paddleslim/auto_compression/utils/dataloader.py b/paddleslim/common/dataloader.py
similarity index 100%
rename from paddleslim/auto_compression/utils/dataloader.py
rename to paddleslim/common/dataloader.py
diff --git a/paddleslim/common/convert_model.py b/paddleslim/common/load_model.py
similarity index 58%
rename from paddleslim/common/convert_model.py
rename to paddleslim/common/load_model.py
index 00e0182017c4d1e60b2a6c28e8db1d109be28666..81d50f7311b41eb1600263915a48bb1f44640519 100644
--- a/paddleslim/common/convert_model.py
+++ b/paddleslim/common/load_model.py
@@ -17,7 +17,6 @@ import logging
 import os
 import shutil
 import sys
-
 import paddle
 from x2paddle.decoder.onnx_decoder import ONNXDecoder
 from x2paddle.op_mapper.onnx2paddle.onnx_op_mapper import ONNXOpMapper
@@ -27,7 +26,78 @@ from x2paddle.utils import ConverterCheck
 from . import get_logger
 _logger = get_logger(__name__, level=logging.INFO)
 
-__all__ = ['load_onnx_model']
+__all__ = ['load_inference_model', 'get_model_dir', 'load_onnx_model']
+
+
+def load_inference_model(path_prefix,
+                         executor,
+                         model_filename=None,
+                         params_filename=None):
+    # Load an ONNX model as an Inference model.
+    if path_prefix.endswith('.onnx'):
+        inference_program, feed_target_names, fetch_targets = load_onnx_model(
+            path_prefix)
+        return [inference_program, feed_target_names, fetch_targets]
+    # Load an Inference model.
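+    # The branches below cover three cases: (1) model_filename explicitly
+    # names a '.pdmodel' file inside path_prefix, (2) an explicit
+    # model/params filename pair is given, and (3) only a directory is
+    # given, which falls back to the default 'model' prefix when present.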
+    # TODO: clean code
+    if model_filename is not None and model_filename.endswith('.pdmodel'):
+        model_name = '.'.join(model_filename.split('.')[:-1])
+        assert os.path.exists(
+            os.path.join(path_prefix, model_name + '.pdmodel')
+        ), 'Please check {}, or fix model_filename parameter.'.format(
+            os.path.join(path_prefix, model_name + '.pdmodel'))
+        assert os.path.exists(
+            os.path.join(path_prefix, model_name + '.pdiparams')
+        ), 'Please check {}, or fix params_filename parameter.'.format(
+            os.path.join(path_prefix, model_name + '.pdiparams'))
+        model_path_prefix = os.path.join(path_prefix, model_name)
+        [inference_program, feed_target_names, fetch_targets] = (
+            paddle.static.load_inference_model(
+                path_prefix=model_path_prefix, executor=executor))
+    elif model_filename is not None and params_filename is not None:
+        [inference_program, feed_target_names, fetch_targets] = (
+            paddle.static.load_inference_model(
+                path_prefix=path_prefix,
+                executor=executor,
+                model_filename=model_filename,
+                params_filename=params_filename))
+    else:
+        model_name = '.'.join(model_filename.split('.')
+                              [:-1]) if model_filename is not None else 'model'
+        if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')):
+            model_path_prefix = os.path.join(path_prefix, model_name)
+            [inference_program, feed_target_names, fetch_targets] = (
+                paddle.static.load_inference_model(
+                    path_prefix=model_path_prefix, executor=executor))
+        else:
+            [inference_program, feed_target_names, fetch_targets] = (
+                paddle.static.load_inference_model(
+                    path_prefix=path_prefix, executor=executor))
+
+    return [inference_program, feed_target_names, fetch_targets]
+
+
+def get_model_dir(model_dir, model_filename, params_filename):
+    if model_dir.endswith('.onnx'):
+        # Strip the '.onnx' suffix exactly; rstrip('.onnx') would strip any
+        # trailing '.', 'o', 'n', 'x' characters and corrupt the name.
+        updated_model_dir = model_dir.rstrip()[:-len('.onnx')] + '_infer'
+    else:
+        updated_model_dir = model_dir.rstrip('/')
+
+    if model_filename is None:
+        updated_model_filename = 'model.pdmodel'
+    else:
+        updated_model_filename = model_filename
+
+    if params_filename is None:
+        updated_params_filename = 'model.pdiparams'
+    else:
+        updated_params_filename = params_filename
+
+    if params_filename is None and model_filename is not None:
+        raise NotImplementedError(
+            "Parameters saved in separate files are not supported. Please convert them to a single binary file first."
+        )
+    return updated_model_dir, updated_model_filename, updated_params_filename
 
 
 def load_onnx_model(model_path, disable_feedback=False):
@@ -112,4 +182,4 @@ def load_onnx_model(model_path, disable_feedback=False):
         shutil.rmtree(
             os.path.join(inference_model_path, 'onnx2paddle_{}'.format(
                 model_idx)))
-    return val_program, feed_target_names, fetch_targets
+    return val_program, feed_target_names, fetch_targets
\ No newline at end of file
diff --git a/paddleslim/quant/analysis.py b/paddleslim/quant/analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..1013c392056a115bd08d8ec1a6a38041ab967cf5
--- /dev/null
+++ b/paddleslim/quant/analysis.py
@@ -0,0 +1,331 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import pickle
+import copy
+import logging
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+import numpy as np
+
+import paddle
+from paddle.fluid import core
+from paddle.fluid import framework
+from paddle.fluid.framework import IrGraph
+from paddle.fluid.executor import global_scope
+from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
+from paddle.fluid.contrib.slim.quantization.utils import _get_op_input_var_names, load_variable_data
+from .quanter import quant_post
+from ..core import GraphWrapper
+from ..common import get_logger
+from ..common import get_feed_vars, wrap_dataloader, load_inference_model, get_model_dir
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+__all__ = ["AnalysisQuant"]
+
+
+class AnalysisQuant(object):
+    def __init__(
+            self,
+            model_dir,
+            model_filename=None,
+            params_filename=None,
+            eval_function=None,
+            quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
+            weight_quantize_type='abs_max',
+            activation_quantize_type='moving_average_abs_max',
+            is_full_quantize=False,
+            batch_size=10,
+            batch_nums=10,
+            data_loader=None,
+            save_dir='analysis_results',
+            checkpoint_name='analysis_checkpoint.pkl',
+            num_histogram_plots=10, ):
+        """
+        AnalysisQuant analyzes the quantization sensitivity of each op in the model.
+
+        Args:
+            model_dir(str): the path of the fp32 model that will be quantized
+            model_filename(str): the model file name of the fp32 model
+            params_filename(str): the parameter file name of the fp32 model
+            eval_function(function): a user-defined evaluation function that
+                returns the metric of the inference program; it is used to
+                judge the metric of the quantized model
+                (TODO: make this argument optional)
+            quantizable_op_type(list, optional): op types that can be quantized
+            batch_size(int, optional): the batch size of the DataLoader, default is 10
+            batch_nums(int, optional): the number of calibration batches, default is 10
+            data_loader(Python Generator, paddle.io.DataLoader, optional): a
+                generator or dataloader that provides the calibration data and
+                returns one batch at a time
+            save_dir(str, optional): the output directory that stores the analyzed information
+            checkpoint_name(str, optional): the name of the checkpoint file that
+                saves the analyzed information, so that an interrupted analysis can resume
+            num_histogram_plots(int, optional): the number of histogram plots to
+                visualize; the plots are written to PDF files in save_dir
+        """
+        self.model_dir = model_dir
+        self.model_filename = model_filename
+        self.params_filename = params_filename
+        self.quantizable_op_type = quantizable_op_type
+        self.weight_quantize_type = weight_quantize_type
+        self.activation_quantize_type = activation_quantize_type
+        self.is_full_quantize = is_full_quantize
+        self.histogram_bins = 1000
+        self.save_dir = save_dir
+        self.eval_function = eval_function
+        self.quant_layer_names = []
+        self.checkpoint_name = os.path.join(save_dir, checkpoint_name)
+        self.quant_layer_metrics = {}
+        self.batch_size = batch_size
+        self.batch_nums = batch_nums
+        self.num_histogram_plots = num_histogram_plots
+
+        if not os.path.exists(self.save_dir):
+            os.makedirs(self.save_dir)
+
+        devices = paddle.device.get_device().split(':')[0]
+        self.places = paddle.device._convert_to_place(devices)
+        executor = paddle.static.Executor(self.places)
+
+        # load model
+        [program, self.feed_list, self.fetch_list] = load_inference_model(
+            model_dir,
+            executor=executor,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+        # create data_loader
+        self.data_loader = wrap_dataloader(data_loader, self.feed_list)
+
+        # evaluate before quantization
+        # TODO: support self.eval_function being None
+        if self.eval_function is not None:
+            self.base_metric = self.eval_function(
+                executor, program, self.feed_list, self.fetch_list)
+            _logger.info('before quantization, the accuracy of the model is: {}'.
+                         format(self.base_metric))
+        # quantize all layers and evaluate the result (skip_tensor_list=None)
+        post_training_quantization = PostTrainingQuantization(
+            executor=executor,
+            data_loader=self.data_loader,
+            model_dir=self.model_dir,
+            model_filename=self.model_filename,
+            params_filename=self.params_filename,
+            batch_size=self.batch_size,
+            batch_nums=self.batch_nums,
+            algo='avg',  # fastest
+            quantizable_op_type=self.quantizable_op_type,
+            weight_quantize_type=self.weight_quantize_type,
+            activation_quantize_type=self.activation_quantize_type,
+            is_full_quantize=self.is_full_quantize,
+            skip_tensor_list=None, )
+        program = post_training_quantization.quantize()
+        self.quant_metric = self.eval_function(executor, program,
+                                               self.feed_list, self.fetch_list)
+        _logger.info('after quantization, the accuracy of the model is: {}'.
+                     format(self.quant_metric))
+
+        # get the names of the quantized weight and act vars
+        self.quantized_weight_var_name = post_training_quantization._quantized_weight_var_name
+        self.quantized_act_var_name = post_training_quantization._quantized_act_var_name
+        executor.close()
+
+        # load already-analyzed layers from the checkpoint, if one exists
+        self.load_checkpoint()
+        self.tobe_analyzed_layers = self.quantized_weight_var_name - set(
+            list(self.quant_layer_metrics.keys()))
+        self.tobe_analyzed_layers = sorted(list(self.tobe_analyzed_layers))
+
+    def analysis(self):
+        self.compute_quant_sensitivity()
+        self.sensitivity_ranklist = sorted(
+            self.quant_layer_metrics,
+            key=self.quant_layer_metrics.get,
+            reverse=False)
+
+        _logger.info('Finished computing the sensitivity of the model.')
+        for name in self.sensitivity_ranklist:
+            _logger.info("quant layer name: {}, eval metric: {}".format(
+                name, self.quant_layer_metrics[name]))
+
+        analysis_file = os.path.join(self.save_dir, "analysis.txt")
+        with open(analysis_file, "w") as analysis_ret_f:
+            for name in self.sensitivity_ranklist:
+                analysis_ret_f.write(
+                    "quant layer name: {}, eval metric: {}\n".format(
+                        name, self.quant_layer_metrics[name]))
+        _logger.info('Analysis file is saved in {}'.format(analysis_file))
+        self.calculate_histogram()
+        self.draw_pdf()
+
+    def save_checkpoint(self):
+        if not os.path.exists(self.save_dir):
+            os.makedirs(self.save_dir)
+        with open(self.checkpoint_name, 'wb') as f:
+            pickle.dump(self.quant_layer_metrics, f)
+        _logger.info('save checkpoint to {}'.format(self.checkpoint_name))
+
+    def load_checkpoint(self):
+        if not os.path.exists(self.checkpoint_name):
+            return False
+        with open(self.checkpoint_name, 'rb') as f:
+            self.quant_layer_metrics = pickle.load(f)
+        _logger.info('load checkpoint from {}'.format(self.checkpoint_name))
+        return True
+
+    def compute_quant_sensitivity(self):
+        '''
+        For each layer, quantize only that layer's weight op and evaluate the quantized model.
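+        The drop from base_metric to the metric measured with only this layer
+        quantized is taken as the layer's quantization sensitivity.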
+ ''' + for i, layer_name in enumerate(self.tobe_analyized_layer): + _logger.info('checking {}/{} quant model: quant layer {}'.format( + i + 1, len(self.tobe_analyized_layer), layer_name)) + skip_list = copy.copy(list(self.quantized_weight_var_name)) + skip_list.remove(layer_name) + + executor = paddle.static.Executor(self.places) + post_training_quantization = PostTrainingQuantization( + executor=executor, + data_loader=self.data_loader, + model_dir=self.model_dir, + model_filename=self.model_filename, + params_filename=self.params_filename, + batch_size=self.batch_size, + batch_nums=self.batch_nums, + algo='avg', # fastest + quantizable_op_type=self.quantizable_op_type, + weight_quantize_type=self.weight_quantize_type, + activation_quantize_type=self.activation_quantize_type, + is_full_quantize=self.is_full_quantize, + skip_tensor_list=skip_list, ) + program = post_training_quantization.quantize() + + _logger.info('Evaluating...') + quant_metric = self.eval_function(executor, program, self.feed_list, + self.fetch_list) + executor.close() + _logger.info( + "quant layer name: {}, eval metric: {}, the loss caused by this layer: {}". + format(layer_name, quant_metric, self.base_metric - + quant_metric)) + self.quant_layer_metrics[layer_name] = quant_metric + self.save_checkpoint() + + def get_sensitive_ops_name(self, graph, program): + sensitive_weight_ops = self.sensitivity_ranklist[:self. + num_histogram_plots] + sensitive_act_ops = [] + persistable_var_names = [] + persistable_var_names = [] + for var in program.list_vars(): + if var.persistable: + persistable_var_names.append(var.name) + for op_name in sensitive_weight_ops: + for block_id in range(len(program.blocks)): + for op in program.blocks[block_id].ops: + var_name_list = _get_op_input_var_names(op) + if op_name in var_name_list: + for var_name in var_name_list: + if var_name not in persistable_var_names: + sensitive_act_ops.append(var_name) + return sensitive_act_ops, sensitive_weight_ops + + def calculate_histogram(self): + ''' + Sample histograms for the weight and corresponding act tensors + ''' + devices = paddle.device.get_device().split(':')[0] + places = paddle.device._convert_to_place(devices) + executor = paddle.static.Executor(places) + + [program, feed_list, fetch_list]= load_inference_model( \ + self.model_dir, \ + executor=executor, \ + model_filename=self.model_filename, \ + params_filename=self.params_filename) + + scope = global_scope() + + graph = IrGraph(core.Graph(program.desc), for_test=False) + self.sensitive_act_ops, self.sensitive_weight_ops = self.get_sensitive_ops_name( + graph, program) + + for var in program.list_vars(): + if var.name in self.quantized_act_var_name: + var.persistable = True + + batch_id = 0 + for data in self.data_loader(): + executor.run(program=program, + feed=data, + fetch_list=fetch_list, + return_numpy=False, + scope=scope) + batch_id += 1 + if batch_id >= self.batch_nums: + break + + self.weight_histogram = {} + self.act_histogram = {} + for var_name in self.sensitive_act_ops: + var_tensor = load_variable_data(scope, var_name) + var_tensor = np.array(var_tensor) + min_v = float(np.min(var_tensor)) + max_v = float(np.max(var_tensor)) + var_tensor = var_tensor.flatten() + _, hist_edges = np.histogram( + var_tensor.copy(), + bins=self.histogram_bins, + range=(min_v, max_v)) + self.act_histogram[var_name] = [var_tensor, hist_edges] + + for var_name in self.sensitive_weight_ops: + var_tensor = load_variable_data(scope, var_name) + var_tensor = np.array(var_tensor) + min_v = 
+            max_v = float(np.max(var_tensor))
+            var_tensor = var_tensor.flatten()
+            _, hist_edges = np.histogram(
+                var_tensor.copy(),
+                bins=self.histogram_bins,
+                range=(min_v, max_v))
+            self.weight_histogram[var_name] = [var_tensor, hist_edges]
+
+    def draw_pdf(self):
+        pdf_path_a = os.path.join(self.save_dir, 'act_hist_result.pdf')
+        pdf_path_w = os.path.join(self.save_dir, 'weight_hist_result.pdf')
+        with PdfPages(pdf_path_a) as pdf:
+            for name in self.act_histogram:
+                plt.hist(
+                    self.act_histogram[name][0],
+                    bins=self.act_histogram[name][1])
+                plt.xlabel(name)
+                plt.ylabel("frequency")
+                plt.title("Hist of variable {}".format(name))
+                pdf.savefig()
+                plt.close()
+        with PdfPages(pdf_path_w) as pdf:
+            for name in self.weight_histogram:
+                plt.hist(
+                    self.weight_histogram[name][0],
+                    bins=self.weight_histogram[name][1])
+                plt.xlabel(name)
+                plt.ylabel("frequency")
+                plt.title("Hist of variable {}".format(name))
+                pdf.savefig()
+                plt.close()
+        _logger.info('Histogram plots are saved in {} and {}'.format(
+            pdf_path_a, pdf_path_w))
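+
+# A minimal usage sketch (hypothetical paths and eval_func; eval_func must
+# accept (executor, program, feed_list, fetch_list) and return a scalar
+# metric, matching how it is called above):
+#
+#     analyzer = AnalysisQuant(
+#         model_dir='./mobilenetv1_infer',
+#         model_filename='model.pdmodel',
+#         params_filename='model.pdiparams',
+#         eval_function=eval_func,
+#         data_loader=calib_loader)
+#     analyzer.analysis()  # writes analysis.txt and histogram PDFs to save_dir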