From 96d309a7765bf0ba97771f618c988c491e31ffa0 Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Fri, 19 Aug 2022 19:50:59 +0800
Subject: [PATCH] Cherry pick some PR (#1354)
---
README.md | 52 ++-
demo/quant/pact_quant_aware/train.py | 30 +-
demo/quant/quant_aware/train.py | 7 +-
demo/quant/quant_post/eval.py | 7 +-
.../auto-compression/auto_compression_api.rst | 10 +-
.../api_cn/static/quant/quantization_api.rst | 10 +-
example/auto_compression/README.md | 10 +-
.../detection/configs/ppyoloe_l_qat_dis.yaml | 1 +
.../detection/configs/ppyoloe_s_qat_dis.yaml | 34 ++
example/auto_compression/detection/eval.py | 2 +-
example/auto_compression/detection/run.py | 5 +-
.../hyperparameter_tutorial.md | 186 +++++++--
.../image_classification/README.md | 46 ++-
.../image_classification/eval.py | 2 +-
.../image_classification/infer.py | 2 +-
.../image_classification/run.py | 14 +-
example/auto_compression/nlp/run.py | 2 +-
.../pytorch_huggingface/run.py | 2 +-
.../pytorch_yolo_series/README.md | 170 ++++++++
.../configs/yolov5s_qat_dis.yaml | 15 +-
.../configs/yolov6s_qat_dis.yaml | 15 +-
.../configs/yolov7_qat_dis.yaml | 18 +-
.../configs/yolov7_tiny_qat_dis.yaml | 32 ++
.../cpp_infer/CMakeLists.txt | 0
.../cpp_infer/README.md | 37 +-
.../cpp_infer/compile.sh | 0
.../cpp_infer/trt_run.cc | 9 +-
.../pytorch_yolo_series/dataset.py | 115 ++++++
.../pytorch_yolo_series/eval.py | 102 +++++
.../images/000000570688.jpg | Bin
.../pytorch_yolo_series/onnx_trt_infer.py | 378 ++++++++++++++++++
.../paddle_trt_infer.py | 10 +-
.../post_process.py | 66 ++-
.../pytorch_yolo_series/run.py | 127 ++++++
.../auto_compression/pytorch_yolov5/README.md | 147 -------
.../pytorch_yolov5/configs/yolov5_reader.yml | 27 --
.../pytorch_yolov5/cpp_infer/trt_run.cc | 116 ------
.../pytorch_yolov5/paddle_trt_infer.py | 322 ---------------
.../auto_compression/pytorch_yolov5/run.py | 179 ---------
.../auto_compression/pytorch_yolov6/README.md | 143 -------
.../pytorch_yolov6/configs/yolov6_reader.yml | 27 --
.../pytorch_yolov6/cpp_infer/CMakeLists.txt | 263 ------------
.../pytorch_yolov6/cpp_infer/README.md | 50 ---
.../pytorch_yolov6/cpp_infer/compile.sh | 37 --
.../pytorch_yolov6/cpp_infer/trt_run.cc | 116 ------
.../auto_compression/pytorch_yolov6/eval.py | 159 --------
.../pytorch_yolov6/images/000000570688.jpg | Bin 138365 -> 0 bytes
.../pytorch_yolov6/paddle_trt_infer.py | 322 ---------------
.../pytorch_yolov6/post_quant.py | 106 -----
.../auto_compression/pytorch_yolov6/run.py | 181 ---------
.../auto_compression/pytorch_yolov7/README.md | 152 -------
.../pytorch_yolov7/configs/yolov7_reader.yaml | 27 --
.../pytorch_yolov7/cpp_infer/CMakeLists.txt | 263 ------------
.../pytorch_yolov7/cpp_infer/README.md | 51 ---
.../pytorch_yolov7/cpp_infer/compile.sh | 37 --
.../auto_compression/pytorch_yolov7/eval.py | 151 -------
.../pytorch_yolov7/images/000000570688.jpg | Bin 138365 -> 0 bytes
.../pytorch_yolov7/post_process.py | 173 --------
.../auto_compression/pytorch_yolov7/run.py | 172 --------
.../semantic_segmentation/README.md | 4 +-
.../semantic_segmentation/run.py | 14 +-
.../tensorflow_mobilenet/eval.py | 5 +-
.../tensorflow_mobilenet/run.py | 5 +-
.../post_training_quantization/analysis.md | 49 +++
.../detection/analysis.py | 179 +++++++++
.../detection/configs/picodet_s_analysis.yaml | 47 +++
.../detection/configs/picodet_s_ptq.yaml | 38 ++
.../detection}/eval.py | 24 +-
.../detection/keypoint_utils.py | 307 ++++++++++++++
.../detection/post_process.py | 157 ++++++++
.../detection}/post_quant.py | 29 +-
.../pytorch_yolo_series/README.md | 150 +++++++
.../pytorch_yolo_series/analysis.py | 115 ++++++
.../configs/yolov5s_ptq.yaml | 8 +
.../configs/yolov6s_analysis.yaml | 15 +
.../configs/yolov6s_analyzed_ptq.yaml | 8 +
.../configs/yolov6s_ptq.yaml | 8 +
.../configs/yolov7s_ptq.yaml | 7 +
.../pytorch_yolo_series/dataset.py | 115 ++++++
.../pytorch_yolo_series/eval.py | 101 +++++
.../images/hist_compare.png | Bin 0 -> 159445 bytes
.../images/sensitivity_rank.png | Bin 0 -> 25389 bytes
.../pytorch_yolo_series}/post_process.py | 66 ++-
.../pytorch_yolo_series}/post_quant.py | 60 ++-
paddleslim/analysis/_utils.py | 4 +-
paddleslim/analysis/latency_predictor.py | 4 +-
paddleslim/auto_compression/__init__.py | 14 +-
paddleslim/auto_compression/auto_strategy.py | 6 +-
paddleslim/auto_compression/compressor.py | 124 ++++--
paddleslim/auto_compression/config_helpers.py | 46 +--
.../create_compressed_program.py | 10 +-
.../auto_compression/strategy_config.py | 6 +-
paddleslim/auto_compression/utils/__init__.py | 6 -
paddleslim/auto_compression/utils/fake_ptq.py | 2 +-
.../auto_compression/utils/load_model.py | 45 ---
paddleslim/auto_compression/utils/predict.py | 4 +-
.../auto_compression/utils/prune_model.py | 7 +-
paddleslim/common/__init__.py | 7 +-
paddleslim/common/config_helper.py | 60 +++
.../utils => common}/dataloader.py | 3 +-
paddleslim/common/load_model.py | 222 ++++++++++
paddleslim/dygraph/prune/pruning_plan.py | 3 +-
paddleslim/prune/prune_worker.py | 5 +-
paddleslim/prune/pruner.py | 4 +-
paddleslim/quant/__init__.py | 4 +-
paddleslim/quant/analysis.py | 312 +++++++++++++++
paddleslim/quant/post_quant_hpo.py | 20 +-
paddleslim/quant/quanter.py | 128 ++++--
requirements.txt | 2 -
tests/act/test_act_api.py | 34 +-
tests/test_prune_walker.py | 6 +-
tests/test_quant_post.py | 4 +-
112 files changed, 3674 insertions(+), 3666 deletions(-)
create mode 100644 example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
create mode 100644 example/auto_compression/pytorch_yolo_series/README.md
rename example/auto_compression/{pytorch_yolov5 => pytorch_yolo_series}/configs/yolov5s_qat_dis.yaml (60%)
rename example/auto_compression/{pytorch_yolov6 => pytorch_yolo_series}/configs/yolov6s_qat_dis.yaml (62%)
rename example/auto_compression/{pytorch_yolov7 => pytorch_yolo_series}/configs/yolov7_qat_dis.yaml (56%)
create mode 100644 example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml
rename example/auto_compression/{pytorch_yolov5 => pytorch_yolo_series}/cpp_infer/CMakeLists.txt (100%)
rename example/auto_compression/{pytorch_yolov5 => pytorch_yolo_series}/cpp_infer/README.md (64%)
rename example/auto_compression/{pytorch_yolov5 => pytorch_yolo_series}/cpp_infer/compile.sh (100%)
rename example/auto_compression/{pytorch_yolov7 => pytorch_yolo_series}/cpp_infer/trt_run.cc (93%)
create mode 100644 example/auto_compression/pytorch_yolo_series/dataset.py
create mode 100644 example/auto_compression/pytorch_yolo_series/eval.py
rename example/auto_compression/{pytorch_yolov5 => pytorch_yolo_series}/images/000000570688.jpg (100%)
create mode 100644 example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py
rename example/auto_compression/{pytorch_yolov7 => pytorch_yolo_series}/paddle_trt_infer.py (98%)
rename example/auto_compression/{pytorch_yolov6 => pytorch_yolo_series}/post_process.py (75%)
create mode 100644 example/auto_compression/pytorch_yolo_series/run.py
delete mode 100644 example/auto_compression/pytorch_yolov5/README.md
delete mode 100644 example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml
delete mode 100644 example/auto_compression/pytorch_yolov5/cpp_infer/trt_run.cc
delete mode 100644 example/auto_compression/pytorch_yolov5/paddle_trt_infer.py
delete mode 100644 example/auto_compression/pytorch_yolov5/run.py
delete mode 100644 example/auto_compression/pytorch_yolov6/README.md
delete mode 100644 example/auto_compression/pytorch_yolov6/configs/yolov6_reader.yml
delete mode 100644 example/auto_compression/pytorch_yolov6/cpp_infer/CMakeLists.txt
delete mode 100644 example/auto_compression/pytorch_yolov6/cpp_infer/README.md
delete mode 100644 example/auto_compression/pytorch_yolov6/cpp_infer/compile.sh
delete mode 100644 example/auto_compression/pytorch_yolov6/cpp_infer/trt_run.cc
delete mode 100644 example/auto_compression/pytorch_yolov6/eval.py
delete mode 100644 example/auto_compression/pytorch_yolov6/images/000000570688.jpg
delete mode 100644 example/auto_compression/pytorch_yolov6/paddle_trt_infer.py
delete mode 100644 example/auto_compression/pytorch_yolov6/post_quant.py
delete mode 100644 example/auto_compression/pytorch_yolov6/run.py
delete mode 100644 example/auto_compression/pytorch_yolov7/README.md
delete mode 100644 example/auto_compression/pytorch_yolov7/configs/yolov7_reader.yaml
delete mode 100644 example/auto_compression/pytorch_yolov7/cpp_infer/CMakeLists.txt
delete mode 100644 example/auto_compression/pytorch_yolov7/cpp_infer/README.md
delete mode 100644 example/auto_compression/pytorch_yolov7/cpp_infer/compile.sh
delete mode 100644 example/auto_compression/pytorch_yolov7/eval.py
delete mode 100644 example/auto_compression/pytorch_yolov7/images/000000570688.jpg
delete mode 100644 example/auto_compression/pytorch_yolov7/post_process.py
delete mode 100644 example/auto_compression/pytorch_yolov7/run.py
create mode 100644 example/post_training_quantization/analysis.md
create mode 100644 example/post_training_quantization/detection/analysis.py
create mode 100644 example/post_training_quantization/detection/configs/picodet_s_analysis.yaml
create mode 100644 example/post_training_quantization/detection/configs/picodet_s_ptq.yaml
rename example/{auto_compression/pytorch_yolov5 => post_training_quantization/detection}/eval.py (86%)
create mode 100644 example/post_training_quantization/detection/keypoint_utils.py
create mode 100644 example/post_training_quantization/detection/post_process.py
rename example/{auto_compression/pytorch_yolov5 => post_training_quantization/detection}/post_quant.py (75%)
create mode 100644 example/post_training_quantization/pytorch_yolo_series/README.md
create mode 100644 example/post_training_quantization/pytorch_yolo_series/analysis.py
create mode 100644 example/post_training_quantization/pytorch_yolo_series/configs/yolov5s_ptq.yaml
create mode 100644 example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analysis.yaml
create mode 100644 example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_analyzed_ptq.yaml
create mode 100644 example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_ptq.yaml
create mode 100644 example/post_training_quantization/pytorch_yolo_series/configs/yolov7s_ptq.yaml
create mode 100644 example/post_training_quantization/pytorch_yolo_series/dataset.py
create mode 100644 example/post_training_quantization/pytorch_yolo_series/eval.py
create mode 100644 example/post_training_quantization/pytorch_yolo_series/images/hist_compare.png
create mode 100644 example/post_training_quantization/pytorch_yolo_series/images/sensitivity_rank.png
rename example/{auto_compression/pytorch_yolov5 => post_training_quantization/pytorch_yolo_series}/post_process.py (75%)
rename example/{auto_compression/pytorch_yolov7 => post_training_quantization/pytorch_yolo_series}/post_quant.py (56%)
delete mode 100644 paddleslim/auto_compression/utils/load_model.py
create mode 100644 paddleslim/common/config_helper.py
rename paddleslim/{auto_compression/utils => common}/dataloader.py (95%)
create mode 100644 paddleslim/common/load_model.py
create mode 100644 paddleslim/quant/analysis.py
diff --git a/README.md b/README.md
index 5289fb2e..063ae08a 100755
--- a/README.md
+++ b/README.md
@@ -4,40 +4,51 @@
-
-
-
+
+
+
+
+
+
+
+
-PaddleSlim is a toolkit focused on deep learning model compression. It provides compression strategies such as **low-bit quantization, knowledge distillation, sparsification, and neural architecture search** to help users quickly shrink their models.
+PaddleSlim is a toolkit focused on deep learning model compression. It provides compression strategies such as **low-bit quantization, knowledge distillation, sparsification, and neural architecture search** to help developers quickly shrink their models.
## Product News
+- 🔥 **2022.08.16: Auto-compression upgrades**
+  - Supports loading ONNX models directly and exporting Paddle models to ONNX
+  - Released a trial version of the quantization analysis tool and the [YOLO-series post-training quantization tool](example/post_training_quantization/pytorch_yolo_series/)
+  - Updated the [YOLO-series auto-compression model zoo](example/auto_compression/pytorch_yolo_series)
+
+  | Model | Base mAP (val, 0.5:0.95) | ACT INT8 mAP (val, 0.5:0.95) | Model size compression | FP32 latency | INT8 latency | Speedup |
+  | :-------- |:-------- |:--------: | :--------: | :---------------------: | :----------------: | :----------------: |
+  | PPYOLOE-s | 43.1 | 42.6 | 3.9x | 6.51ms | 2.12ms | 3.1x |
+  | YOLOv5s | 37.4 | 36.9 | 3.8x | 5.95ms | 1.87ms | 3.2x |
+  | YOLOv6s | 42.4 | 41.3 | 3.9x | 9.06ms | 1.83ms | 5.0x |
+  | YOLOv7 | 51.1 | 50.9 | 3.9x | 26.84ms | 4.55ms | 5.9x |
+  | YOLOv7-Tiny | 37.3 | 37.0 | 3.9x | 5.06ms | 1.68ms | 3.0x |
+
+
- 🔥 **2022.07.01: Released [v2.3.0](https://github.com/PaddlePaddle/PaddleSlim/releases/tag/v2.3.0)**
  - Released the [auto-compression feature (ACT)](example/auto_compression)
-
-  - Supports code-non-intrusive compression: users only need to provide the inference model files and data to run compression tasks such as post-training quantization (PTQ), quantization-aware training (QAT), and sparse training.
+  - Supports code-non-intrusive compression: developers only need to provide the inference model files and data to run compression tasks such as post-training quantization (PTQ), quantization-aware training (QAT), and sparse training.
  - Supports automatic strategy selection based on the task and the deployment environment: it automatically searches for a suitable PTQ method and for the best combination of compression strategies.
  - Released auto-compression examples for three domains: [NLP](example/auto_compression/nlp), [semantic segmentation](example/auto_compression/semantic_segmentation), and [object detection](example/auto_compression/detection).
-  - Released auto-compression recipes for `X2Paddle` models: [YOLOv5](example/auto_compression/pytorch_yolov5), [YOLOv6](example/auto_compression/pytorch_yolov6), [YOLOv7](example/auto_compression/pytorch_yolov7), [HuggingFace](example/auto_compression/pytorch_huggingface), [MobileNet](example/auto_compression/tensorflow_mobilenet).
-
+  - Released auto-compression recipes for `X2Paddle` models: [YOLOv5](example/auto_compression/pytorch_yolo_series), [YOLOv6](example/auto_compression/pytorch_yolo_series), [YOLOv7](example/auto_compression/pytorch_yolo_series), [HuggingFace](example/auto_compression/pytorch_huggingface), [MobileNet](example/auto_compression/tensorflow_mobilenet).
- Upgraded quantization features
-
-  - Unified the quantized model format
-  - PTQ now supports the while op
-  - Added 7 new [PTQ methods](docs/zh_cn/tutorials/quant/post_training_quantization.md), including HIST, AVG, EMD, Bias Correction, AdaRound, etc.
-  - Fixed slow QAT on large BERT models
-
+  - Unified the quantized model format; PTQ now supports the while op; fixed slow QAT on large BERT models.
+  - Added 7 new [PTQ methods](docs/zh_cn/tutorials/quant/post_training_quantization.md), including HIST, AVG, EMD, Bias Correction, AdaRound, etc.
- Supports semi-structured sparse training
-
- Added a latency prediction tool
+  - Supports performance prediction for sparse and low-bit quantized models; predicts the inference performance of a given model in a specific deployment environment (ARM CPU + Paddle Lite); provides prediction interfaces for SD625, SD710, and RK3288 chips with Paddle Lite.
+  - Provides a deployment-environment auto-extension tool that can automatically add prediction support for more ARM CPU devices.
-  - Predicts the inference performance of a given model in a specific deployment environment (ARM CPU + Paddle Lite)
-  - Provides a deployment-environment auto-extension tool that can automatically add prediction support for more ARM CPU devices
-  - Supports performance prediction for sparse and low-bit quantized models
-  - Provides prediction interfaces for SD625, SD710, and RK3288 chips with Paddle Lite
-
+
+Past updates
- **2021.11.15: Released v2.2.0**
@@ -52,6 +63,7 @@ PaddleSlim is a toolkit focused on deep learning model compression. It provides **low-
For more information, see the [release note](https://github.com/PaddlePaddle/PaddleSlim/releases)
+
## Overview of Basic Compression Features
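
Editor's note: as a companion to the ACT release notes above, here is a minimal, hedged sketch of the entry point. The model path and dataset are hypothetical placeholders, the argument order follows the `AutoCompression` signature documented later in this patch, and `strategy_config=None` is assumed to trigger the automatic strategy search described above.

```python
# Hedged sketch only: paths and the dataset are placeholders, not part of this patch.
import numpy as np
from paddle.io import Dataset, DataLoader
from paddleslim.auto_compression import AutoCompression

class FakeImages(Dataset):
    """Stand-in calibration/training data; replace with a real dataset."""
    def __len__(self):
        return 32
    def __getitem__(self, idx):
        # The dict key must match the inference model's input name.
        return {"image": np.random.rand(3, 224, 224).astype("float32")}

ac = AutoCompression(
    model_dir="./inference_model",            # hypothetical path
    train_dataloader=DataLoader(FakeImages(), batch_size=8),
    model_filename="model.pdmodel",
    params_filename="model.pdiparams",
    save_dir="./output",
    strategy_config=None,                     # assumed: ACT picks strategies automatically
    train_config=None,
    eval_callback=None,
    devices="gpu")
ac.compress()
```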
diff --git a/demo/quant/pact_quant_aware/train.py b/demo/quant/pact_quant_aware/train.py
index fb70c0fc..67945a45 100644
--- a/demo/quant/pact_quant_aware/train.py
+++ b/demo/quant/pact_quant_aware/train.py
@@ -65,6 +65,8 @@ add_arg('use_pact', bool, True,
"Whether to use PACT or not.")
add_arg('analysis', bool, False,
        "Whether to analyze variable distributions.")
+add_arg('onnx_format', bool, False,
+        "Whether to use ONNX format or not.")
add_arg('ce_test', bool, False, "Whether to CE test.")
# yapf: enable
@@ -257,6 +259,8 @@ def compress(args):
'window_size': 10000,
# The decay coefficient of moving average, default is 0.9
'moving_rate': 0.9,
+ # Whether to use ONNX format or not
+ 'onnx_format': args.onnx_format,
}
# 2. quantization transform programs (training aware)
@@ -298,9 +302,9 @@ def compress(args):
places,
quant_config,
scope=None,
- act_preprocess_func=act_preprocess_func,
- optimizer_func=optimizer_func,
- executor=executor,
+ act_preprocess_func=None,
+ optimizer_func=None,
+ executor=None,
for_test=True)
compiled_train_prog = quant_aware(
train_prog,
@@ -425,29 +429,23 @@ def compress(args):
# 3. Freeze the graph after training by adjusting the quantize
# operators' order for the inference.
# The dtype of float_program's weights is float32, but in int8 range.
- float_program, int8_program = convert(val_program, places, quant_config, \
- scope=None, \
- save_int8=True)
+ model_path = os.path.join(quantization_model_save_dir, args.model)
+ if not os.path.isdir(model_path):
+ os.makedirs(model_path)
+ float_program = convert(val_program, places, quant_config)
_logger.info("eval best_model after convert")
final_acc1 = test(best_epoch, float_program)
_logger.info("final acc:{}".format(final_acc1))
# 4. Save inference model
- model_path = os.path.join(quantization_model_save_dir, args.model,
- 'act_' + quant_config['activation_quantize_type']
- + '_w_' + quant_config['weight_quantize_type'])
- float_path = os.path.join(model_path, 'float')
- if not os.path.isdir(model_path):
- os.makedirs(model_path)
-
paddle.fluid.io.save_inference_model(
- dirname=float_path,
+ dirname=model_path,
feeded_var_names=[image.name],
target_vars=[out],
executor=exe,
main_program=float_program,
- model_filename=float_path + '/model',
- params_filename=float_path + '/params')
+ model_filename=model_path + '/model.pdmodel',
+ params_filename=model_path + '/model.pdiparams')
def main():
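
Editor's note: the hunks above make `convert` return a single float program and standardize the save path on `model.pdmodel` / `model.pdiparams`. A self-contained mini sketch of that flow (toy one-layer network; the PaddleSlim static quantization APIs are used the same way as in this file):

```python
# Sketch of the quant_aware -> convert -> save flow standardized by this patch.
import os
import paddle
from paddleslim.quant import quant_aware, convert

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    image = paddle.static.data('image', shape=[None, 3, 32, 32], dtype='float32')
    out = paddle.static.nn.conv2d(image, num_filters=8, filter_size=3)

place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_prog)

quant_config = {'onnx_format': True}  # flag introduced by this patch; other keys use defaults
val_prog = quant_aware(main_prog.clone(for_test=True), place, quant_config, for_test=True)
float_prog = convert(val_prog, place, quant_config)  # now returns a single program

model_path = './quant_demo'
os.makedirs(model_path, exist_ok=True)
paddle.fluid.io.save_inference_model(
    dirname=model_path, feeded_var_names=[image.name], target_vars=[out],
    executor=exe, main_program=float_prog,
    model_filename=model_path + '/model.pdmodel',
    params_filename=model_path + '/model.pdiparams')
```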
diff --git a/demo/quant/quant_aware/train.py b/demo/quant/quant_aware/train.py
index abf6073e..7fc133a4 100644
--- a/demo/quant/quant_aware/train.py
+++ b/demo/quant/quant_aware/train.py
@@ -126,6 +126,8 @@ def compress(args):
'window_size': 10000,
# The decay coefficient of moving average, default is 0.9
'moving_rate': 0.9,
+ # Whether to use ONNX format or not
+ 'onnx_format': args.onnx_format,
}
pretrain = True
@@ -294,10 +296,7 @@ def compress(args):
# operators' order for the inference.
# The dtype of float_program's weights is float32, but in int8 range.
############################################################################################################
- float_program, int8_program = convert(val_program, places, quant_config, \
- scope=None, \
- save_int8=True,
- onnx_format=args.onnx_format)
+ float_program = convert(val_program, places, quant_config)
print("eval best_model after convert")
final_acc1 = test(best_epoch, float_program)
############################################################################################################
diff --git a/demo/quant/quant_post/eval.py b/demo/quant/quant_post/eval.py
index 310eacd0..e8184e84 100755
--- a/demo/quant/quant_post/eval.py
+++ b/demo/quant/quant_post/eval.py
@@ -21,8 +21,7 @@ import functools
import paddle
sys.path[0] = os.path.join(
os.path.dirname("__file__"), os.path.pardir, os.path.pardir)
-sys.path[1] = os.path.join(
- os.path.dirname("__file__"), os.path.pardir)
+sys.path[1] = os.path.join(os.path.dirname("__file__"), os.path.pardir)
import imagenet_reader as reader
from utility import add_arguments, print_arguments
@@ -31,8 +30,8 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('model_path', str, "./pruning/checkpoints/resnet50/2/eval_model/", "Path of the inference model to evaluate.")
-add_arg('model_name', str, '__model__', "model filename for inference model")
-add_arg('params_name', str, '__params__', "params filename for inference model")
+add_arg('model_name', str, 'model.pdmodel', "model filename for inference model")
+add_arg('params_name', str, 'model.pdiparams', "params filename for inference model")
add_arg('batch_size', int, 64, "Minibatch size.")
# yapf: enable
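
Editor's note: with the defaults above switched to `model.pdmodel` / `model.pdiparams`, loading pairs naturally with the Paddle Inference API; a brief sketch (placeholder path):

```python
# Load a model saved under the new default filenames via Paddle Inference.
from paddle.inference import Config, create_predictor

config = Config("./quant_model/model.pdmodel", "./quant_model/model.pdiparams")
config.disable_gpu()  # or config.enable_use_gpu(100, 0) on GPU machines
predictor = create_predictor(config)
print(predictor.get_input_names())  # sanity check: the model's input names
```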
diff --git a/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst b/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst
index f5731df4..c308413d 100644
--- a/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst
+++ b/docs/zh_cn/api_cn/static/auto-compression/auto_compression_api.rst
@@ -3,19 +3,19 @@ AutoCompression (automatic compression)
AutoCompression
---------------
-.. py:class:: paddleslim.auto_compression.AutoCompression(model_dir, model_filename, params_filename, save_dir, strategy_config, train_config, train_dataloader, eval_callback, devices='gpu')
+.. py:class:: paddleslim.auto_compression.AutoCompression(model_dir, train_dataloader, model_filename, params_filename, save_dir, strategy_config, train_config, eval_callback, devices='gpu')
-`Source code `_
+`Source code `_
Compresses, according to the given configuration, an inference model saved with the ``paddle.jit.save`` or ``paddle.static.save_inference_model`` API.
**Parameters: **
- **model_dir(str)** - Directory containing the inference model to compress.
+- **train_dataloader(paddle.io.DataLoader)** - Training data loader. Note: if the PTQ hyperparameter search strategy is chosen, ``train_dataloader`` and ``eval_callback`` can simply be set to the same data reader.
- **model_filename(str)** - Filename of the inference model to compress.
- **params_filename(str)** - Parameter filename of the inference model to compress.
- **save_dir(str)** - Directory where the compressed model is saved.
-- **train_dataloader(paddle.io.DataLoader)** - Training data loader. Note: if the PTQ hyperparameter search strategy is chosen, ``train_dataloader`` and ``eval_callback`` can simply be set to the same data reader.
- **train_config(dict)** - Training configuration. For the configurable parameters, see ``_ . Note: if the PTQ hyperparameter search strategy is chosen, simply set ``train_config`` to ``None``.
- **strategy_config(dict, list(dict), optional)** - Compression strategies to use; several single strategies can be configured to apply these compression methods together. The dict keys must be among:
``Quantization`` (quantization configuration; for configurable parameters see ``_ ),
@@ -82,13 +82,13 @@ AutoCompression
eval_dataloader = Cifar10(mode='eval')
- ac = AutoCompression(model_path, model_filename, params_filename, save_dir, \
+ ac = AutoCompression(model_path, train_dataloader, model_filename, params_filename, save_dir, \
strategy_config="Quantization": Quantization(**default_ptq_config),
"Distillation": HyperParameterOptimization(**default_distill_config)}, \
- train_config=None, train_dataloader=train_dataloader, eval_callback=eval_dataloader,devices='gpu')
+ train_config=None, eval_callback=eval_dataloader,devices='gpu')
```
diff --git a/docs/zh_cn/api_cn/static/quant/quantization_api.rst b/docs/zh_cn/api_cn/static/quant/quantization_api.rst
index a12e4e9b..f2d7b77d 100644
--- a/docs/zh_cn/api_cn/static/quant/quantization_api.rst
+++ b/docs/zh_cn/api_cn/static/quant/quantization_api.rst
@@ -118,7 +118,7 @@ quant_post_dynamic
quant_post_static
---------------
-.. py:function:: paddleslim.quant.quant_post_static(executor,model_dir, quantize_model_path, batch_generator=None, sample_generator=None, model_filename=None, params_filename=None, save_model_filename='__model__', save_params_filename='__params__', batch_size=16, batch_nums=None, scope=None, algo='KL', round_type='round', quantizable_op_type=["conv2d","depthwise_conv2d","mul"], is_full_quantize=False, weight_bits=8, activation_bits=8, activation_quantize_type='range_abs_max', weight_quantize_type='channel_wise_abs_max', onnx_format=False, skip_tensor_list=None, optimize_model=False)
+.. py:function:: paddleslim.quant.quant_post_static(executor,model_dir, quantize_model_path, batch_generator=None, sample_generator=None, model_filename=None, params_filename=None, save_model_filename='model.pdmodel', save_params_filename='model.pdiparams', batch_size=16, batch_nums=None, scope=None, algo='KL', round_type='round', quantizable_op_type=["conv2d","depthwise_conv2d","mul"], is_full_quantize=False, weight_bits=8, activation_bits=8, activation_quantize_type='range_abs_max', weight_quantize_type='channel_wise_abs_max', onnx_format=False, skip_tensor_list=None, optimize_model=False)
`Source code `_
@@ -217,15 +217,15 @@ quant_post_static
target_vars=[out],
main_program=val_prog,
executor=exe,
- model_filename='__model__',
- params_filename='__params__')
+ model_filename='model.pdmodel',
+ params_filename='model.pdiparams')
quant_post_static(
executor=exe,
model_dir='./model_path',
quantize_model_path='./save_path',
sample_generator=val_reader,
- model_filename='__model__',
- params_filename='__params__',
+ model_filename='model.pdmodel',
+ params_filename='model.pdiparams',
batch_size=16,
batch_nums=10)
diff --git a/example/auto_compression/README.md b/example/auto_compression/README.md
index e907908b..cb3b9277 100644
--- a/example/auto_compression/README.md
+++ b/example/auto_compression/README.md
@@ -82,15 +82,15 @@ Compared with traditional model compression methods, ACT
| [Semantic segmentation](./semantic_segmentation) | UNet | 65.00 | 64.93 | 15.29 | 10.23 | **1.49** | NVIDIA Tesla T4 |
| NLP | PP-MiniLM | 72.81 | 72.44 | 128.01 | 17.97 | **7.12** | NVIDIA Tesla T4 |
| NLP | ERNIE 3.0-Medium | 73.09 | 72.40 | 29.25(fp16) | 19.61 | **1.49** | NVIDIA Tesla T4 |
-| [Object detection](./pytorch_yolov5) | YOLOv5s (PyTorch) | 37.40 | 36.9 | 5.95 | 1.87 | **3.18** | NVIDIA Tesla T4 |
-| [Object detection](./pytorch_yolov6) | YOLOv6s (PyTorch) | 42.4 | 41.3 | 9.06 | 1.83 | **4.95** | NVIDIA Tesla T4 |
-| [Object detection](./pytorch_yolov7) | YOLOv7 (PyTorch) | 51.1 | 50.8 | 26.84 | 4.55 | **5.89** | NVIDIA Tesla T4 |
-| [Object detection](./detection) | PP-YOLOE-l | 50.9 | 50.6 | 11.2 | 6.7 | **1.67** | NVIDIA Tesla V100 |
+| [Object detection](./pytorch_yolo_series) | YOLOv5s (PyTorch) | 37.40 | 36.9 | 5.95 | 1.87 | **3.18** | NVIDIA Tesla T4 |
+| [Object detection](./pytorch_yolo_series) | YOLOv6s (PyTorch) | 42.4 | 41.3 | 9.06 | 1.83 | **4.95** | NVIDIA Tesla T4 |
+| [Object detection](./pytorch_yolo_series) | YOLOv7 (PyTorch) | 51.1 | 50.8 | 26.84 | 4.55 | **5.89** | NVIDIA Tesla T4 |
+| [Object detection](./detection) | PP-YOLOE-s | 43.1 | 42.6 | 6.51 | 2.12 | **3.07** | NVIDIA Tesla T4 |
| [Image classification](./image_classification) | MobileNetV1 (TensorFlow) | 71.0 | 70.22 | 30.45 | 15.86 | **1.92** | SDM865 (Snapdragon 865) |
- Note: the object detection accuracy metric is mAP (0.5:0.95); the image segmentation metric is IoU.
- For more PaddlePaddle model examples and benchmarks, see: [image classification](./image_classification), [object detection](./detection), [semantic segmentation](./semantic_segmentation), [NLP](./nlp)
-- For more examples and benchmarks of models from other frameworks, see: [YOLOv5 (PyTorch)](./pytorch_yolov5), [YOLOv6 (PyTorch)](./pytorch_yolov6), [YOLOv7 (PyTorch)](./pytorch_yolov7), [HuggingFace (PyTorch)](./pytorch_huggingface), [MobileNet (TensorFlow)](./tensorflow_mobilenet).
+- For more examples and benchmarks of models from other frameworks, see: [YOLOv5 (PyTorch)](./pytorch_yolo_series), [YOLOv6 (PyTorch)](./pytorch_yolo_series), [YOLOv7 (PyTorch)](./pytorch_yolo_series), [HuggingFace (PyTorch)](./pytorch_huggingface), [MobileNet (TensorFlow)](./tensorflow_mobilenet).
## **Environment Setup**
diff --git a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
index 0b28ef89..1727e533 100644
--- a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
@@ -12,6 +12,7 @@ Distillation:
loss: soft_label
Quantization:
+ onnx_format: true
use_pact: true
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
diff --git a/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
new file mode 100644
index 00000000..466c9c2b
--- /dev/null
+++ b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
@@ -0,0 +1,34 @@
+
+Global:
+ reader_config: configs/yolo_reader.yml
+ input_list: ['image']
+  arch: PPYOLOE # when exporting with exclude_nms=True, arch must be set to PPYOLOE
+ Evaluation: True
+ model_dir: ./ppyoloe_crn_s_300e_coco
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+Quantization:
+ onnx_format: true
+ use_pact: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+
+TrainConfig:
+ train_iter: 5000
+ eval_iter: 1000
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 6000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
+
diff --git a/example/auto_compression/detection/eval.py b/example/auto_compression/detection/eval.py
index 3a723653..a4ea554c 100644
--- a/example/auto_compression/detection/eval.py
+++ b/example/auto_compression/detection/eval.py
@@ -20,7 +20,7 @@ import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.core.workspace import create
from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
+from paddleslim.common import load_config as load_slim_config
from keypoint_utils import keypoint_post_process
diff --git a/example/auto_compression/detection/run.py b/example/auto_compression/detection/run.py
index b7cc7505..523f2439 100644
--- a/example/auto_compression/detection/run.py
+++ b/example/auto_compression/detection/run.py
@@ -20,7 +20,7 @@ import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.core.workspace import create
from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
+from paddleslim.common import load_config as load_slim_config
from paddleslim.auto_compression import AutoCompression
from keypoint_utils import keypoint_post_process
@@ -121,7 +121,8 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
def main():
global global_config
all_config = load_slim_config(FLAGS.config_path)
- assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}"
+ assert "Global" in all_config, "Key 'Global' not found in config file. \n{}".format(
+ all_config)
global_config = all_config["Global"]
reader_cfg = load_config(global_config['reader_config'])
diff --git a/example/auto_compression/hyperparameter_tutorial.md b/example/auto_compression/hyperparameter_tutorial.md
index 6d423a9f..29ce9fef 100644
--- a/example/auto_compression/hyperparameter_tutorial.md
+++ b/example/auto_compression/hyperparameter_tutorial.md
@@ -1,9 +1,9 @@
-# ACT Hyperparameter Tutorial
+# 1. ACT Hyperparameter Tutorial
-## Hyperparameters of Each Compression Method
+## 1.1 Hyperparameters of Each Compression Method
-#### Customizing the quantization scheme
+### 1.1.1 Quantization
The quantization parameters mainly set the quantization bit width and the op types to quantize; quantizable ops include convolution layers (conv2d, depthwise_conv2d) and fully connected layers (mul, matmul_v2). The following example quantizes only the convolution layers:
```yaml
@@ -20,69 +20,148 @@ Quantization:
moving_rate: 0.9 # decay coefficient of the 'moving_average_abs_max' method; default 0.9.
for_tensorrt: false # whether the quantized model will be deployed with TensorRT; if so, the quantized op types are TENSORRT_OP_TYPES. Default: false.
is_full_quantize: false # whether to quantize all supported op types
+  onnx_format: false # whether to use the ONNX standard quantization format
```
-#### Customizing the distillation strategy
+The configuration options above are described as follows:
+
+
+- use_pact: whether to enable PACT. In general, enabling PACT yields a quantized model with higher accuracy. For the algorithm, see [PACT: Parameterized Clipping Activation for Quantized Neural Networks](https://arxiv.org/abs/1805.06085)
+- activation_bits: bit width for activation quantization, from 1 to 8. Default: 8.
+- weight_bits: bit width for weight quantization, from 1 to 8. Default: 8.
+- activation_quantize_type: activation quantization method; one of 'abs_max', 'range_abs_max', 'moving_average_abs_max'. If the quantized model will be loaded with TensorRT for inference, use 'range_abs_max' or 'moving_average_abs_max'. Default: 'moving_average_abs_max'.
+- weight_quantize_type: weight quantization method; one of 'abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max'. If the quantized model will be loaded with TensorRT for inference, use 'channel_wise_abs_max'. Default: 'channel_wise_abs_max'.
+- not_quant_pattern: any op whose `name_scope` contains the 'not_quant_pattern' string is left unquantized. For how to set `name_scope`, see [paddle.static.name_scope](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/name_scope_cn.html#name-scope).
+- quantize_op_types: op types to quantize. The following code prints all op types that support quantization:
+```
+from paddleslim.quant.quanter import TRANSFORM_PASS_OP_TYPES,QUANT_DEQUANT_PASS_OP_TYPES
+print(TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES)
+```
+- dtype: data type of the quantized parameters. Default: int8; currently only int8 is supported.
+- window_size: window size of the 'range_abs_max' method; default 10000.
+- moving_rate: decay coefficient of the 'moving_average_abs_max' method; default 0.9.
+- for_tensorrt: whether the quantized model will be deployed with TensorRT. Default: false. The following code prints the ops that get quantized when for_tensorrt=True:
+```
+from paddleslim.quant.quanter import TENSORRT_OP_TYPES
+print(TENSORRT_OP_TYPES)
+```
+
+- is_full_quantize: whether to quantize all supported op types. Default: false.
+
+
+### 1.1.2 Knowledge Distillation
The distillation parameters mainly set the distillation nodes (`node`) and the path of the teacher inference model, as shown below:
```yaml
Distillation:
-  # alpha: weight of the distillation loss; multiple values may be given so that different nodes can use different alpha values
  alpha: 1.0
-  # loss: distillation loss algorithm; multiple losses may be given so that different nodes can use different loss algorithms
  loss: l2
-  # node: distillation nodes, i.e., the names of certain layers' output variables. They can be chosen as follows:
-  # 1. for self-distillation, the nodes only need to include student-network nodes; multi-node distillation is supported;
-  # 2. for other distillation setups, the nodes must include teacher-network nodes and the corresponding student-network nodes;
-  # every two nodes form a pair, belonging to the teacher model and the student model respectively; multi-node distillation is supported.
  node:
  - relu_30.tmp_0
-  # teacher_model_dir: name of the folder holding the teacher's inference model file and parameter file
+
  teacher_model_dir: ./inference_model
  # teacher_model_filename: inference model file, named *.pdmodel or __model__
  teacher_model_filename: model.pdmodel
  # teacher_params_filename: inference model parameter file, named *.pdiparams or __params__
  teacher_params_filename: model.pdiparams
```
+The configuration options above are described as follows:
+
+- alpha: weight of the distillation loss; multiple values may be given so that different nodes can use different alpha values.
+- loss: distillation loss algorithm; multiple losses may be given so that different nodes can use different loss algorithms. Options are "soft_label", "l2", or "fsp"; custom losses are also supported. For definitions and usage, see the [knowledge distillation API docs](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/dist/single_distiller_api.html).
+- node: distillation nodes, i.e., the names of certain layers' output variables. This option is set in one of two ways:
-- Currently supported distillation losses: fsp, l2, soft_label; custom losses are also supported. For definitions and usage, see the [knowledge distillation API docs](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/dist/single_distiller_api.html).
+  - Self-distillation: the teacher is the inference model before compression, and the student is the compressed inference model. 'node' can be set to a list of variable names, and ACT automatically adds a distillation loss on each variable in the list. Example:
+  ```
+  node:
+  - relu_30.tmp_0
+  - relu_31.tmp_0
+  ```
+  The example above adds two distillation losses: the first takes 'relu_30.tmp_0' from both the teacher and the student as input, and the second takes 'relu_31.tmp_0' from both.
+  - Regular distillation: the teacher is an arbitrary model, and the student is the compressed inference model. 'node' can be set to a list of variable names whose length must be even. Example:
+  ```
+  node:
+  - teacher_relu_0.tmp_0
+  - student_relu_0.tmp_0
+  - teacher_relu_1.tmp_0
+  - student_relu_1.tmp_0
+  ```
-#### Customizing the structured sparsity strategy
+  The example above adds two distillation losses: the first takes the teacher variable "teacher_relu_0.tmp_0" and the student variable "student_relu_0.tmp_0" as input, and the second takes the teacher variable "teacher_relu_1.tmp_0" and the student variable "student_relu_1.tmp_0".
+
+  If `node` is not set, the outputs of the last parameterized layers of the teacher and the student are used to form the distillation loss.
+
+- teacher_model_dir: path of the teacher model used to supervise the training of the compressed model. If unset, the model before compression is used as the teacher.
+- teacher_model_filename: filename of the teacher model, named *.pdmodel or __model__. Only takes effect when `teacher_model_dir` is set.
+- teacher_params_filename: parameter filename of the teacher model, named *.pdiparams or __params__. Only takes effect when `teacher_model_dir` is set.
+
+
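
Editor's note: complementing the Netron tip above, candidate `node` names can also be listed from the program itself. A sketch reusing the same load pattern as the scripts in this tutorial (the model path is a placeholder, and filtering on 'relu' is just an example heuristic):

```python
# Sketch: list candidate distillation nodes (intermediate activation names).
import paddle
paddle.enable_static()
model_dir = "./inference_model"  # placeholder path
exe = paddle.static.Executor(paddle.CPUPlace())
[inference_program, feed_target_names, fetch_targets] = (
    paddle.static.load_inference_model(model_dir, exe))
for var_ in inference_program.list_vars():
    if not var_.persistable and "relu" in var_.name:
        print(var_.name)  # candidate entry for the 'node' list
```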
+### 1.1.3 Structured Sparsity
The structured sparsity parameters are set as follows:
```yaml
ChannelPrune:
-  # pruned_ratio: pruning ratio
  pruned_ratio: 0.25
-  # prune_params_name: names of the parameters to prune
  prune_params_name:
  - conv1_weights
-  # criterion: the metric used to assess the importance of channels within a convolution layer
  criterion: l1_norm
```
-- Currently supported criterion values: l1_norm, bn_scale, geometry_median. For definitions and usage, see the [structured sparsity API docs](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/prune/prune_api.html).
-#### Customizing the ASP semi-structured sparsity strategy
+- pruned_ratio: fraction of channels pruned from each convolution layer.
+- prune_params_name: weight names of the convolution layers to prune. The following script lists the weight names of all convolution layers in an inference model:
+
+```
+import paddle
+paddle.enable_static()
+model_dir="./inference_model"
+exe = paddle.static.Executor(paddle.CPUPlace())
+[inference_program, feed_target_names, fetch_targets] = (
+ paddle.static.load_inference_model(model_dir, exe))
+for var_ in inference_program.list_vars():
+ if var_.persistable and "conv2d" in var_.name:
+ print(f"{var_.name}")
+```
+
+Alternatively, visualize the `*.pdmodel` file with the [Netron tool](https://netron.app/) and pick suitable convolution layers to prune.
+
+- criterion: metric used to assess the importance of convolution channels; one of "l1_norm", "bn_scale", "geometry_median". For definitions and usage, see the [structured sparsity API docs](https://paddleslim.readthedocs.io/zh_CN/latest/api_cn/static/prune/prune_api.html).
+
+### 1.1.4 ASP Semi-Structured Sparsity
The semi-structured sparsity parameters are set as follows:
```yaml
ASPPrune:
-  # prune_params_name: names of the parameters to prune
prune_params_name:
- conv1_weights
```
-#### Customizing structured pruning for Transformer architectures
+- prune_params_name: weight names of the convolution layers to prune. The following script lists the weight names of all convolution layers in an inference model:
+
+```
+import paddle
+paddle.enable_static()
+model_dir="./inference_model"
+exe = paddle.static.Executor(paddle.CPUPlace())
+[inference_program, feed_target_names, fetch_targets] = (
+ paddle.static.load_inference_model(model_dir, exe))
+for var_ in inference_program.list_vars():
+ if var_.persistable and "conv2d" in var_.name:
+ print(f"{var_.name}")
+```
+
+Alternatively, visualize the `*.pdmodel` file with the [Netron tool](https://netron.app/) and pick suitable convolution layers to prune.
+
+### 1.1.5 Structured Pruning for Transformers
The structured pruning parameters for Transformer architectures are set as follows:
```yaml
TransformerPrune:
-  # pruned_ratio: pruning ratio of each fully connected layer
pruned_ratio: 0.25
```
+- pruned_ratio: fraction pruned from each fully connected layer.
-#### Customizing the unstructured sparsity strategy
+### 1.1.6 Unstructured Sparsity
The unstructured sparsity parameters are set as follows:
```yaml
@@ -122,7 +201,7 @@ UnstructurePrune:
- local_sparsity controls the scope at which the pruning ratio (ratio) is applied, and only takes effect in 'ratio' mode. When local_sparsity is enabled, every pruned parameter matrix reaches a sparsity of 'ratio'; when disabled, only the overall model sparsity is guaranteed to reach 'ratio', and individual matrices may differ. Sparsity speedups are more pronounced when all matrices share the same sparsity.
- For the meaning of more unstructured sparsity parameters, see the [unstructured sparsity API docs](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/api_cn/dygraph/pruners/unstructured_pruner.rst)
-#### Configuring the training hyperparameters
+### 1.1.7 Training Hyperparameters
The training parameters mainly set the learning rate, the number of training epochs, the optimizer, and so on.
```yaml
@@ -143,12 +222,69 @@ TrainConfig:
      boundaries: [4500] # schedule parameters
      values: [0.005, 0.0005] # schedule parameters
```
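
Editor's note: for reference, the PiecewiseDecay entry above corresponds to Paddle's learning-rate schedule API; an illustrative sketch (not part of the tutorial itself):

```python
# The YAML schedule above (boundaries=[4500], values=[0.005, 0.0005])
# corresponds to this Paddle learning-rate schedule.
import paddle

sched = paddle.optimizer.lr.PiecewiseDecay(boundaries=[4500], values=[0.005, 0.0005])
for epoch in (0, 4499, 4500):
    sched.step(epoch)            # advance the schedule to the given step
    print(epoch, sched.last_lr)  # 0.005 before step 4500, 0.0005 afterwards
```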
-## Other configuration
+## 1.2 FAQ
-#### 1. Automatic distillation underperforms; how do I pick distillation nodes myself?
+### 1. Automatic distillation underperforms; how do I pick distillation nodes myself?
First visualize the `model.pdmodel` file with the [Netron tool](https://netron.app/), pick the output tensor names of certain layers in the model, and configure them as distillation nodes. (Layers such as the backbone or the network outputs are usually chosen for distillation.)
+
+### 2. How to get the op types in an inference model
+
+Run the following code to get the op types in an inference model, where `model_dir` is the directory the inference model is stored in.
+
+```
+import paddle
+paddle.enable_static()
+model_dir="./inference_model"
+exe = paddle.static.Executor(paddle.CPUPlace())
+inference_program, _, _ = (
+ paddle.static.load_inference_model(model_dir, exe))
+op_types = {}
+for block in inference_program.blocks:
+ for op in block.ops:
+ op_types[op.type] = 1
+print(f"Operators in inference model:\n{op_types.keys()}")
+```
+
+PaddlePaddle APIs used:
+
+- [load_inference_model](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/load_inference_model_cn.html#load-inference-model)
+- [Program](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/Program_cn.html#program)
+- [Executor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/Executor_cn.html#executor)
+
+### 3. Which op types can be quantized
+
+Run the following code to see the op types supported by the quantization feature of the current PaddlePaddle version:
+```
+from paddle.fluid.contrib.slim.quantization.utils import _weight_supported_quantizable_op_type, _act_supported_quantizable_op_type
+print(f"_supported_quantizable_op_type:\n{_weight_supported_quantizable_op_type}")
+print(f"_supported_quantizable_op_type:\n{_act_supported_quantizable_op_type}")
+```
+
+### 4. How to set the 'name_scope' attribute of an op in an inference model
+
+The following code sets the `name_scope` of the op whose output variable is `conv2d_52.tmp_0` to 'skip_quant':
+```
+import paddle
+paddle.enable_static()
+model_dir="./original_model"
+exe = paddle.static.Executor(paddle.CPUPlace())
+[inference_program, feed_target_names, fetch_targets] = (
+ paddle.static.load_inference_model(model_dir, exe))
+skips = ['conv2d_52.tmp_0']
+for block in inference_program.blocks:
+ for op in block.ops:
+ if op.output_arg_names[0] in skips:
+ op._set_attr("name_scope", "skip_quant")
+
+feed_vars = []
+for var_ in inference_program.list_vars():
+ if var_.name in feed_target_names:
+ feed_vars.append(var_)
+paddle.static.save_inference_model("./infer_model", feed_vars, fetch_targets, exe, program=inference_program)
+
+```
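
Editor's note: to double-check that the attribute survived the round trip, the re-saved model can be reloaded and inspected. A small sketch; it assumes (as in PaddleSlim's default quant config) that `not_quant_pattern` includes 'skip_quant', which is why this scope name takes effect.

```python
# Sketch: reload the re-saved model and confirm the name_scope attribute was kept.
import paddle
paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())
[program, _, _] = paddle.static.load_inference_model("./infer_model", exe)
for block in program.blocks:
    for op in block.ops:
        if op.has_attr("name_scope") and "skip_quant" in str(op.attr("name_scope")):
            print(op.type, op.attr("name_scope"))  # ops excluded from quantization
```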
diff --git a/example/auto_compression/image_classification/README.md b/example/auto_compression/image_classification/README.md
index c11e51c2..14c70fbd 100644
--- a/example/auto_compression/image_classification/README.md
+++ b/example/auto_compression/image_classification/README.md
@@ -21,28 +21,30 @@
### PaddleClas Models
-| Model | Strategy | Top-1 Acc | GPU latency (ms) | ARM CPU latency (ms) |
-|:------:|:------:|:------:|:------:|:------:|
-| MobileNetV1 | Baseline | 70.90 | - | 33.15 |
-| MobileNetV1 | Quantization + distillation | 70.57 | - | 13.64 |
-| ResNet50_vd | Baseline | 79.12 | 3.19 | - |
-| ResNet50_vd | Quantization + distillation | 78.74 | 0.92 | - |
-| ShuffleNetV2_x1_0 | Baseline | 68.65 | - | 10.43 |
-| ShuffleNetV2_x1_0 | Quantization + distillation | 68.32 | - | 5.51 |
-| SqueezeNet1_0_infer | Baseline | 59.60 | - | 35.98 |
-| SqueezeNet1_0_infer | Quantization + distillation | 59.45 | - | 16.96 |
-| PPLCNetV2_base | Baseline | 76.86 | - | 36.50 |
-| PPLCNetV2_base | Quantization + distillation | 76.43 | - | 15.79 |
-| PPHGNet_tiny | Baseline | 79.59 | 2.82 | - |
-| PPHGNet_tiny | Quantization + distillation | 79.20 | 0.98 | - |
-| InceptionV3 | Baseline | 79.14 | 4.79 | - |
-| InceptionV3 | Quantization + distillation | 78.32 | 1.47 | - |
-| EfficientNetB0 | Baseline | 77.02 | 1.95 | - |
-| EfficientNetB0 | Quantization + distillation | 75.39 | 1.44 | - |
-| GhostNet_x1_0 | Baseline | 74.02 | 2.93 | - |
-| GhostNet_x1_0 | Quantization + distillation | 72.62 | 1.03 | - |
-| MobileNetV3_large_x1_0 | Baseline | 75.32 | - | 16.62 |
-| MobileNetV3_large_x1_0 | Quantization + distillation | 70.93 | - | 9.85 |
+| Model | Strategy | Top-1 Acc | GPU latency (ms) | ARM CPU latency (ms) | Config | Inference Model |
+|:------:|:------:|:------:|:------:|:------:|:------:|:------:|
+| MobileNetV1 | Baseline | 70.90 | - | 33.15 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar) |
+| MobileNetV1 | Quantization + distillation | 70.57 | - | 13.64 | [Config](./configs/MobileNetV1/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV1_QAT.tar) |
+| ResNet50_vd | Baseline | 79.12 | 3.19 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar) |
+| ResNet50_vd | Quantization + distillation | 78.74 | 0.92 | - | [Config](./configs/ResNet50_vd/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/ResNet50_vd_QAT.tar) |
+| ShuffleNetV2_x1_0 | Baseline | 68.65 | - | 10.43 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ShuffleNetV2_x1_0_infer.tar) |
+| ShuffleNetV2_x1_0 | Quantization + distillation | 68.32 | - | 5.51 | [Config](./configs/ShuffleNetV2_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/ShuffleNetV2_x1_0_QAT.tar) |
+| SqueezeNet1_0 | Baseline | 59.60 | - | 35.98 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SqueezeNet1_0_infer.tar) |
+| SqueezeNet1_0 | Quantization + distillation | 59.45 | - | 16.96 | [Config](./configs/SqueezeNet1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/SqueezeNet1_0_QAT.tar) |
+| PPLCNetV2_base | Baseline | 76.86 | - | 36.50 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PPLCNetV2_base_infer.tar) |
+| PPLCNetV2_base | Quantization + distillation | 76.43 | - | 15.79 | [Config](./configs/PPLCNetV2_base/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/PPLCNetV2_base_QAT.tar) |
+| PPHGNet_tiny | Baseline | 79.59 | 2.82 | - | - |[Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PPHGNet_tiny_infer.tar) |
+| PPHGNet_tiny | Quantization + distillation | 79.20 | 0.98 | - | [Config](./configs/PPHGNet_tiny/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/PPHGNet_tiny_QAT.tar) |
+| InceptionV3 | Baseline | 79.14 | 4.79 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/InceptionV3_infer.tar) |
+| InceptionV3 | Quantization + distillation | 78.32 | 1.47 | - | [Config](./configs/InceptionV3/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/InceptionV3_QAT.tar) |
+| EfficientNetB0 | Baseline | 77.02 | 1.95 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/EfficientNetB0_infer.tar) |
+| EfficientNetB0 | Quantization + distillation | 75.39 | 1.44 | - | [Config](./configs/EfficientNetB0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/EfficientNetB0_QAT.tar) |
+| GhostNet_x1_0 | Baseline | 74.02 | 2.93 | - | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/GhostNet_x1_0_infer.tar) |
+| GhostNet_x1_0 | Quantization + distillation | 72.62 | 1.03 | - | [Config](./configs/GhostNet_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/GhostNet_x1_0_QAT.tar) |
+| MobileNetV3_large_x1_0 | Baseline | 75.32 | - | 16.62 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV3_large_x1_0_infer.tar) |
+| MobileNetV3_large_x1_0 | Quantization + distillation | 74.41 | - | 9.85 | [Config](./configs/MobileNetV3_large_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV3_large_x1_0_QAT.tar) |
+| MobileNetV3_large_x1_0_ssld | Baseline | 78.96 | - | 16.62 | - | [Model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV3_large_x1_0_ssld_infer.tar) |
+| MobileNetV3_large_x1_0_ssld | Quantization + distillation | 77.17 | - | 9.85 | [Config](./configs/MobileNetV3_large_x1_0/qat_dis.yaml) | [Model](https://paddle-slim-models.bj.bcebos.com/act/MobileNetV3_large_x1_0_ssld_QAT.tar) |
- ARM CPU test environment: `SDM865 (4xA77+4xA55)`
- Nvidia GPU test environment:
diff --git a/example/auto_compression/image_classification/eval.py b/example/auto_compression/image_classification/eval.py
index d0e0c3d1..9cd9b4a3 100644
--- a/example/auto_compression/image_classification/eval.py
+++ b/example/auto_compression/image_classification/eval.py
@@ -23,7 +23,7 @@ import paddle
import paddle.nn as nn
from paddle.io import DataLoader
from imagenet_reader import ImageNetDataset
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
+from paddleslim.common import load_config as load_slim_config
def argsparser():
diff --git a/example/auto_compression/image_classification/infer.py b/example/auto_compression/image_classification/infer.py
index 5060115c..46eb7115 100644
--- a/example/auto_compression/image_classification/infer.py
+++ b/example/auto_compression/image_classification/infer.py
@@ -22,7 +22,7 @@ import yaml
from utils import preprocess, postprocess
import paddle
from paddle.inference import create_predictor
-from paddleslim.auto_compression.config_helpers import load_config
+from paddleslim.common import load_config
def argsparser():
diff --git a/example/auto_compression/image_classification/run.py b/example/auto_compression/image_classification/run.py
index d8da1a9f..dee25a17 100644
--- a/example/auto_compression/image_classification/run.py
+++ b/example/auto_compression/image_classification/run.py
@@ -24,7 +24,7 @@ import paddle
import paddle.nn as nn
from paddle.io import DataLoader
from imagenet_reader import ImageNetDataset
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
+from paddleslim.common import load_config as load_slim_config
from paddleslim.auto_compression import AutoCompression
@@ -46,6 +46,11 @@ def argsparser():
type=int,
default=1281167,
help="the number of total training images.")
+ parser.add_argument(
+ '--devices',
+ type=str,
+ default='gpu',
+ help="which device used to compress.")
return parser
@@ -122,7 +127,12 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
def main():
rank_id = paddle.distributed.get_rank()
- place = paddle.CUDAPlace(rank_id)
+ if args.devices == 'gpu':
+ place = paddle.CUDAPlace(rank_id)
+ paddle.set_device('gpu')
+ else:
+ place = paddle.CPUPlace()
+ paddle.set_device('cpu')
global global_config
all_config = load_slim_config(args.config_path)
diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py
index 013b5826..e1bf4f25 100644
--- a/example/auto_compression/nlp/run.py
+++ b/example/auto_compression/nlp/run.py
@@ -15,7 +15,7 @@ from paddlenlp.datasets import load_dataset
from paddlenlp.data import Stack, Tuple, Pad
from paddlenlp.data.sampler import SamplerHelper
from paddlenlp.metrics import Mcc, PearsonAndSpearman
-from paddleslim.auto_compression.config_helpers import load_config
+from paddleslim.common import load_config
from paddleslim.auto_compression.compressor import AutoCompression
diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py
index 4da4e703..0c730dff 100644
--- a/example/auto_compression/pytorch_huggingface/run.py
+++ b/example/auto_compression/pytorch_huggingface/run.py
@@ -27,7 +27,7 @@ from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenize
from paddlenlp.datasets import load_dataset
from paddlenlp.data import Stack, Tuple, Pad
from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
+from paddleslim.common import load_config as load_slim_config
from paddleslim.auto_compression.compressor import AutoCompression
diff --git a/example/auto_compression/pytorch_yolo_series/README.md b/example/auto_compression/pytorch_yolo_series/README.md
new file mode 100644
index 00000000..3f80ad6f
--- /dev/null
+++ b/example/auto_compression/pytorch_yolo_series/README.md
@@ -0,0 +1,170 @@
+# Auto-Compression Example for YOLO-Series Models
+
+Contents:
+- [1. Introduction](#1-introduction)
+- [2. Benchmark](#2-benchmark)
+- [3. Auto-Compression Workflow](#3-auto-compression-workflow)
+  - [3.1 Environment Setup](#31-environment-setup)
+  - [3.2 Preparing the Dataset](#32-preparing-the-dataset)
+  - [3.3 Preparing the Inference Model](#33-preparing-the-inference-model)
+  - [3.4 Running Auto-Compression to Produce the Model](#34-running-auto-compression-to-produce-the-model)
+  - [3.5 Evaluating Model Accuracy](#35-evaluating-model-accuracy)
+- [4. Deployment](#4-deployment)
+- [5. FAQ](#5-faq)
+
+## 1. Introduction
+
+This example takes the [ultralytics/yolov5](https://github.com/ultralytics/yolov5), [meituan/YOLOv6](https://github.com/meituan/YOLOv6), and [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7) object detection models as examples. With [X2Paddle](https://github.com/PaddlePaddle/X2Paddle), the PyTorch models are converted into Paddle models, which are then compressed with the ACT auto-compression feature. The compressed model can be deployed with Paddle Inference, or exported to ONNX and deployed with TensorRT.
+
+## 2. Benchmark
+
+| Model | Strategy | Input size | mAP (val, 0.5:0.95) | Model size | FP32 latency | FP16 latency | INT8 latency | Config | Inference Model |
+| :-------- |:-------- |:--------: | :--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: |
+| YOLOv5s | Baseline | 640*640 | 37.4 | 28.1MB | 5.95ms | 2.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) |
+| YOLOv5s | PTQ | 640*640 | 36.0 | 7.4MB | - | - | 1.87ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series) | - |
+| YOLOv5s | ACT QAT | 640*640 | **36.9** | 7.4MB | - | - | **1.87ms** | [config](./configs/yolov5s_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov5s_quant.tar), [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov5s_quant.onnx) |
+| | | | | | | | | | |
+| YOLOv6s | Baseline | 640*640 | 42.4 | 65.9MB | 9.06ms | 2.90ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) |
+| YOLOv6s | KL PTQ | 640*640 | 30.3 | 16.8MB | - | - | 1.83ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series) | - |
+| YOLOv6s | QAT + distillation | 640*640 | **41.3** | 16.8MB | - | - | **1.83ms** | [config](./configs/yolov6s_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_quant.tar), [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_quant.onnx) |
+| | | | | | | | | | |
+| YOLOv7 | Baseline | 640*640 | 51.1 | 141MB | 26.84ms | 7.44ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) |
+| YOLOv7 | PTQ | 640*640 | 50.2 | 36MB | - | - | 4.55ms | [config](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series) | - |
+| YOLOv7 | ACT QAT | 640*640 | **50.9** | 36MB | - | - | **4.55ms** | [config](./configs/yolov7_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_quant.tar), [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_quant.onnx) |
+| | | | | | | | | | |
+| YOLOv7-Tiny | Baseline | 640*640 | 37.3 | 24MB | 5.06ms | 2.32ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/yolov7-tiny.onnx) |
+| YOLOv7-Tiny | PTQ | 640*640 | 35.8 | 6.1MB | - | - | 1.68ms | - | - |
+| YOLOv7-Tiny | ACT QAT | 640*640 | **37.0** | 6.1MB | - | - | **1.68ms** | [config](./configs/yolov7_tiny_qat_dis.yaml) | [Infer Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_tiny_quant.tar), [ONNX Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov7_tiny_quant.onnx) |
+
+Notes:
+- All mAP numbers are evaluated on the COCO val2017 dataset.
+- YOLOv7 latency is measured on a Tesla T4 GPU with TensorRT 8.4.1 enabled, batch_size=1; the test script is [cpp_infer](./cpp_infer).
+
+## 3. Auto-Compression Workflow
+
+#### 3.1 Environment Setup
+- PaddlePaddle >= 2.3.2 (install it following the instructions for your environment on the [Paddle website](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
+- PaddleSlim develop version
+
+(1) Install paddlepaddle:
+```
+# CPU
+pip install paddlepaddle==2.3.2
+# GPU
+pip install paddlepaddle-gpu==2.3.2
+```
+
+(2) Install paddleslim:
+```shell
+git clone https://github.com/PaddlePaddle/PaddleSlim.git && cd PaddleSlim
+python setup.py install
+```
+
+
+#### 3.2 Preparing the Dataset
+
+By default this example runs auto-compression on COCO data. From the [MS COCO website](https://cocodataset.org) you can download [Train](http://images.cocodataset.org/zips/train2017.zip), [Val](http://images.cocodataset.org/zips/val2017.zip), and [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip).
+
+The directory layout is as follows:
+```
+dataset/coco/
+├── annotations
+│ ├── instances_train2017.json
+│ ├── instances_val2017.json
+│ | ...
+├── train2017
+│ ├── 000000000009.jpg
+│ ├── 000000580008.jpg
+│ | ...
+├── val2017
+│ ├── 000000000139.jpg
+│ ├── 000000000285.jpg
+```
+
+For a custom dataset, please prepare the data following the COCO format shown above.
+
+
+#### 3.3 Preparing the Inference Model
+
+(1) Prepare the ONNX model:
+
+- YOLOv5:
+
+  The model in this example is exported from the master branch of [ultralytics/yolov5](https://github.com/ultralytics/yolov5); an ONNX model from v6.1 or later is required. You can prepare the ONNX model following the official [export tutorial](https://github.com/ultralytics/yolov5/issues/251), or download the ready-made [yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx).
+  ```shell
+  python export.py --weights yolov5s.pt --include onnx
+  ```
+
+- YOLOv6:
+
+  Prepare the ONNX model following the official [export tutorial](https://github.com/meituan/YOLOv6/blob/main/deploy/ONNX/README.md) of [meituan/YOLOv6](https://github.com/meituan/YOLOv6), or download the ready-made [yolov6s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx).
+
+- YOLOv7: prepare the ONNX model with the export script from [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7), as follows:
+  ```shell
+  git clone https://github.com/WongKinYiu/yolov7.git
+  python export.py --weights yolov7-tiny.pt --grid
+  ```
+
+  **Note**: ACT currently supports models **without NMS**; exporting with the command above is sufficient. You can also directly download our ready-made [yolov7.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov7-tiny.onnx).
+
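
Editor's note: because ACT expects an NMS-free export here, it can help to verify the ONNX file before compressing; a small sketch using the `onnx` package (the filename is a placeholder):

```python
# Sketch: confirm the exported model contains no NMS nodes before compression.
import onnx

model = onnx.load("yolov7-tiny.onnx")  # placeholder path
nms_nodes = [n.name for n in model.graph.node if "NonMaxSuppression" in n.op_type]
print("NMS nodes found:", nms_nodes or "none")
```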
+#### 3.4 Running Auto-Compression to Produce the Model
+
+The distillation-quantization auto-compression example is launched via the run.py script, which compresses the model with the ```paddleslim.auto_compression.AutoCompression``` interface. Configure the model path, distillation, quantization, and training parameters in the config file; once configured, the model can be quantized and distilled.
+
+This example launches auto-compression on YOLOv7-Tiny; to switch models, simply change the `--config_path`. The commands are:
+
+- Single-GPU training:
+```
+export CUDA_VISIBLE_DEVICES=0
+python run.py --config_path=./configs/yolov7_tiny_qat_dis.yaml --save_dir='./output/'
+```
+
+- Multi-GPU training:
+```
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --log_dir=log --gpus 0,1,2,3 run.py \
+ --config_path=./configs/yolov7_tiny_qat_dis.yaml --save_dir='./output/'
+```
+
+#### 3.5 Evaluating Model Accuracy
+
+Set the `model_dir` field in [yolov7_qat_dis.yaml](./configs/yolov7_qat_dis.yaml) to the model's storage path, then obtain the model's mAP with the eval.py script:
+```
+export CUDA_VISIBLE_DEVICES=0
+python eval.py --config_path=./configs/yolov7_tiny_qat_dis.yaml
+```
+
+
+## 4. Deployment
+
+#### Exporting to ONNX and deploying with TensorRT
+
+After auto-compression finishes, an ONNX model file `quant_model.onnx` is generated in `save_dir` by default; it can be verified directly with the TensorRT test script.
+
+- Run the test:
+```shell
+python onnx_trt_infer.py --model_path=output/quant_model.onnx --image_file=images/000000570688.jpg --precision=int8
+```
+
+#### Paddle-TensorRT deployment
+- C++ deployment
+
+Enter the [cpp_infer](./cpp_infer) folder, prepare the environment and compile following the [C++ TensorRT benchmark tutorial](./cpp_infer/README.md), then run the test:
+```shell
+# compile
+bash compile.sh
+# run
+./build/trt_run --model_file yolov7_quant/model.pdmodel --params_file yolov7_quant/model.pdiparams --run_mode=trt_int8
+```
+
+- Python deployment:
+
+First install a [Paddle package](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python) built with TensorRT.
+
+Then deploy with [paddle_trt_infer.py](./paddle_trt_infer.py):
+```shell
+python paddle_trt_infer.py --model_path=output --image_file=images/000000570688.jpg --benchmark=True --run_mode=trt_int8
+```
+
+## 5. FAQ
+
+- To run post-training quantization on the model instead, try the [YOLO-series PTQ example](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/post_training_quantization/pytorch_yolo_series).
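
Editor's note: for reference, the PTQ route pointed to by this FAQ builds on `paddleslim.quant.quant_post_static`, whose signature appears earlier in this patch. A minimal sketch with placeholder paths and a random calibration reader (replace both with real ones):

```python
# Minimal PTQ sketch; the model directory and calibration data are placeholders.
import numpy as np
import paddle
from paddleslim.quant import quant_post_static

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

def calib_reader():
    # Fake calibration samples shaped like 640x640 YOLO inputs.
    for _ in range(16):
        yield [np.random.rand(1, 3, 640, 640).astype("float32")]

quant_post_static(
    executor=exe,
    model_dir="./yolov7_infer",          # placeholder path
    quantize_model_path="./yolov7_ptq",
    sample_generator=calib_reader,
    model_filename="model.pdmodel",
    params_filename="model.pdiparams",
    batch_size=1,
    batch_nums=16,
    algo="avg",                          # one of the PTQ algorithms listed in this patch
    onnx_format=True)
```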
diff --git a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml
similarity index 60%
rename from example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
rename to example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml
index ef9bf8b7..d5c853be 100644
--- a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
+++ b/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml
@@ -1,18 +1,19 @@
-
Global:
- reader_config: configs/yolov5_reader.yml
- input_list: {'image': 'x2paddle_images'}
+ model_dir: ./yolov5s.onnx
+ dataset_dir: dataset/coco/
+ train_image_dir: train2017
+ val_image_dir: val2017
+ train_anno_path: annotations/instances_train2017.json
+ val_anno_path: annotations/instances_val2017.json
Evaluation: True
- arch: 'YOLOv5'
- model_dir: ./yolov5s_infer
- model_filename: model.pdmodel
- params_filename: model.pdiparams
+ arch: YOLOv5
Distillation:
alpha: 1.0
loss: soft_label
Quantization:
+ onnx_format: true
use_pact: true
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
diff --git a/example/auto_compression/pytorch_yolov6/configs/yolov6s_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml
similarity index 62%
rename from example/auto_compression/pytorch_yolov6/configs/yolov6s_qat_dis.yaml
rename to example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml
index 4fcf4777..e14a6b65 100644
--- a/example/auto_compression/pytorch_yolov6/configs/yolov6s_qat_dis.yaml
+++ b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml
@@ -1,18 +1,19 @@
-
Global:
- reader_config: configs/yolov6_reader.yml
- input_list: {'image': 'x2paddle_image_arrays'}
+ model_dir: ./yolov6s.onnx
+ dataset_dir: dataset/coco/
+ train_image_dir: train2017
+ val_image_dir: val2017
+ train_anno_path: annotations/instances_train2017.json
+ val_anno_path: annotations/instances_val2017.json
Evaluation: True
- arch: 'YOLOv6'
- model_dir: ./yolov6s_infer
- model_filename: model.pdmodel
- params_filename: model.pdiparams
+ arch: YOLOv6
Distillation:
alpha: 1.0
loss: soft_label
Quantization:
+ onnx_format: true
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
- conv2d
diff --git a/example/auto_compression/pytorch_yolov7/configs/yolov7_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml
similarity index 56%
rename from example/auto_compression/pytorch_yolov7/configs/yolov7_qat_dis.yaml
rename to example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml
index 6607e361..437ceea9 100644
--- a/example/auto_compression/pytorch_yolov7/configs/yolov7_qat_dis.yaml
+++ b/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml
@@ -1,26 +1,28 @@
-
Global:
- reader_config: configs/yolov7_reader.yaml
- input_list: {'image': 'x2paddle_images'}
+ model_dir: ./yolov7.onnx
+ dataset_dir: dataset/coco/
+ train_image_dir: train2017
+ val_image_dir: val2017
+ train_anno_path: annotations/instances_train2017.json
+ val_anno_path: annotations/instances_val2017.json
Evaluation: True
- model_dir: ./yolov7_infer
- model_filename: model.pdmodel
- params_filename: model.pdiparams
+ arch: YOLOv7
Distillation:
alpha: 1.0
loss: soft_label
Quantization:
+ onnx_format: true
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
- conv2d
- depthwise_conv2d
TrainConfig:
- train_iter: 8000
+ train_iter: 5000
eval_iter: 1000
- learning_rate:
+ learning_rate:
type: CosineAnnealingDecay
learning_rate: 0.00003
T_max: 8000
diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml
new file mode 100644
index 00000000..958182f6
--- /dev/null
+++ b/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml
@@ -0,0 +1,32 @@
+Global:
+ model_dir: ./yolov7-tiny.onnx
+ dataset_dir: dataset/coco/
+ train_image_dir: train2017
+ val_image_dir: val2017
+ train_anno_path: annotations/instances_train2017.json
+ val_anno_path: annotations/instances_val2017.json
+ Evaluation: True
+ arch: YOLOv7
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+Quantization:
+ onnx_format: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+
+TrainConfig:
+ train_iter: 5000
+ eval_iter: 1000
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 8000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 0.00004
diff --git a/example/auto_compression/pytorch_yolov5/cpp_infer/CMakeLists.txt b/example/auto_compression/pytorch_yolo_series/cpp_infer/CMakeLists.txt
similarity index 100%
rename from example/auto_compression/pytorch_yolov5/cpp_infer/CMakeLists.txt
rename to example/auto_compression/pytorch_yolo_series/cpp_infer/CMakeLists.txt
diff --git a/example/auto_compression/pytorch_yolov5/cpp_infer/README.md b/example/auto_compression/pytorch_yolo_series/cpp_infer/README.md
similarity index 64%
rename from example/auto_compression/pytorch_yolov5/cpp_infer/README.md
rename to example/auto_compression/pytorch_yolo_series/cpp_infer/README.md
index 9566728a..0286c26d 100644
--- a/example/auto_compression/pytorch_yolov5/cpp_infer/README.md
+++ b/example/auto_compression/pytorch_yolo_series/cpp_infer/README.md
@@ -1,4 +1,4 @@
-# YOLOv5 TensorRT Benchmark Test (Linux)
+# YOLO Series TensorRT Benchmark Test (Linux)
## Environment Preparation
@@ -22,21 +22,37 @@ CUDA_LIB=/usr/local/cuda/lib64
TENSORRT_ROOT=/root/auto_compress/trt/trt8.4/
```
## Paddle TensorRT Test
-- FP32
+- YOLOv5
```
+# FP32
./build/trt_run --model_file yolov5s_infer/model.pdmodel --params_file yolov5s_infer/model.pdiparams --run_mode=trt_fp32
+# FP16
+./build/trt_run --model_file yolov5s_infer/model.pdmodel --params_file yolov5s_infer/model.pdiparams --run_mode=trt_fp16
+# INT8
+./build/trt_run --model_file yolov5s_quant/model.pdmodel --params_file yolov5s_quant/model.pdiparams --run_mode=trt_int8
```
-- FP16
+- YOLOv6
```
-./build/trt_run --model_file yolov5s_infer/model.pdmodel --params_file yolov5s_infer/model.pdiparams --run_mode=trt_fp16
+# FP32
+./build/trt_run --arch=YOLOv6 --model_file yolov6s_infer/model.pdmodel --params_file yolov6s_infer/model.pdiparams --run_mode=trt_fp32
+# FP16
+./build/trt_run --arch=YOLOv6 --model_file yolov6s_infer/model.pdmodel --params_file yolov6s_infer/model.pdiparams --run_mode=trt_fp16
+# INT8
+./build/trt_run --arch=YOLOv6 --model_file yolov6s_quant/model.pdmodel --params_file yolov6s_quant/model.pdiparams --run_mode=trt_int8
```
-- INT8
+
+- YOLOv7
```
-./build/trt_run --model_file yolov5s_quant/model.pdmodel --params_file yolov5s_quant/model.pdiparams --run_mode=trt_int8
+# FP32
+./build/trt_run --model_file yolov7_infer/model.pdmodel --params_file yolov7_infer/model.pdiparams --run_mode=trt_fp32
+# FP16
+./build/trt_run --model_file yolov7_infer/model.pdmodel --params_file yolov7_infer/model.pdiparams --run_mode=trt_fp16
+# INT8
+./build/trt_run --model_file yolov7_quant/model.pdmodel --params_file yolov7_quant/model.pdiparams --run_mode=trt_int8
```
## 原生TensorRT测试
@@ -49,6 +65,7 @@ trtexec --onnx=yolov5s.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp1
# INT8
trtexec --onnx=yolov5s.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw --int8
```
+- Note: replace `--onnx=yolov5s.onnx` with `yolov6s.onnx` or `yolov7.onnx` to benchmark the other models.
## Performance Comparison
@@ -56,6 +73,12 @@ trtexec --onnx=yolov5s.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp1
| :--------: | :--------: |:-------- |:--------: | :---------------------: |
| Paddle TensorRT | yolov5s | 5.95ms | 2.44ms | 1.87ms |
| TensorRT | yolov5s | 6.16ms | 2.58ms | 2.07ms |
+| | | | | |
+| Paddle TensorRT | YOLOv6s | 9.06ms | 2.90ms | 1.83ms |
+| TensorRT | YOLOv6s | 8.59ms | 2.83ms | 1.87ms |
+| | | | | |
+| Paddle TensorRT | YOLOv7 | 26.84ms | 7.44ms | 4.55ms |
+| TensorRT | YOLOv7 | 28.25ms | 7.23ms | 4.67ms |
Environment:
- Tesla T4, TensorRT 8.4.1, CUDA 11.2
diff --git a/example/auto_compression/pytorch_yolov5/cpp_infer/compile.sh b/example/auto_compression/pytorch_yolo_series/cpp_infer/compile.sh
similarity index 100%
rename from example/auto_compression/pytorch_yolov5/cpp_infer/compile.sh
rename to example/auto_compression/pytorch_yolo_series/cpp_infer/compile.sh
diff --git a/example/auto_compression/pytorch_yolov7/cpp_infer/trt_run.cc b/example/auto_compression/pytorch_yolo_series/cpp_infer/trt_run.cc
similarity index 93%
rename from example/auto_compression/pytorch_yolov7/cpp_infer/trt_run.cc
rename to example/auto_compression/pytorch_yolo_series/cpp_infer/trt_run.cc
index 0ae055ac..22095b39 100644
--- a/example/auto_compression/pytorch_yolov7/cpp_infer/trt_run.cc
+++ b/example/auto_compression/pytorch_yolo_series/cpp_infer/trt_run.cc
@@ -19,6 +19,7 @@ using phi::dtype::float16;
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
+DEFINE_string(arch, "YOLOv5", "Architectures name, can be: YOLOv5, YOLOv6, YOLOv7.");
DEFINE_string(run_mode, "trt_fp32", "run_mode which can be: trt_fp32, trt_fp16 and trt_int8");
DEFINE_int32(batch_size, 1, "Batch size.");
DEFINE_int32(gpu_id, 0, "GPU card ID num.");
@@ -106,11 +107,15 @@ int main(int argc, char *argv[]) {
using dtype = float16;
  std::vector<dtype> input_data(FLAGS_batch_size * 3 * 640 * 640, dtype(1.0));
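+  // YOLOv5/YOLOv7 exports produce 25200 candidate boxes at 640x640 input;
+  // the anchor-free YOLOv6 head produces 8400.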
+ int out_box_shape = 25200;
+ if (FLAGS_arch == "YOLOv6"){
+ out_box_shape = 8400;
+ }
dtype *out_data;
- int out_data_size = FLAGS_batch_size * 25200 * 85;
+ int out_data_size = FLAGS_batch_size * out_box_shape * 85;
cudaHostAlloc((void**)&out_data, sizeof(float) * out_data_size, cudaHostAllocMapped);
-  std::vector<int> out_shape{ FLAGS_batch_size, 1, 25200, 85};
+  std::vector<int> out_shape{ FLAGS_batch_size, 1, out_box_shape, 85};
run(predictor.get(), input_data, input_shape, out_data, out_shape);
return 0;
}
diff --git a/example/auto_compression/pytorch_yolo_series/dataset.py b/example/auto_compression/pytorch_yolo_series/dataset.py
new file mode 100644
index 00000000..0250b936
--- /dev/null
+++ b/example/auto_compression/pytorch_yolo_series/dataset.py
@@ -0,0 +1,115 @@
+from pycocotools.coco import COCO
+import cv2
+import os
+import numpy as np
+import paddle
+
+
+class COCOValDataset(paddle.io.Dataset):
+ def __init__(self,
+ dataset_dir=None,
+ image_dir=None,
+ anno_path=None,
+ img_size=[640, 640],
+ input_name='x2paddle_images'):
+ self.dataset_dir = dataset_dir
+ self.image_dir = image_dir
+ self.img_size = img_size
+ self.input_name = input_name
+ self.ann_file = os.path.join(dataset_dir, anno_path)
+ self.coco = COCO(self.ann_file)
+ ori_ids = list(sorted(self.coco.imgs.keys()))
+ # check gt bbox
+ clean_ids = []
+ for idx in ori_ids:
+ ins_anno_ids = self.coco.getAnnIds(imgIds=[idx], iscrowd=False)
+ instances = self.coco.loadAnns(ins_anno_ids)
+ num_bbox = 0
+ for inst in instances:
+ if inst.get('ignore', False):
+ continue
+ if 'bbox' not in inst.keys():
+ continue
+ elif not any(np.array(inst['bbox'])):
+ continue
+ else:
+ num_bbox += 1
+ if num_bbox > 0:
+ clean_ids.append(idx)
+ self.ids = clean_ids
+
+ def __getitem__(self, idx):
+ img_id = self.ids[idx]
+ img = self._get_img_data_from_img_id(img_id)
+ img, scale_factor = self.image_preprocess(img, self.img_size)
+ return {
+ 'image': img,
+ 'im_id': np.array([img_id]),
+ 'scale_factor': scale_factor
+ }
+
+ def __len__(self):
+ return len(self.ids)
+
+ def _get_img_data_from_img_id(self, img_id):
+ img_info = self.coco.loadImgs(img_id)[0]
+ img_path = os.path.join(self.dataset_dir, self.image_dir,
+ img_info['file_name'])
+ img = cv2.imread(img_path)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ return img
+
+ def _generate_scale(self, im, target_shape, keep_ratio=True):
+ """
+ Args:
+ im (np.ndarray): image (np.ndarray)
+ Returns:
+ im_scale_x: the resize ratio of X
+ im_scale_y: the resize ratio of Y
+ """
+ origin_shape = im.shape[:2]
+ if keep_ratio:
+ im_size_min = np.min(origin_shape)
+ im_size_max = np.max(origin_shape)
+ target_size_min = np.min(target_shape)
+ target_size_max = np.max(target_shape)
+ im_scale = float(target_size_min) / float(im_size_min)
+ if np.round(im_scale * im_size_max) > target_size_max:
+ im_scale = float(target_size_max) / float(im_size_max)
+ im_scale_x = im_scale
+ im_scale_y = im_scale
+ else:
+ resize_h, resize_w = target_shape
+ im_scale_y = resize_h / float(origin_shape[0])
+ im_scale_x = resize_w / float(origin_shape[1])
+ return im_scale_y, im_scale_x
+
+ def image_preprocess(self, img, target_shape):
+ # Resize image
+ im_scale_y, im_scale_x = self._generate_scale(img, target_shape)
+ img = cv2.resize(
+ img,
+ None,
+ None,
+ fx=im_scale_x,
+ fy=im_scale_y,
+ interpolation=cv2.INTER_LINEAR)
+ # Pad
+ im_h, im_w = img.shape[:2]
+ h, w = target_shape[:]
+ if h != im_h or w != im_w:
+ canvas = np.ones((h, w, 3), dtype=np.float32)
+ canvas *= np.array([114.0, 114.0, 114.0], dtype=np.float32)
+ canvas[0:im_h, 0:im_w, :] = img.astype(np.float32)
+ img = canvas
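+        # Normalize to [0,1] and convert HWC -> CHW; scale_factor records the
+        # resize ratios so predicted boxes can be mapped back to the original image.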
+ img = np.transpose(img / 255, [2, 0, 1])
+ scale_factor = np.array([im_scale_y, im_scale_x])
+ return img.astype(np.float32), scale_factor
+
+
+class COCOTrainDataset(COCOValDataset):
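+    """Training split: __getitem__ yields only {input_name: image}, with no
+    labels, since ACT's distillation loss comes from the teacher's outputs."""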
+ def __getitem__(self, idx):
+ img_id = self.ids[idx]
+ img = self._get_img_data_from_img_id(img_id)
+ img, scale_factor = self.image_preprocess(img, self.img_size)
+ return {self.input_name: img}
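+
+
+if __name__ == '__main__':
+    # Minimal smoke test, assuming the COCO layout used by the configs above
+    # (dataset/coco/ with val2017 images and instances_val2017.json).
+    val_set = COCOValDataset(
+        dataset_dir='dataset/coco/',
+        image_dir='val2017',
+        anno_path='annotations/instances_val2017.json')
+    sample = val_set[0]
+    print(sample['image'].shape, sample['im_id'], sample['scale_factor'])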
diff --git a/example/auto_compression/pytorch_yolo_series/eval.py b/example/auto_compression/pytorch_yolo_series/eval.py
new file mode 100644
index 00000000..de11989e
--- /dev/null
+++ b/example/auto_compression/pytorch_yolo_series/eval.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import numpy as np
+import argparse
+from tqdm import tqdm
+import paddle
+from paddleslim.common import load_config
+from paddleslim.common import load_inference_model
+from post_process import YOLOPostProcess, coco_metric
+from dataset import COCOValDataset
+
+
+def argsparser():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument(
+ '--config_path',
+ type=str,
+ default=None,
+ help="path of compression strategy config.",
+ required=True)
+ parser.add_argument(
+ '--batch_size', type=int, default=1, help="Batch size of model input.")
+ parser.add_argument(
+ '--devices',
+ type=str,
+ default='gpu',
+ help="which device used to compress.")
+
+ return parser
+
+
+def eval():
+
+ place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace()
+ exe = paddle.static.Executor(place)
+
+ val_program, feed_target_names, fetch_targets = load_inference_model(
+ global_config["model_dir"], exe)
+
+ bboxes_list, bbox_nums_list, image_id_list = [], [], []
+ with tqdm(
+ total=len(val_loader),
+ bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}',
+ ncols=80) as t:
+ for data in val_loader:
+ data_all = {k: np.array(v) for k, v in data.items()}
+ outs = exe.run(val_program,
+ feed={feed_target_names[0]: data_all['image']},
+ fetch_list=fetch_targets,
+ return_numpy=False)
+ postprocess = YOLOPostProcess(
+ score_threshold=0.001, nms_threshold=0.65, multi_label=True)
+ res = postprocess(np.array(outs[0]), data_all['scale_factor'])
+ bboxes_list.append(res['bbox'])
+ bbox_nums_list.append(res['bbox_num'])
+ image_id_list.append(np.array(data_all['im_id']))
+ t.update()
+
+ coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list)
+
+
+def main():
+ global global_config
+ all_config = load_config(FLAGS.config_path)
+ global_config = all_config["Global"]
+
+ global val_loader
+ dataset = COCOValDataset(
+ dataset_dir=global_config['dataset_dir'],
+ image_dir=global_config['val_image_dir'],
+ anno_path=global_config['val_anno_path'])
+ global anno_file
+ anno_file = dataset.ann_file
+ val_loader = paddle.io.DataLoader(
+ dataset, batch_size=FLAGS.batch_size, drop_last=True)
+
+ eval()
+
+
+if __name__ == '__main__':
+ paddle.enable_static()
+ parser = argsparser()
+ FLAGS = parser.parse_args()
+
+ assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
+ paddle.set_device(FLAGS.devices)
+
+ main()
diff --git a/example/auto_compression/pytorch_yolov5/images/000000570688.jpg b/example/auto_compression/pytorch_yolo_series/images/000000570688.jpg
similarity index 100%
rename from example/auto_compression/pytorch_yolov5/images/000000570688.jpg
rename to example/auto_compression/pytorch_yolo_series/images/000000570688.jpg
diff --git a/example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py b/example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py
new file mode 100644
index 00000000..3540c33d
--- /dev/null
+++ b/example/auto_compression/pytorch_yolo_series/onnx_trt_infer.py
@@ -0,0 +1,378 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import cv2
+import tensorrt as trt
+import pycuda.driver as cuda
+import pycuda.autoinit
+import os
+import time
+import random
+import argparse
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+EXPLICIT_PRECISION = 1 << (
+ int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)
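+# EXPLICIT_PRECISION is combined with EXPLICIT_BATCH below when building int8
+# engines, so the parser keeps the Q/DQ (quantize/dequantize) nodes of a
+# quantized ONNX graph.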
+
+# load coco labels
+CLASS_LABEL = [
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
+ "truck", "boat", "traffic light", "fire hydrant", "stop sign",
+ "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+ "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
+ "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
+ "baseball bat", "baseball glove", "skateboard", "surfboard",
+ "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
+ "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
+ "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
+ "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
+ "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
+ "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+ "hair drier", "toothbrush"
+]
+
+
+def preprocess(image, input_size, mean=None, std=None, swap=(2, 0, 1)):
+ if len(image.shape) == 3:
+ padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
+ else:
+ padded_img = np.ones(input_size) * 114.0
+ img = np.array(image)
+ r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
+ resized_img = cv2.resize(
+ img,
+ (int(img.shape[1] * r), int(img.shape[0] * r)),
+ interpolation=cv2.INTER_LINEAR, ).astype(np.float32)
+ padded_img[:int(img.shape[0] * r), :int(img.shape[1] * r)] = resized_img
+
+ padded_img = padded_img[:, :, ::-1]
+ padded_img /= 255.0
+ if mean is not None:
+ padded_img -= mean
+ if std is not None:
+ padded_img /= std
+ padded_img = padded_img.transpose(swap)
+ padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
+ return padded_img, r
+
+
+def postprocess(predictions, ratio):
+ boxes = predictions[:, :4]
+ scores = predictions[:, 4:5] * predictions[:, 5:]
+ boxes_xyxy = np.ones_like(boxes)
+ boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
+ boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
+ boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
+ boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
+ boxes_xyxy /= ratio
+ dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
+ return dets
+
+
+def nms(boxes, scores, nms_thr):
+ """Single class NMS implemented in Numpy."""
+ x1 = boxes[:, 0]
+ y1 = boxes[:, 1]
+ x2 = boxes[:, 2]
+ y2 = boxes[:, 3]
+
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while order.size > 0:
+ i = order[0]
+ keep.append(i)
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= nms_thr)[0]
+ order = order[inds + 1]
+
+ return keep
+
+
+def multiclass_nms(boxes, scores, nms_thr, score_thr):
+ """Multiclass NMS implemented in Numpy"""
+ final_dets = []
+ num_classes = scores.shape[1]
+ for cls_ind in range(num_classes):
+ cls_scores = scores[:, cls_ind]
+ valid_score_mask = cls_scores > score_thr
+ if valid_score_mask.sum() == 0:
+ continue
+ else:
+ valid_scores = cls_scores[valid_score_mask]
+ valid_boxes = boxes[valid_score_mask]
+ keep = nms(valid_boxes, valid_scores, nms_thr)
+ if len(keep) > 0:
+ cls_inds = np.ones((len(keep), 1)) * cls_ind
+ dets = np.concatenate(
+ [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1)
+ final_dets.append(dets)
+ if len(final_dets) == 0:
+ return None
+ return np.concatenate(final_dets, 0)
+
+
+def get_color_map_list(num_classes):
+ color_map = num_classes * [0, 0, 0]
+ for i in range(0, num_classes):
+ j = 0
+ lab = i
+ while lab:
+ color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+ color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+ color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+ j += 1
+ lab >>= 3
+ color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+ return color_map
+
+
+def draw_box(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
+ color_list = get_color_map_list(len(class_names))
+ for i in range(len(boxes)):
+ box = boxes[i]
+ cls_id = int(cls_ids[i])
+ color = tuple(color_list[cls_id])
+ score = scores[i]
+ if score < conf:
+ continue
+ x0 = int(box[0])
+ y0 = int(box[1])
+ x1 = int(box[2])
+ y1 = int(box[3])
+
+ text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)
+ font = cv2.FONT_HERSHEY_SIMPLEX
+
+ txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
+ cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
+ cv2.rectangle(img, (x0, y0 + 1),
+ (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
+ color, -1)
+ cv2.putText(
+ img,
+ text, (x0, y0 + txt_size[1]),
+ font,
+ 0.8, (0, 255, 0),
+ thickness=2)
+
+ return img
+
+
+def get_engine(precision, model_file_path):
+ # TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
+ TRT_LOGGER = trt.Logger()
+ builder = trt.Builder(TRT_LOGGER)
+ config = builder.create_builder_config()
+ if precision == 'int8':
+ network = builder.create_network(EXPLICIT_BATCH | EXPLICIT_PRECISION)
+ else:
+ network = builder.create_network(EXPLICIT_BATCH)
+ parser = trt.OnnxParser(network, TRT_LOGGER)
+
+ runtime = trt.Runtime(TRT_LOGGER)
+ if model_file_path.endswith('.trt'):
+ # If a serialized engine exists, use it instead of building an engine.
+ print("Reading engine from file {}".format(model_file_path))
+ with open(model_file_path,
+ "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
+ engine = runtime.deserialize_cuda_engine(f.read())
+ for i in range(network.num_layers):
+ layer = network.get_layer(i)
+ print(i, layer.name)
+ return engine
+ else:
+ config.max_workspace_size = 1 << 30
+
+ if precision == "fp16":
+ if not builder.platform_has_fast_fp16:
+ print("FP16 is not supported natively on this platform/device")
+ else:
+ config.set_flag(trt.BuilderFlag.FP16)
+ elif precision == "int8":
+ if not builder.platform_has_fast_int8:
+ print("INT8 is not supported natively on this platform/device")
+ else:
+ if builder.platform_has_fast_fp16:
+ # Also enable fp16, as some layers may be even more efficient in fp16 than int8
+ config.set_flag(trt.BuilderFlag.FP16)
+ config.set_flag(trt.BuilderFlag.INT8)
+
+ builder.max_batch_size = 1
+ print('Loading ONNX file from path {}...'.format(model_file_path))
+ with open(model_file_path, 'rb') as model:
+ print('Beginning ONNX file parsing')
+ if not parser.parse(model.read()):
+ print('ERROR: Failed to parse the ONNX file.')
+ for error in range(parser.num_errors):
+ print(parser.get_error(error))
+ return None
+
+ print('Completed parsing of ONNX file')
+ print('Building an engine from file {}; this may take a while...'.
+ format(model_file_path))
+        plan = builder.build_serialized_network(network, config)
+        engine = runtime.deserialize_cuda_engine(plan)
+        print("Completed creating Engine")
+        # Cache the serialized engine next to the ONNX file instead of
+        # overwriting the ONNX model itself.
+        engine_path = model_file_path.replace('.onnx', '.trt')
+        with open(engine_path, "wb") as f:
+            f.write(plan)
+ for i in range(network.num_layers):
+ layer = network.get_layer(i)
+ print(i, layer.name)
+ return engine
+
+
+# Simple helper data class that's a little nicer to use than a 2-tuple.
+class HostDeviceMem(object):
+ def __init__(self, host_mem, device_mem):
+ self.host = host_mem
+ self.device = device_mem
+
+ def __str__(self):
+ return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
+
+ def __repr__(self):
+ return self.__str__()
+
+
+def allocate_buffers(engine):
+ inputs = []
+ outputs = []
+ bindings = []
+ stream = cuda.Stream()
+ for binding in engine:
+ size = trt.volume(engine.get_binding_shape(
+ binding)) * engine.max_batch_size
+ dtype = trt.nptype(engine.get_binding_dtype(binding))
+ # Allocate host and device buffers
+ host_mem = cuda.pagelocked_empty(size, dtype)
+ device_mem = cuda.mem_alloc(host_mem.nbytes)
+ # Append the device buffer to device bindings.
+ bindings.append(int(device_mem))
+ # Append to the appropriate list.
+ if engine.binding_is_input(binding):
+ inputs.append(HostDeviceMem(host_mem, device_mem))
+ else:
+ outputs.append(HostDeviceMem(host_mem, device_mem))
+ return inputs, outputs, bindings, stream
+
+
+def run_inference(context, bindings, inputs, outputs, stream):
+ # Transfer input data to the GPU.
+ [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
+ # Run inference.
+ context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
+ # Transfer predictions back from the GPU.
+ [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
+ # Synchronize the stream
+ stream.synchronize()
+ # Return only the host outputs.
+ return [out.host for out in outputs]
+
+
+def main(args):
+ onnx_model = args.model_path
+ img_path = args.image_file
+ num_class = len(CLASS_LABEL)
+ repeat = 1000
+ engine = get_engine(args.precision, onnx_model)
+
+ model_all_names = []
+ for idx in range(engine.num_bindings):
+ is_input = engine.binding_is_input(idx)
+ name = engine.get_binding_name(idx)
+ op_type = engine.get_binding_dtype(idx)
+ model_all_names.append(name)
+ shape = engine.get_binding_shape(idx)
+ print('input id:', idx, ' is input: ', is_input, ' binding name:',
+ name, ' shape:', shape, 'type: ', op_type)
+
+ context = engine.create_execution_context()
+ print('Allocate buffers ...')
+ inputs, outputs, bindings, stream = allocate_buffers(engine)
+ print("TRT set input ...")
+
+ origin_img = cv2.imread(img_path)
+ input_shape = [args.img_shape, args.img_shape]
+ input_image, ratio = preprocess(origin_img, input_shape)
+
+ inputs[0].host = np.expand_dims(input_image, axis=0)
+
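+    # Warm up with 50 untimed runs before the timed loop below.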
+ for _ in range(0, 50):
+ trt_outputs = run_inference(
+ context,
+ bindings=bindings,
+ inputs=inputs,
+ outputs=outputs,
+ stream=stream)
+
+ time1 = time.time()
+ for _ in range(0, repeat):
+ trt_outputs = run_inference(
+ context,
+ bindings=bindings,
+ inputs=inputs,
+ outputs=outputs,
+ stream=stream)
+ time2 = time.time()
+ # total time cost(ms)
+ total_inference_cost = (time2 - time1) * 1000
+ print("model path: ", onnx_model, " precision: ", args.precision)
+ print("In TensorRT, ",
+ "average latency is : {} ms".format(total_inference_cost / repeat))
+ # Do postprocess
+ output = trt_outputs[0]
+ predictions = np.reshape(output, (1, -1, int(5 + num_class)))[0]
+ dets = postprocess(predictions, ratio)
+ # Draw rectangles and labels on the original image
+ if dets is not None:
+        final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
+ origin_img = draw_box(
+ origin_img,
+ final_boxes,
+ final_scores,
+ final_cls_inds,
+ conf=0.5,
+ class_names=CLASS_LABEL)
+ cv2.imwrite('output.jpg', origin_img)
+ print('The prediction results are saved in output.jpg.')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--model_path',
+ type=str,
+ default="quant_model.onnx",
+ help="inference model filepath")
+ parser.add_argument(
+ '--image_file', type=str, default="bus.jpg", help="image path")
+ parser.add_argument(
+ '--precision', type=str, default='fp32', help="support fp32/fp16/int8.")
+ parser.add_argument('--img_shape', type=int, default=640, help="input_size")
+ args = parser.parse_args()
+ main(args)
diff --git a/example/auto_compression/pytorch_yolov7/paddle_trt_infer.py b/example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py
similarity index 98%
rename from example/auto_compression/pytorch_yolov7/paddle_trt_infer.py
rename to example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py
index fedc9cc1..eacb67fb 100644
--- a/example/auto_compression/pytorch_yolov7/paddle_trt_infer.py
+++ b/example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py
@@ -244,8 +244,9 @@ def predict_image(predictor,
threshold=0.5,
arch='YOLOv5'):
img, scale_factor = image_preprocess(image_file, image_shape)
-    inputs = {}
-    if arch == 'YOLOv5':
+    inputs = {}
+    if arch == 'YOLOv6':
+ inputs['x2paddle_image_arrays'] = img
+ else:
inputs['x2paddle_images'] = img
input_names = predictor.get_input_names()
for i in range(len(input_names)):
@@ -306,6 +307,8 @@ if __name__ == '__main__':
default='GPU',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU"
)
+ parser.add_argument(
+ '--arch', type=str, default='YOLOv5', help="architectures name.")
parser.add_argument('--img_shape', type=int, default=640, help="input_size")
args = parser.parse_args()
@@ -319,4 +322,5 @@ if __name__ == '__main__':
args.image_file,
image_shape=[args.img_shape, args.img_shape],
warmup=warmup,
- repeats=repeats)
+ repeats=repeats,
+ arch=args.arch)
diff --git a/example/auto_compression/pytorch_yolov6/post_process.py b/example/auto_compression/pytorch_yolo_series/post_process.py
similarity index 75%
rename from example/auto_compression/pytorch_yolov6/post_process.py
rename to example/auto_compression/pytorch_yolo_series/post_process.py
index 37bd2c95..644c24b8 100644
--- a/example/auto_compression/pytorch_yolov6/post_process.py
+++ b/example/auto_compression/pytorch_yolo_series/post_process.py
@@ -14,6 +14,8 @@
import numpy as np
import cv2
+import json
+import sys
def box_area(boxes):
@@ -68,9 +70,9 @@ def nms(boxes, scores, iou_threshold):
return keep
-class YOLOv6PostProcess(object):
+class YOLOPostProcess(object):
"""
- Post process of YOLOv6 network.
+ Post process of YOLO-series network.
args:
score_threshold(float): Threshold to filter out bounding boxes with low
confidence score. If not provided, consider all boxes.
@@ -157,8 +159,8 @@ class YOLOv6PostProcess(object):
if len(pred.shape) == 1:
pred = pred[np.newaxis, :]
pred_bboxes = pred[:, :4]
- scale_factor = np.tile(scale_factor[i][::-1], (1, 2))
- pred_bboxes /= scale_factor
+ scale = np.tile(scale_factor[i][::-1], (2))
+ pred_bboxes /= scale
bbox = np.concatenate(
[
pred[:, -1][:, np.newaxis], pred[:, -2][:, np.newaxis],
@@ -171,3 +173,59 @@ class YOLOv6PostProcess(object):
bboxs = np.concatenate(bboxs, axis=0)
box_nums = np.array(box_nums)
return {'bbox': bboxs, 'bbox_num': box_nums}
+
+
+def coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list):
+ try:
+ from pycocotools.coco import COCO
+ from pycocotools.cocoeval import COCOeval
+    except ImportError:
+ print(
+ "[ERROR] Not found pycocotools, please install by `pip install pycocotools`"
+ )
+ sys.exit(1)
+
+ coco_gt = COCO(anno_file)
+ cats = coco_gt.loadCats(coco_gt.getCatIds())
+ clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
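+    # COCO category ids are not contiguous; map the model's 0-based class
+    # indices back to the original category ids before dumping results.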
+ results = []
+ for bboxes, bbox_nums, image_id in zip(bboxes_list, bbox_nums_list,
+ image_id_list):
+ results += _get_det_res(bboxes, bbox_nums, image_id, clsid2catid)
+
+ output = "bbox.json"
+ with open(output, 'w') as f:
+ json.dump(results, f)
+
+ coco_dt = coco_gt.loadRes(output)
+ coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+ return coco_eval.stats
+
+
+def _get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map):
+ det_res = []
+ k = 0
+ for i in range(len(bbox_nums)):
+ cur_image_id = int(image_id[i][0])
+ det_nums = bbox_nums[i]
+ for j in range(det_nums):
+ dt = bboxes[k]
+ k = k + 1
+ num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
+ if int(num_id) < 0:
+ continue
+ category_id = label_to_cat_id_map[int(num_id)]
+ w = xmax - xmin
+ h = ymax - ymin
+ bbox = [xmin, ymin, w, h]
+ dt_res = {
+ 'image_id': cur_image_id,
+ 'category_id': category_id,
+ 'bbox': bbox,
+ 'score': score
+ }
+ det_res.append(dt_res)
+ return det_res
diff --git a/example/auto_compression/pytorch_yolo_series/run.py b/example/auto_compression/pytorch_yolo_series/run.py
new file mode 100644
index 00000000..1a22d822
--- /dev/null
+++ b/example/auto_compression/pytorch_yolo_series/run.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import numpy as np
+import argparse
+from tqdm import tqdm
+import paddle
+from paddleslim.common import load_config
+from paddleslim.auto_compression import AutoCompression
+from dataset import COCOValDataset, COCOTrainDataset
+from post_process import YOLOPostProcess, coco_metric
+
+
+def argsparser():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument(
+ '--config_path',
+ type=str,
+ default=None,
+ help="path of compression strategy config.",
+ required=True)
+ parser.add_argument(
+ '--save_dir',
+ type=str,
+ default='output',
+ help="directory to save compressed model.")
+ parser.add_argument(
+ '--devices',
+ type=str,
+ default='gpu',
+ help="which device used to compress.")
+ parser.add_argument(
+ '--eval', type=bool, default=False, help="whether to run evaluation.")
+
+ return parser
+
+
+def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
+ bboxes_list, bbox_nums_list, image_id_list = [], [], []
+ with tqdm(
+ total=len(val_loader),
+ bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}',
+ ncols=80) as t:
+ for data in val_loader:
+ data_all = {k: np.array(v) for k, v in data.items()}
+ outs = exe.run(compiled_test_program,
+ feed={test_feed_names[0]: data_all['image']},
+ fetch_list=test_fetch_list,
+ return_numpy=False)
+            postprocess = YOLOPostProcess(
+                score_threshold=0.001, nms_threshold=0.65, multi_label=True)
+            res = postprocess(np.array(outs[0]), data_all['scale_factor'])
+ bboxes_list.append(res['bbox'])
+ bbox_nums_list.append(res['bbox_num'])
+ image_id_list.append(np.array(data_all['im_id']))
+ t.update()
+ map_res = coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list)
+ return map_res[0]
+
+
+def main():
+ global global_config
+ all_config = load_config(FLAGS.config_path)
+ assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}"
+ global_config = all_config["Global"]
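+    # X2Paddle names the exported graph input 'x2paddle_image_arrays' for
+    # YOLOv6 and 'x2paddle_images' for YOLOv5/YOLOv7.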
+ input_name = 'x2paddle_image_arrays' if global_config[
+ 'arch'] == 'YOLOv6' else 'x2paddle_images'
+ dataset = COCOTrainDataset(
+ dataset_dir=global_config['dataset_dir'],
+ image_dir=global_config['train_image_dir'],
+ anno_path=global_config['train_anno_path'],
+ input_name=input_name)
+ train_loader = paddle.io.DataLoader(
+ dataset, batch_size=1, shuffle=True, drop_last=True, num_workers=0)
+
+ if 'Evaluation' in global_config.keys() and global_config[
+ 'Evaluation'] and paddle.distributed.get_rank() == 0:
+ eval_func = eval_function
+ global val_loader
+ dataset = COCOValDataset(
+ dataset_dir=global_config['dataset_dir'],
+ image_dir=global_config['val_image_dir'],
+ anno_path=global_config['val_anno_path'])
+ global anno_file
+ anno_file = dataset.ann_file
+ val_loader = paddle.io.DataLoader(
+ dataset,
+ batch_size=1,
+ shuffle=False,
+ drop_last=False,
+ num_workers=0)
+ else:
+ eval_func = None
+
+ ac = AutoCompression(
+ model_dir=global_config["model_dir"],
+ train_dataloader=train_loader,
+ save_dir=FLAGS.save_dir,
+ config=all_config,
+ eval_callback=eval_func)
+ ac.compress()
+ ac.export_onnx()
+
+
+if __name__ == '__main__':
+ paddle.enable_static()
+ parser = argsparser()
+ FLAGS = parser.parse_args()
+
+ assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
+ paddle.set_device(FLAGS.devices)
+
+ main()
diff --git a/example/auto_compression/pytorch_yolov5/README.md b/example/auto_compression/pytorch_yolov5/README.md
deleted file mode 100644
index 16709408..00000000
--- a/example/auto_compression/pytorch_yolov5/README.md
+++ /dev/null
@@ -1,147 +0,0 @@
-# YOLOv5 Object Detection Auto-Compression Example
-
-Contents:
-- [1. Introduction](#1简介)
-- [2. Benchmark](#2Benchmark)
-- [3. Getting Started with Auto-Compression](#自动压缩流程)
-  - [3.1 Environment Preparation](#31-准备环境)
-  - [3.2 Dataset Preparation](#32-准备数据集)
-  - [3.3 Inference Model Preparation](#33-准备预测模型)
-  - [3.4 Auto-Compress and Export the Model](#35-自动压缩并产出模型)
-  - [3.5 Evaluate Model Accuracy](#34-测试模型精度)
-- [4. Inference Deployment](#4预测部署)
-- [5. FAQ](5FAQ)
-
-## 1. Introduction
-
-The Paddle model-conversion tool [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) converts ```Caffe/TensorFlow/ONNX/PyTorch``` models into PaddlePaddle inference models in one step. With X2Paddle, inference models from these frameworks can easily use PaddleSlim's auto-compression (ACT) functionality.
-
-This example takes the [ultralytics/yolov5](https://github.com/ultralytics/yolov5) object detection model, converts it from PyTorch to a Paddle model, and then compresses it with ACT. The compression strategy used here is quantization-aware training.
-
-## 2. Benchmark
-
-| Model | Strategy | Input Size | mAP<sup>val<br>0.5:0.95</sup> | FP32 Latency<br>(ms) | FP16 Latency<br>(ms) | INT8 Latency<br>(ms) | Config | Inference Model |
-| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: |
-| YOLOv5s | Baseline | 640*640 | 37.4 | 5.95ms | 2.44ms | - | - | [Model](https://bj.bcebos.com/v1/paddle-slim-models/detection/yolov5s_infer.tar) |
-| YOLOv5s | KL post-training quantization | 640*640 | 36.0 | - | - | 1.87ms | - | - |
-| YOLOv5s | Quantization-aware training with distillation | 640*640 | **36.9** | - | - | **1.87ms** | [config](./configs/yolov5s_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov5s_quant.tar) |
-
-
-Notes:
-- All mAP values are measured on the COCO val2017 dataset.
-- YOLOv5s latency is measured on a Tesla T4 GPU with TensorRT 8.4.1 and batch_size=1, using the [cpp_infer](./cpp_infer) test script.
-
-## 3. Auto-Compression Workflow
-
-#### 3.1 Environment Preparation
-- PaddlePaddle >= 2.3 (install from the [Paddle website](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
-- PaddleSlim >= 2.3
-- PaddleDet >= 2.4
-- [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) >= 1.3.6
-- opencv-python
-
-(1) Install paddlepaddle:
-```shell
-# CPU
-pip install paddlepaddle
-# GPU
-pip install paddlepaddle-gpu
-```
-
-(2) Install paddleslim:
-```shell
-pip install paddleslim
-```
-
-(3) Install paddledet:
-```shell
-pip install paddledet
-```
-
-Note: PaddleDet is installed only to reuse the Dataloader components from PaddleDetection.
-
-(4) Install X2Paddle 1.3.6 or later:
-```shell
-pip install x2paddle sympy onnx
-```
-
-#### 3.2 Dataset Preparation
-
-This example runs auto-compression on COCO data by default and relies on PaddleDetection's data-reading module. For custom COCO-format data, or data in other formats, follow the [PaddleDetection data preparation guide](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/docs/tutorials/PrepareDataSet.md).
-
-If the dataset is already prepared, just point the `dataset_dir` field under `EvalDataset` in `./configs/yolov5_reader.yml` to your dataset path.
-
-#### 3.3 Inference Model Preparation
-
-(1) Prepare the ONNX model:
-
-Export the ONNX model following the official [export guide](https://github.com/ultralytics/yolov5/issues/251) from [ultralytics/yolov5](https://github.com/ultralytics/yolov5), or download the ready-made [yolov5s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx).
-```shell
-python export.py --weights yolov5s.pt --include onnx
-```
-
-(2) Convert the model:
-```shell
-x2paddle --framework=onnx --model=yolov5s.onnx --save_dir=pd_model
-cp -r pd_model/inference_model/ yolov5s_infer
-```
-This yields the YOLOv5s inference model (`model.pdmodel` and `model.pdiparams`). For a quick start, download the YOLOv5s [Paddle inference model](https://bj.bcebos.com/v1/paddle-slim-models/detection/yolov5s_infer.tar) linked in the table above.
-
-
-The inference model consists of two files: `model.pdmodel` (the model graph) and `model.pdiparams` (the weights).
-
-
-#### 3.4 Auto-Compress and Export the Model
-
-The distillation-quantization example is launched via the run.py script, which compresses the model through the ```paddleslim.auto_compression.AutoCompression``` API. Set the model path and the distillation, quantization, and training parameters in the config file, then quantize and distill the model with:
-
-- Single-GPU training:
-```
-export CUDA_VISIBLE_DEVICES=0
-python run.py --config_path=./configs/yolov5s_qat_dis.yaml --save_dir='./output/'
-```
-
-- Multi-GPU training:
-```
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --log_dir=log --gpus 0,1,2,3 run.py \
-          --config_path=./configs/yolov5s_qat_dis.yaml --save_dir='./output/'
-```
-
-#### 3.5 Evaluate Model Accuracy
-
-Use the eval.py script to obtain the model's mAP:
-```
-export CUDA_VISIBLE_DEVICES=0
-python eval.py --config_path=./configs/yolov5s_qat_dis.yaml
-```
-
-**Note**: To evaluate a quantized model, point the `model_dir` field of the config file to the quantized model's path.
-
-
-## 4. Inference Deployment
-
-#### Paddle-TensorRT C++ Deployment
-
-Go into the [cpp_infer](./cpp_infer) directory, prepare the environment and build following the [C++ TensorRT Benchmark tutorial](./cpp_infer/README.md), then run the test:
-```shell
-# Build
-bash compile.sh
-# Run
-./build/trt_run --model_file yolov5s_quant/model.pdmodel --params_file yolov5s_quant/model.pdiparams --run_mode=trt_int8
-```
-
-#### Paddle-TensorRT Python Deployment:
-
-First install a [Paddle package](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python) built with TensorRT support.
-
-Then deploy with [paddle_trt_infer.py](./paddle_trt_infer.py):
-```shell
-python paddle_trt_infer.py --model_path=output --image_file=images/000000570688.jpg --benchmark=True --run_mode=trt_int8
-```
-
-## 5. FAQ
-
-- To evaluate the accuracy of the post-training-quantized model, run:
-```shell
-python post_quant.py --config_path=./configs/yolov5s_qat_dis.yaml
-```
diff --git a/example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml b/example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml
deleted file mode 100644
index cb87c3f8..00000000
--- a/example/auto_compression/pytorch_yolov5/configs/yolov5_reader.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-metric: COCO
-num_classes: 80
-
-# Dataset configuration
-TrainDataset:
- !COCODataSet
- image_dir: train2017
- anno_path: annotations/instances_train2017.json
- dataset_dir: dataset/coco/
-
-EvalDataset:
- !COCODataSet
- image_dir: val2017
- anno_path: annotations/instances_val2017.json
- dataset_dir: dataset/coco/
-
-worker_num: 0
-
-# preprocess reader in test
-EvalReader:
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: [640, 640], keep_ratio: True}
- - Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
- - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
- - Permute: {}
- batch_size: 1
diff --git a/example/auto_compression/pytorch_yolov5/cpp_infer/trt_run.cc b/example/auto_compression/pytorch_yolov5/cpp_infer/trt_run.cc
deleted file mode 100644
index 0ae055ac..00000000
--- a/example/auto_compression/pytorch_yolov5/cpp_infer/trt_run.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-#include <iostream>
-#include <numeric>
-#include <memory>
-#include <chrono>
-
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-#include <cuda_runtime.h>
-
-#include "paddle/include/paddle_inference_api.h"
-#include "paddle/include/experimental/phi/common/float16.h"
-
-using paddle_infer::Config;
-using paddle_infer::Predictor;
-using paddle_infer::CreatePredictor;
-using paddle_infer::PrecisionType;
-using phi::dtype::float16;
-
-DEFINE_string(model_dir, "", "Directory of the inference model.");
-DEFINE_string(model_file, "", "Path of the inference model file.");
-DEFINE_string(params_file, "", "Path of the inference params file.");
-DEFINE_string(run_mode, "trt_fp32", "run_mode which can be: trt_fp32, trt_fp16 and trt_int8");
-DEFINE_int32(batch_size, 1, "Batch size.");
-DEFINE_int32(gpu_id, 0, "GPU card ID num.");
-DEFINE_int32(trt_min_subgraph_size, 3, "tensorrt min_subgraph_size");
-DEFINE_int32(warmup, 50, "warmup");
-DEFINE_int32(repeats, 1000, "repeats");
-
-using Time = decltype(std::chrono::high_resolution_clock::now());
-Time time() { return std::chrono::high_resolution_clock::now(); };
-double time_diff(Time t1, Time t2) {
- typedef std::chrono::microseconds ms;
- auto diff = t2 - t1;
-  ms counter = std::chrono::duration_cast<ms>(diff);
- return counter.count() / 1000.0;
-}
-
-std::shared_ptr<Predictor> InitPredictor() {
- Config config;
- std::string model_path;
- if (FLAGS_model_dir != "") {
- config.SetModel(FLAGS_model_dir);
- model_path = FLAGS_model_dir.substr(0, FLAGS_model_dir.find_last_of("/"));
- } else {
- config.SetModel(FLAGS_model_file, FLAGS_params_file);
- model_path = FLAGS_model_file.substr(0, FLAGS_model_file.find_last_of("/"));
- }
- // enable tune
- std::cout << "model_path: " << model_path << std::endl;
- config.EnableUseGpu(256, FLAGS_gpu_id);
- if (FLAGS_run_mode == "trt_fp32") {
- config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size,
- PrecisionType::kFloat32, false, false);
- } else if (FLAGS_run_mode == "trt_fp16") {
- config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size,
- PrecisionType::kHalf, false, false);
- } else if (FLAGS_run_mode == "trt_int8") {
- config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size,
- PrecisionType::kInt8, false, false);
- }
- config.EnableMemoryOptim();
- config.SwitchIrOptim(true);
- return CreatePredictor(config);
-}
-
-template <typename type>
-void run(Predictor *predictor, const std::vector<type> &input,
-         const std::vector<int> &input_shape, type* out_data, std::vector<int> out_shape) {
-
- // prepare input
-  int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
-                                  std::multiplies<int>());
-
- auto input_names = predictor->GetInputNames();
- auto input_t = predictor->GetInputHandle(input_names[0]);
- input_t->Reshape(input_shape);
- input_t->CopyFromCpu(input.data());
-
- for (int i = 0; i < FLAGS_warmup; ++i)
- CHECK(predictor->Run());
-
- auto st = time();
- for (int i = 0; i < FLAGS_repeats; ++i) {
- auto input_names = predictor->GetInputNames();
- auto input_t = predictor->GetInputHandle(input_names[0]);
- input_t->Reshape(input_shape);
- input_t->CopyFromCpu(input.data());
-
- CHECK(predictor->Run());
-
- auto output_names = predictor->GetOutputNames();
- auto output_t = predictor->GetOutputHandle(output_names[0]);
-    std::vector<int> output_shape = output_t->shape();
- output_t -> ShareExternalData(out_data, out_shape, paddle_infer::PlaceType::kGPU);
- }
-
- LOG(INFO) << "[" << FLAGS_run_mode << " bs-" << FLAGS_batch_size << " ] run avg time is " << time_diff(st, time()) / FLAGS_repeats
- << " ms";
-}
-
-int main(int argc, char *argv[]) {
- google::ParseCommandLineFlags(&argc, &argv, true);
- auto predictor = InitPredictor();
-  std::vector<int> input_shape = {FLAGS_batch_size, 3, 640, 640};
- // float16
- using dtype = float16;
-  std::vector<dtype> input_data(FLAGS_batch_size * 3 * 640 * 640, dtype(1.0));
-
- dtype *out_data;
- int out_data_size = FLAGS_batch_size * 25200 * 85;
- cudaHostAlloc((void**)&out_data, sizeof(float) * out_data_size, cudaHostAllocMapped);
-
-  std::vector<int> out_shape{ FLAGS_batch_size, 1, 25200, 85};
- run(predictor.get(), input_data, input_shape, out_data, out_shape);
- return 0;
-}
diff --git a/example/auto_compression/pytorch_yolov5/paddle_trt_infer.py b/example/auto_compression/pytorch_yolov5/paddle_trt_infer.py
deleted file mode 100644
index 62c2c89b..00000000
--- a/example/auto_compression/pytorch_yolov5/paddle_trt_infer.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import cv2
-import numpy as np
-import argparse
-import time
-
-from paddle.inference import Config
-from paddle.inference import create_predictor
-
-from post_process import YOLOv5PostProcess
-
-CLASS_LABEL = [
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
- 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
- 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
- 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
- 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
- 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
- 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
- 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
- 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
- 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
- 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
- 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
- 'hair drier', 'toothbrush'
-]
-
-
-def generate_scale(im, target_shape, keep_ratio=True):
- """
- Args:
- im (np.ndarray): image (np.ndarray)
- Returns:
- im_scale_x: the resize ratio of X
- im_scale_y: the resize ratio of Y
- """
- origin_shape = im.shape[:2]
- if keep_ratio:
- im_size_min = np.min(origin_shape)
- im_size_max = np.max(origin_shape)
- target_size_min = np.min(target_shape)
- target_size_max = np.max(target_shape)
- im_scale = float(target_size_min) / float(im_size_min)
- if np.round(im_scale * im_size_max) > target_size_max:
- im_scale = float(target_size_max) / float(im_size_max)
- im_scale_x = im_scale
- im_scale_y = im_scale
- else:
- resize_h, resize_w = target_shape
- im_scale_y = resize_h / float(origin_shape[0])
- im_scale_x = resize_w / float(origin_shape[1])
- return im_scale_y, im_scale_x
-
-
-def image_preprocess(img_path, target_shape):
- img = cv2.imread(img_path)
- # Resize
- im_scale_y, im_scale_x = generate_scale(img, target_shape)
- img = cv2.resize(
- img,
- None,
- None,
- fx=im_scale_x,
- fy=im_scale_y,
- interpolation=cv2.INTER_LINEAR)
- # Pad
- im_h, im_w = img.shape[:2]
- h, w = target_shape[:]
- if h != im_h or w != im_w:
- canvas = np.ones((h, w, 3), dtype=np.float32)
- canvas *= np.array([114.0, 114.0, 114.0], dtype=np.float32)
- canvas[0:im_h, 0:im_w, :] = img.astype(np.float32)
- img = canvas
- img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
- img = np.transpose(img, [2, 0, 1]) / 255
- img = np.expand_dims(img, 0)
- scale_factor = np.array([[im_scale_y, im_scale_x]])
- return img.astype(np.float32), scale_factor
-
-
-def get_color_map_list(num_classes):
- color_map = num_classes * [0, 0, 0]
- for i in range(0, num_classes):
- j = 0
- lab = i
- while lab:
- color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
- color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
- color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
- j += 1
- lab >>= 3
- color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
- return color_map
-
-
-def draw_box(image_file, results, class_label, threshold=0.5):
- srcimg = cv2.imread(image_file, 1)
- for i in range(len(results)):
- color_list = get_color_map_list(len(class_label))
- clsid2color = {}
- classid, conf = int(results[i, 0]), results[i, 1]
- if conf < threshold:
- continue
- xmin, ymin, xmax, ymax = int(results[i, 2]), int(results[i, 3]), int(
- results[i, 4]), int(results[i, 5])
-
- if classid not in clsid2color:
- clsid2color[classid] = color_list[classid]
- color = tuple(clsid2color[classid])
-
- cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2)
- print(class_label[classid] + ': ' + str(round(conf, 3)))
- cv2.putText(
- srcimg,
- class_label[classid] + ':' + str(round(conf, 3)), (xmin, ymin - 10),
- cv2.FONT_HERSHEY_SIMPLEX,
- 0.8, (0, 255, 0),
- thickness=2)
- return srcimg
-
-
-def load_predictor(model_dir,
- run_mode='paddle',
- batch_size=1,
- device='CPU',
- min_subgraph_size=3,
- use_dynamic_shape=False,
- trt_min_shape=1,
- trt_max_shape=1280,
- trt_opt_shape=640,
- trt_calib_mode=False,
- cpu_threads=1,
- enable_mkldnn=False,
- enable_mkldnn_bfloat16=False,
- delete_shuffle_pass=False):
- """set AnalysisConfig, generate AnalysisPredictor
- Args:
- model_dir (str): root path of __model__ and __params__
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
- run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8)
- use_dynamic_shape (bool): use dynamic shape or not
- trt_min_shape (int): min shape for dynamic shape in trt
- trt_max_shape (int): max shape for dynamic shape in trt
- trt_opt_shape (int): opt shape for dynamic shape in trt
-        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
-            calibration, trt_calib_mode needs to be set to True
- delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass in TensorRT.
- Used by action model.
- Returns:
- predictor (PaddlePredictor): AnalysisPredictor
- Raises:
- ValueError: predict by TensorRT need device == 'GPU'.
- """
- if device != 'GPU' and run_mode != 'paddle':
- raise ValueError(
- "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
- .format(run_mode, device))
- config = Config(
- os.path.join(model_dir, 'model.pdmodel'),
- os.path.join(model_dir, 'model.pdiparams'))
- if device == 'GPU':
- # initial GPU memory(M), device ID
- config.enable_use_gpu(200, 0)
- # optimize graph and fuse op
- config.switch_ir_optim(True)
- elif device == 'XPU':
- config.enable_lite_engine()
- config.enable_xpu(10 * 1024 * 1024)
- else:
- config.disable_gpu()
- config.set_cpu_math_library_num_threads(cpu_threads)
- if enable_mkldnn:
- try:
- # cache 10 different shapes for mkldnn to avoid memory leak
- config.set_mkldnn_cache_capacity(10)
- config.enable_mkldnn()
- if enable_mkldnn_bfloat16:
- config.enable_mkldnn_bfloat16()
- except Exception as e:
- print(
- "The current environment does not support `mkldnn`, so disable mkldnn."
- )
- pass
-
- precision_map = {
- 'trt_int8': Config.Precision.Int8,
- 'trt_fp32': Config.Precision.Float32,
- 'trt_fp16': Config.Precision.Half
- }
- if run_mode in precision_map.keys():
- config.enable_tensorrt_engine(
- workspace_size=(1 << 25) * batch_size,
- max_batch_size=batch_size,
- min_subgraph_size=min_subgraph_size,
- precision_mode=precision_map[run_mode],
- use_static=False,
- use_calib_mode=trt_calib_mode)
-
- if use_dynamic_shape:
- min_input_shape = {
- 'image': [batch_size, 3, trt_min_shape, trt_min_shape]
- }
- max_input_shape = {
- 'image': [batch_size, 3, trt_max_shape, trt_max_shape]
- }
- opt_input_shape = {
- 'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
- }
- config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
- opt_input_shape)
- print('trt set dynamic shape done!')
-
- # disable print log when predict
- config.disable_glog_info()
- # enable shared memory
- config.enable_memory_optim()
- # disable feed, fetch OP, needed by zero_copy_run
- config.switch_use_feed_fetch_ops(False)
- if delete_shuffle_pass:
- config.delete_pass("shuffle_channel_detect_pass")
- predictor = create_predictor(config)
- return predictor
-
-
-def predict_image(predictor,
- image_file,
- image_shape=[640, 640],
- warmup=1,
- repeats=1,
- threshold=0.5,
- arch='YOLOv5'):
- img, scale_factor = image_preprocess(image_file, image_shape)
- inputs = {}
- if arch == 'YOLOv5':
- inputs['x2paddle_images'] = img
- input_names = predictor.get_input_names()
- for i in range(len(input_names)):
- input_tensor = predictor.get_input_handle(input_names[i])
- input_tensor.copy_from_cpu(inputs[input_names[i]])
-
- for i in range(warmup):
- predictor.run()
-
- np_boxes = None
- predict_time = 0.
- time_min = float("inf")
- time_max = float('-inf')
- for i in range(repeats):
- start_time = time.time()
- predictor.run()
- output_names = predictor.get_output_names()
- boxes_tensor = predictor.get_output_handle(output_names[0])
- np_boxes = boxes_tensor.copy_to_cpu()
- end_time = time.time()
- timed = end_time - start_time
- time_min = min(time_min, timed)
- time_max = max(time_max, timed)
- predict_time += timed
-
- time_avg = predict_time / repeats
- print('Inference time(ms): min={}, max={}, avg={}'.format(
- round(time_min * 1000, 2),
- round(time_max * 1000, 1), round(time_avg * 1000, 1)))
- postprocess = YOLOv5PostProcess(
- score_threshold=0.001, nms_threshold=0.6, multi_label=True)
- res = postprocess(np_boxes, scale_factor)
- res_img = draw_box(
- image_file, res['bbox'], CLASS_LABEL, threshold=threshold)
- cv2.imwrite('result.jpg', res_img)
-
-
-if __name__ == '__main__':
-
- parser = argparse.ArgumentParser()
- parser.add_argument(
- '--image_file', type=str, default=None, help="image path")
- parser.add_argument(
- '--model_path', type=str, help="inference model filepath")
- parser.add_argument(
- '--benchmark',
- type=bool,
- default=False,
- help="Whether run benchmark or not.")
- parser.add_argument(
- '--run_mode',
- type=str,
- default='paddle',
- help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)")
- parser.add_argument(
- '--device',
- type=str,
- default='GPU',
- help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU"
- )
- parser.add_argument('--img_shape', type=int, default=640, help="input_size")
- args = parser.parse_args()
-
- predictor = load_predictor(
- args.model_path, run_mode=args.run_mode, device=args.device)
- warmup, repeats = 1, 1
- if args.benchmark:
- warmup, repeats = 50, 100
- predict_image(
- predictor,
- args.image_file,
- image_shape=[args.img_shape, args.img_shape],
- warmup=warmup,
- repeats=repeats)
diff --git a/example/auto_compression/pytorch_yolov5/run.py b/example/auto_compression/pytorch_yolov5/run.py
deleted file mode 100644
index 965a546f..00000000
--- a/example/auto_compression/pytorch_yolov5/run.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import numpy as np
-import argparse
-import paddle
-from ppdet.core.workspace import load_config, merge_config
-from ppdet.core.workspace import create
-from ppdet.metrics import COCOMetric, VOCMetric
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
-from paddleslim.auto_compression import AutoCompression
-
-from post_process import YOLOv5PostProcess
-
-
-def argsparser():
- parser = argparse.ArgumentParser(description=__doc__)
- parser.add_argument(
- '--config_path',
- type=str,
- default=None,
- help="path of compression strategy config.",
- required=True)
- parser.add_argument(
- '--save_dir',
- type=str,
- default='output',
- help="directory to save compressed model.")
- parser.add_argument(
- '--devices',
- type=str,
- default='gpu',
- help="which device used to compress.")
-
- return parser
-
-
-def reader_wrapper(reader, input_list):
- def gen():
- for data in reader:
- in_dict = {}
- if isinstance(input_list, list):
- for input_name in input_list:
- in_dict[input_name] = data[input_name]
- elif isinstance(input_list, dict):
- for input_name in input_list.keys():
- in_dict[input_list[input_name]] = data[input_name]
- yield in_dict
-
- return gen
-
-
-def convert_numpy_data(data, metric):
- data_all = {}
- data_all = {k: np.array(v) for k, v in data.items()}
- if isinstance(metric, VOCMetric):
- for k, v in data_all.items():
- if not isinstance(v[0], np.ndarray):
- tmp_list = []
- for t in v:
- tmp_list.append(np.array(t))
- data_all[k] = np.array(tmp_list)
- else:
- data_all = {k: np.array(v) for k, v in data.items()}
- return data_all
-
-
-def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
- metric = global_config['metric']
- for batch_id, data in enumerate(val_loader):
- data_all = convert_numpy_data(data, metric)
- data_input = {}
- for k, v in data.items():
- if isinstance(global_config['input_list'], list):
- if k in test_feed_names:
- data_input[k] = np.array(v)
- elif isinstance(global_config['input_list'], dict):
- if k in global_config['input_list'].keys():
- data_input[global_config['input_list'][k]] = np.array(v)
- outs = exe.run(compiled_test_program,
- feed=data_input,
- fetch_list=test_fetch_list,
- return_numpy=False)
- res = {}
- if 'arch' in global_config and global_config['arch'] == 'YOLOv5':
- postprocess = YOLOv5PostProcess(
- score_threshold=0.001, nms_threshold=0.6, multi_label=True)
- res = postprocess(np.array(outs[0]), data_all['scale_factor'])
- else:
- for out in outs:
- v = np.array(out)
- if len(v.shape) > 1:
- res['bbox'] = v
- else:
- res['bbox_num'] = v
-
- metric.update(data_all, res)
- if batch_id % 100 == 0:
- print('Eval iter:', batch_id)
- metric.accumulate()
- metric.log()
- map_res = metric.get_results()
- metric.reset()
- return map_res['bbox'][0]
-
-
-def main():
- global global_config
- all_config = load_slim_config(FLAGS.config_path)
- assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}"
- global_config = all_config["Global"]
- reader_cfg = load_config(global_config['reader_config'])
-
- train_loader = create('EvalReader')(reader_cfg['TrainDataset'],
- reader_cfg['worker_num'],
- return_list=True)
- train_loader = reader_wrapper(train_loader, global_config['input_list'])
-
- if 'Evaluation' in global_config.keys() and global_config[
- 'Evaluation'] and paddle.distributed.get_rank() == 0:
- eval_func = eval_function
- dataset = reader_cfg['EvalDataset']
- global val_loader
- _eval_batch_sampler = paddle.io.BatchSampler(
- dataset, batch_size=reader_cfg['EvalReader']['batch_size'])
- val_loader = create('EvalReader')(dataset,
- reader_cfg['worker_num'],
- batch_sampler=_eval_batch_sampler,
- return_list=True)
- metric = None
- if reader_cfg['metric'] == 'COCO':
- clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
- anno_file = dataset.get_anno()
- metric = COCOMetric(
- anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox')
- elif reader_cfg['metric'] == 'VOC':
- metric = VOCMetric(
- label_list=dataset.get_label_list(),
- class_num=reader_cfg['num_classes'],
- map_type=reader_cfg['map_type'])
- else:
- raise ValueError("metric currently only supports COCO and VOC.")
- global_config['metric'] = metric
- else:
- eval_func = None
-
- ac = AutoCompression(
- model_dir=global_config["model_dir"],
- model_filename=global_config["model_filename"],
- params_filename=global_config["params_filename"],
- save_dir=FLAGS.save_dir,
- config=all_config,
- train_dataloader=train_loader,
- eval_callback=eval_func)
- ac.compress()
-
-
-if __name__ == '__main__':
- paddle.enable_static()
- parser = argsparser()
- FLAGS = parser.parse_args()
-
- assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
- paddle.set_device(FLAGS.devices)
-
- main()
diff --git a/example/auto_compression/pytorch_yolov6/README.md b/example/auto_compression/pytorch_yolov6/README.md
deleted file mode 100644
index 7cdb5464..00000000
--- a/example/auto_compression/pytorch_yolov6/README.md
+++ /dev/null
@@ -1,143 +0,0 @@
-# YOLOv6 Auto Compression Example
-
-Contents:
-- [1. Introduction](#1-introduction)
-- [2. Benchmark](#2-benchmark)
-- [3. Auto Compression Workflow](#3-auto-compression-workflow)
-  - [3.1 Prepare the Environment](#31-prepare-the-environment)
-  - [3.2 Prepare the Dataset](#32-prepare-the-dataset)
-  - [3.3 Prepare the Inference Model](#33-prepare-the-inference-model)
-  - [3.4 Run Auto Compression and Export the Model](#34-run-auto-compression-and-export-the-model)
-  - [3.5 Evaluate Model Accuracy](#35-evaluate-model-accuracy)
-- [4. Inference Deployment](#4-inference-deployment)
-- [5. FAQ](#5-faq)
-
-## 1. Introduction
-
-The PaddlePaddle model conversion tool [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) converts ```Caffe/TensorFlow/ONNX/PyTorch``` models into PaddlePaddle inference models in one step. With X2Paddle, inference models from these frameworks can easily use PaddleSlim's auto compression toolkit (ACT).
-
-This example takes the [meituan/YOLOv6](https://github.com/meituan/YOLOv6) object detection model, converts it from PyTorch to a Paddle model, and then compresses it automatically with ACT. The compression strategy used here is quantization-aware training.
-
-## 2. Benchmark
-
-| Model | Strategy | Input Size | mAP<sup>val</sup><br>0.5:0.95 | Latency FP32<br>(ms) | Latency FP16<br>(ms) | Latency INT8<br>(ms) | Config | Inference Model |
-| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :-----------------------------: | :-----------------------------: |
-| YOLOv6s | Base model | 640*640 | 42.4 | 9.06 | 2.90 | - | - | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_infer.tar) |
-| YOLOv6s | KL post-training quantization | 640*640 | 30.3 | - | - | 1.83 | - | - |
-| YOLOv6s | Quantization-aware distillation training | 640*640 | **41.3** | - | - | **1.83** | [config](./configs/yolov6s_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_quant.tar) |
-
-Notes:
-- All mAP numbers are measured on the COCO val2017 dataset.
-- YOLOv6s latency is measured on a Tesla T4 GPU with TensorRT 8.4.1 and batch_size=1; the test script is [cpp_infer](./cpp_infer).
-
-## 3. Auto Compression Workflow
-
-#### 3.1 Prepare the Environment
-- PaddlePaddle >= 2.3 (install from the [Paddle website](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
-- PaddleSlim > 2.3
-- PaddleDet >= 2.4
-- [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) >= 1.3.6
-- opencv-python
-
-(1) Install paddlepaddle:
-```shell
-# CPU
-pip install paddlepaddle
-# GPU
-pip install paddlepaddle-gpu
-```
-
-(2) Install paddleslim:
-```shell
-pip install paddleslim
-```
-
-(3) Install paddledet:
-```shell
-pip install paddledet
-```
-
-Note: PaddleDet is installed only so we can reuse the Dataloader components from PaddleDetection.
-
-(4) Install X2Paddle 1.3.6 or later:
-```shell
-pip install x2paddle sympy onnx
-```
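-
-A quick sanity check after installing (a minimal sketch; `paddle.utils.run_check` is Paddle's built-in self test and `importlib.metadata` simply reads the installed package versions):
-```python
-import importlib.metadata
-
-import paddle
-
-# Verify the Paddle installation (device visibility, basic ops).
-paddle.utils.run_check()
-
-# Report the versions of the other packages this example depends on.
-for pkg in ["paddleslim", "paddledet", "x2paddle"]:
-    print(pkg, importlib.metadata.version(pkg))
-```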
-
-#### 3.2 Prepare the Dataset
-
-This example runs auto compression on COCO data by default and relies on PaddleDetection's data-loading modules. For custom COCO-format data, or data in other formats, follow the [PaddleDetection data preparation guide](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/docs/tutorials/PrepareDataSet.md).
-
-If the dataset is already prepared, simply set the `dataset_dir` field under `EvalDataset` in [./configs/yolov6_reader.yml](./configs/yolov6_reader.yml) to your dataset path.
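-
-For reference, the scripts in this example consume the reader config through PaddleDetection's workspace API; a minimal sketch mirroring eval.py:
-```python
-from ppdet.core.workspace import load_config, create
-
-# Parse the reader YAML and build the COCO evaluation dataloader it declares.
-reader_cfg = load_config('./configs/yolov6_reader.yml')
-dataset = reader_cfg['EvalDataset']
-val_loader = create('EvalReader')(dataset,
-                                  reader_cfg['worker_num'],
-                                  return_list=True)
-```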
-
-
-#### 3.3 Prepare the Inference Model
-
-(1) Prepare the ONNX model:
-
-Export an ONNX model by following the official [export guide](https://github.com/meituan/YOLOv6/blob/main/deploy/ONNX/README.md) from [meituan/YOLOv6](https://github.com/meituan/YOLOv6), or download the ready-made [yolov6s.onnx](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx).
-
-
-(2) Convert the model:
-```
-x2paddle --framework=onnx --model=yolov6s.onnx --save_dir=pd_model
-cp -r pd_model/inference_model/ yolov6s_infer
-```
-This produces the YOLOv6s inference model (`model.pdmodel` and `model.pdiparams`). For a quick start, download the YOLOv6s [Paddle inference model](https://bj.bcebos.com/v1/paddle-slim-models/act/yolov6s_infer.tar) from the table above.
-
-
-The inference model consists of two files: `model.pdmodel` is the model graph, and `model.pdiparams` holds the weights.
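-
-To confirm the conversion worked, the exported pair can be loaded back in static-graph mode (a minimal sketch; `yolov6s_infer` is the directory produced by the conversion step above):
-```python
-import paddle
-
-paddle.enable_static()
-exe = paddle.static.Executor(paddle.CPUPlace())
-
-# Load the converted inference model and list its input names.
-program, feed_names, fetch_targets = paddle.static.load_inference_model(
-    'yolov6s_infer',
-    exe,
-    model_filename='model.pdmodel',
-    params_filename='model.pdiparams')
-print('inputs:', feed_names)
-```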
-
-
-#### 3.4 Run Auto Compression and Export the Model
-
-The distillation-plus-quantization example is launched through the run.py script, which compresses the model with the ```paddleslim.auto_compression.AutoCompression``` interface. Configure the model path, distillation, quantization, and training parameters in the config file; once that is done, the model can be quantized and distilled (a condensed sketch of the underlying call follows the commands below). The run commands are:
-
-- Single-GPU training:
-```
-export CUDA_VISIBLE_DEVICES=0
-python run.py --config_path=./configs/yolov6s_qat_dis.yaml --save_dir='./output/'
-```
-
-- Multi-GPU training:
-```
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch --log_dir=log --gpus 0,1,2,3 run.py \
-          --config_path=./configs/yolov6s_qat_dis.yaml --save_dir='./output/'
-```
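-
-Condensed, run.py amounts to the following (a sketch only, assuming `input_list` in the `Global` config is a list of input names; the full script additionally builds an mAP evaluation callback):
-```python
-import paddle
-from ppdet.core.workspace import load_config, create
-from paddleslim.auto_compression import AutoCompression
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
-
-paddle.enable_static()
-
-all_config = load_slim_config('./configs/yolov6s_qat_dis.yaml')
-global_config = all_config['Global']
-
-# Build the training dataloader from the reader YAML and wrap each batch
-# into a feed dict keyed by the model's input names.
-reader_cfg = load_config(global_config['reader_config'])
-loader = create('EvalReader')(reader_cfg['TrainDataset'],
-                              reader_cfg['worker_num'],
-                              return_list=True)
-
-def train_loader():
-    for data in loader:
-        yield {name: data[name] for name in global_config['input_list']}
-
-ac = AutoCompression(
-    model_dir=global_config['model_dir'],
-    model_filename=global_config['model_filename'],
-    params_filename=global_config['params_filename'],
-    save_dir='./output/',
-    config=all_config,
-    train_dataloader=train_loader,
-    eval_callback=None)  # plug in an mAP callback as run.py does
-ac.compress()
-```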
-
-#### 3.5 Evaluate Model Accuracy
-
-Set the `model_dir` field in [yolov6s_qat_dis.yaml](./configs/yolov6s_qat_dis.yaml) to the model path, then get the model's mAP with the eval.py script:
-```
-export CUDA_VISIBLE_DEVICES=0
-python eval.py --config_path=./configs/yolov6s_qat_dis.yaml
-```
-
-
-## 4. Inference Deployment
-
-#### Paddle-TensorRT C++ Deployment
-
-Enter the [cpp_infer](./cpp_infer) folder, prepare the environment and build by following the [C++ TensorRT benchmark guide](./cpp_infer/README.md), then run the test:
-```shell
-# build
-bash compile.sh
-# run
-./build/trt_run --model_file yolov6s_quant/model.pdmodel --params_file yolov6s_quant/model.pdiparams --run_mode=trt_int8
-```
-
-#### Paddle-TensorRT Python Deployment:
-
-First install a [Paddle package](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python) built with TensorRT.
-
-Then deploy with [paddle_trt_infer.py](./paddle_trt_infer.py):
-```shell
-python paddle_trt_infer.py --model_path=output --image_file=images/000000570688.jpg --benchmark=True --run_mode=trt_int8
-```
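-
-Under the hood, paddle_trt_infer.py builds its predictor roughly like this (a minimal sketch of the Paddle Inference config; `output` is the compressed-model directory from the previous step):
-```python
-from paddle.inference import Config, PrecisionType, create_predictor
-
-config = Config('output/model.pdmodel', 'output/model.pdiparams')
-config.enable_use_gpu(256, 0)  # 256 MB initial GPU memory pool on device 0
-
-# Offload supported subgraphs to the TensorRT INT8 engine.
-config.enable_tensorrt_engine(
-    workspace_size=1 << 30,
-    max_batch_size=1,
-    min_subgraph_size=3,
-    precision_mode=PrecisionType.Int8,
-    use_static=False,
-    use_calib_mode=False)
-
-predictor = create_predictor(config)
-```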
-
-## 5. FAQ
-
-- To evaluate the accuracy of the post-training (offline) quantized model, run:
-```shell
-python post_quant.py --config_path=./configs/yolov6s_qat_dis.yaml
-```
diff --git a/example/auto_compression/pytorch_yolov6/configs/yolov6_reader.yml b/example/auto_compression/pytorch_yolov6/configs/yolov6_reader.yml
deleted file mode 100644
index cb87c3f8..00000000
--- a/example/auto_compression/pytorch_yolov6/configs/yolov6_reader.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-metric: COCO
-num_classes: 80
-
-# Dataset configuration
-TrainDataset:
- !COCODataSet
- image_dir: train2017
- anno_path: annotations/instances_train2017.json
- dataset_dir: dataset/coco/
-
-EvalDataset:
- !COCODataSet
- image_dir: val2017
- anno_path: annotations/instances_val2017.json
- dataset_dir: dataset/coco/
-
-worker_num: 0
-
-# preprocess reader in test
-EvalReader:
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: [640, 640], keep_ratio: True}
- - Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
- - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True}
- - Permute: {}
- batch_size: 1
diff --git a/example/auto_compression/pytorch_yolov6/cpp_infer/CMakeLists.txt b/example/auto_compression/pytorch_yolov6/cpp_infer/CMakeLists.txt
deleted file mode 100644
index d5307c65..00000000
--- a/example/auto_compression/pytorch_yolov6/cpp_infer/CMakeLists.txt
+++ /dev/null
@@ -1,263 +0,0 @@
-cmake_minimum_required(VERSION 3.0)
-project(cpp_inference_demo CXX C)
-option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
-option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
-option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
-option(USE_TENSORRT "Compile demo with TensorRT." OFF)
-option(WITH_ROCM "Compile demo with rocm." OFF)
-option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF)
-option(WITH_ARM "Compile demo with ARM" OFF)
-option(WITH_MIPS "Compile demo with MIPS" OFF)
-option(WITH_SW "Compile demo with SW" OFF)
-option(WITH_XPU "Compile demo with xpu" OFF)
-option(WITH_NPU "Compile demo with npu" OFF)
-
-if(NOT WITH_STATIC_LIB)
- add_definitions("-DPADDLE_WITH_SHARED_LIB")
-else()
- # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode.
- # Set it to empty in static library mode to avoid compilation issues.
- add_definitions("/DPD_INFER_DECL=")
-endif()
-
-macro(safe_set_static_flag)
- foreach(flag_var
- CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
- CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
- if(${flag_var} MATCHES "/MD")
- string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
- endif(${flag_var} MATCHES "/MD")
- endforeach(flag_var)
-endmacro()
-
-if(NOT DEFINED PADDLE_LIB)
- message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
-endif()
-if(NOT DEFINED DEMO_NAME)
- message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
-endif()
-
-include_directories("${PADDLE_LIB}/")
-set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/include")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/include")
-include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/include")
-
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib")
-link_directories("${PADDLE_LIB}/paddle/lib")
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib")
-link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib")
-
-if (WIN32)
- add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
- option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
- if (MSVC_STATIC_CRT)
- if (WITH_MKL)
- set(FLAG_OPENMP "/openmp")
- endif()
- set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
- set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
- safe_set_static_flag()
- if (WITH_STATIC_LIB)
- add_definitions(-DSTATIC_LIB)
- endif()
- endif()
-else()
- if(WITH_MKL)
- set(FLAG_OPENMP "-fopenmp")
- endif()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
-endif()
-
-if(WITH_GPU)
- if(NOT WIN32)
- include_directories("/usr/local/cuda/include")
- if(CUDA_LIB STREQUAL "")
- set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
- endif()
- else()
- include_directories("C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include")
- if(CUDA_LIB STREQUAL "")
- set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
- endif()
- endif(NOT WIN32)
-endif()
-
-if (USE_TENSORRT AND WITH_GPU)
- set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library")
- if("${TENSORRT_ROOT}" STREQUAL "")
- message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ")
- endif()
- set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include)
- set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib)
- file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
- string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
- "${TENSORRT_VERSION_FILE_CONTENTS}")
- if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
- file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
- string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
- "${TENSORRT_VERSION_FILE_CONTENTS}")
- endif()
- if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
- message(SEND_ERROR "Failed to detect TensorRT version.")
- endif()
- string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
- TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
- message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
- "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
- include_directories("${TENSORRT_INCLUDE_DIR}")
- link_directories("${TENSORRT_LIB_DIR}")
-endif()
-
-if(WITH_MKL)
- set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml")
- include_directories("${MATH_LIB_PATH}/include")
- if(WIN32)
- set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX}
- ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX})
- else()
- set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
- ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
- endif()
- set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn")
- if(EXISTS ${MKLDNN_PATH})
- include_directories("${MKLDNN_PATH}/include")
- if(WIN32)
- set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
- else(WIN32)
- set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
- endif(WIN32)
- endif()
-elseif((NOT WITH_MIPS) AND (NOT WITH_SW))
- set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas")
- include_directories("${OPENBLAS_LIB_PATH}/include/openblas")
- if(WIN32)
- set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX})
- else()
- set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
- endif()
-endif()
-
-if(WITH_STATIC_LIB)
- set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX})
-else()
- if(WIN32)
- set(DEPS ${PADDLE_LIB}/paddle/lib/paddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX})
- else()
- set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX})
- endif()
-endif()
-
-if (WITH_ONNXRUNTIME)
- if(WIN32)
- set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib paddle2onnx)
- elseif(APPLE)
- set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib paddle2onnx)
- else()
- set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 paddle2onnx)
- endif()
-endif()
-
-if (NOT WIN32)
- set(EXTERNAL_LIB "-lrt -ldl -lpthread")
- set(DEPS ${DEPS}
- ${MATH_LIB} ${MKLDNN_LIB}
- glog gflags protobuf xxhash cryptopp
- ${EXTERNAL_LIB})
-else()
- set(DEPS ${DEPS}
- ${MATH_LIB} ${MKLDNN_LIB}
- glog gflags_static libprotobuf xxhash cryptopp-static ${EXTERNAL_LIB})
- set(DEPS ${DEPS} shlwapi.lib)
-endif(NOT WIN32)
-
-if(WITH_GPU)
- if(NOT WIN32)
- if (USE_TENSORRT)
- set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
- set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
- endif()
- set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
- else()
- if(USE_TENSORRT)
- set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
- set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
- if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
- set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX})
- endif()
- endif()
- set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
- set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
- set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
- endif()
-endif()
-
-if(WITH_ROCM AND NOT WIN32)
- set(DEPS ${DEPS} ${ROCM_LIB}/libamdhip64${CMAKE_SHARED_LIBRARY_SUFFIX})
-endif()
-
-if(WITH_XPU AND NOT WIN32)
- set(XPU_INSTALL_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}xpu")
- set(DEPS ${DEPS} ${XPU_INSTALL_PATH}/lib/libxpuapi${CMAKE_SHARED_LIBRARY_SUFFIX})
- set(DEPS ${DEPS} ${XPU_INSTALL_PATH}/lib/libxpurt${CMAKE_SHARED_LIBRARY_SUFFIX})
-endif()
-
-if(WITH_NPU AND NOT WIN32)
- set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libgraph${CMAKE_SHARED_LIBRARY_SUFFIX})
- set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libge_runner${CMAKE_SHARED_LIBRARY_SUFFIX})
-  set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libascendcl${CMAKE_SHARED_LIBRARY_SUFFIX})
- set(DEPS ${DEPS} ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64/libacl_op_compiler${CMAKE_SHARED_LIBRARY_SUFFIX})
-endif()
-
-add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
-target_link_libraries(${DEMO_NAME} ${DEPS})
-if(WIN32)
- if(USE_TENSORRT)
- add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}
- ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
- COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}
- ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
- )
- if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
- add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_SHARED_LIBRARY_SUFFIX}
- ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE})
- endif()
- endif()
- if(WITH_MKL)
- add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release
- COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release
- COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${CMAKE_BINARY_DIR}/Release
- )
- else()
- add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release
- )
- endif()
- if(WITH_ONNXRUNTIME)
- add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll
- ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
- COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll
- ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
- )
- endif()
- if(NOT WITH_STATIC_LIB)
- add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
- )
- endif()
-endif()
diff --git a/example/auto_compression/pytorch_yolov6/cpp_infer/README.md b/example/auto_compression/pytorch_yolov6/cpp_infer/README.md
deleted file mode 100644
index 2f220486..00000000
--- a/example/auto_compression/pytorch_yolov6/cpp_infer/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# YOLOv6 TensorRT Benchmark Test (Linux)
-
-## Environment Preparation
-
-- CUDA and CUDNN: make sure CUDA and CUDNN are installed, and note their install paths in advance.
-
-- TensorRT: download [TensorRT 8.4.1.5](https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.4.1/tars/tensorrt-8.4.1.5.linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz) or another release from the NVIDIA website.
-
-- Paddle Inference C++ library: to build the develop branch, follow the [build guide](https://www.paddlepaddle.org.cn/inference/user_guides/source_compile.html). After the build finishes, the `paddle_inference_install_dir` folder under the build directory is the C++ library we need.
-
-## Build the Executable
-
-- (1) Edit the dependency paths in `compile.sh`, mainly the following:
-```shell
-# Paddle Inference library path
-LIB_DIR=/root/auto_compress/Paddle/build/paddle_inference_install_dir/
-# CUDNN path
-CUDNN_LIB=/usr/lib/x86_64-linux-gnu/
-# CUDA path
-CUDA_LIB=/usr/local/cuda/lib64
-# TensorRT install path: the absolute path of the extracted TRT package, containing `lib` and `include` folders
-TENSORRT_ROOT=/root/auto_compress/trt/trt8.4/
-```
-
-## Test
-
-- FP32
-```
-./build/trt_run --model_file yolov6s_infer/model.pdmodel --params_file yolov6s_infer/model.pdiparams --run_mode=trt_fp32
-```
-
-- FP16
-```
-./build/trt_run --model_file yolov6s_infer/model.pdmodel --params_file yolov6s_infer/model.pdiparams --run_mode=trt_fp16
-```
-
-- INT8
-```
-./build/trt_run --model_file yolov6s_quant/model.pdmodel --params_file yolov6s_quant/model.pdiparams --run_mode=trt_int8
-```
-
-## Performance Comparison
-
-| Model | Latency FP32<br>(ms) | Latency FP16<br>(ms) | Latency INT8<br>(ms) |
-| :-------- |:--------: | :--------: | :---------------------: |
-| YOLOv6s | 9.06 | 2.90 | 1.83 |
-
-Environment:
-- Tesla T4, TensorRT 8.4.1, CUDA 11.2
-- batch_size=1
diff --git a/example/auto_compression/pytorch_yolov6/cpp_infer/compile.sh b/example/auto_compression/pytorch_yolov6/cpp_infer/compile.sh
deleted file mode 100644
index afff924b..00000000
--- a/example/auto_compression/pytorch_yolov6/cpp_infer/compile.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-set +x
-set -e
-
-work_path=$(dirname $(readlink -f $0))
-
-mkdir -p build
-cd build
-rm -rf *
-
-DEMO_NAME=trt_run
-
-WITH_MKL=ON
-WITH_GPU=ON
-USE_TENSORRT=ON
-
-LIB_DIR=/root/auto_compress/Paddle/build/paddle_inference_install_dir/
-CUDNN_LIB=/usr/lib/x86_64-linux-gnu/
-CUDA_LIB=/usr/local/cuda/lib64
-TENSORRT_ROOT=/root/auto_compress/trt/trt8.4/
-
-WITH_ROCM=OFF
-ROCM_LIB=/opt/rocm/lib
-
-cmake .. -DPADDLE_LIB=${LIB_DIR} \
- -DWITH_MKL=${WITH_MKL} \
- -DDEMO_NAME=${DEMO_NAME} \
- -DWITH_GPU=${WITH_GPU} \
- -DWITH_STATIC_LIB=OFF \
- -DUSE_TENSORRT=${USE_TENSORRT} \
- -DWITH_ROCM=${WITH_ROCM} \
- -DROCM_LIB=${ROCM_LIB} \
- -DCUDNN_LIB=${CUDNN_LIB} \
- -DCUDA_LIB=${CUDA_LIB} \
- -DTENSORRT_ROOT=${TENSORRT_ROOT}
-
-make -j
diff --git a/example/auto_compression/pytorch_yolov6/cpp_infer/trt_run.cc b/example/auto_compression/pytorch_yolov6/cpp_infer/trt_run.cc
deleted file mode 100644
index 9c14baf7..00000000
--- a/example/auto_compression/pytorch_yolov6/cpp_infer/trt_run.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-#include <chrono>
-#include <iostream>
-#include <memory>
-#include <numeric>
-
-#include <cuda_runtime.h>
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-
-#include "paddle/include/paddle_inference_api.h"
-#include "paddle/include/experimental/phi/common/float16.h"
-
-using paddle_infer::Config;
-using paddle_infer::Predictor;
-using paddle_infer::CreatePredictor;
-using paddle_infer::PrecisionType;
-using phi::dtype::float16;
-
-DEFINE_string(model_dir, "", "Directory of the inference model.");
-DEFINE_string(model_file, "", "Path of the inference model file.");
-DEFINE_string(params_file, "", "Path of the inference params file.");
-DEFINE_string(run_mode, "trt_fp32", "run_mode which can be: trt_fp32, trt_fp16 and trt_int8");
-DEFINE_int32(batch_size, 1, "Batch size.");
-DEFINE_int32(gpu_id, 0, "GPU card ID num.");
-DEFINE_int32(trt_min_subgraph_size, 3, "tensorrt min_subgraph_size");
-DEFINE_int32(warmup, 50, "warmup");
-DEFINE_int32(repeats, 1000, "repeats");
-
-using Time = decltype(std::chrono::high_resolution_clock::now());
-Time time() { return std::chrono::high_resolution_clock::now(); };
-double time_diff(Time t1, Time t2) {
- typedef std::chrono::microseconds ms;
- auto diff = t2 - t1;
-  ms counter = std::chrono::duration_cast<ms>(diff);
- return counter.count() / 1000.0;
-}
-
-std::shared_ptr<Predictor> InitPredictor() {
- Config config;
- std::string model_path;
- if (FLAGS_model_dir != "") {
- config.SetModel(FLAGS_model_dir);
- model_path = FLAGS_model_dir.substr(0, FLAGS_model_dir.find_last_of("/"));
- } else {
- config.SetModel(FLAGS_model_file, FLAGS_params_file);
- model_path = FLAGS_model_file.substr(0, FLAGS_model_file.find_last_of("/"));
- }
- // enable tune
- std::cout << "model_path: " << model_path << std::endl;
- config.EnableUseGpu(256, FLAGS_gpu_id);
- if (FLAGS_run_mode == "trt_fp32") {
- config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size,
- PrecisionType::kFloat32, false, false);
- } else if (FLAGS_run_mode == "trt_fp16") {
- config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size,
- PrecisionType::kHalf, false, false);
- } else if (FLAGS_run_mode == "trt_int8") {
- config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, FLAGS_trt_min_subgraph_size,
- PrecisionType::kInt8, false, false);
- }
- config.EnableMemoryOptim();
- config.SwitchIrOptim(true);
- return CreatePredictor(config);
-}
-
-template <typename type>
-void run(Predictor *predictor, const std::vector<type> &input,
-         const std::vector<int> &input_shape, type* out_data, std::vector<int> out_shape) {
-
- // prepare input
-  int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
-                                  std::multiplies<int>());
-
- auto input_names = predictor->GetInputNames();
- auto input_t = predictor->GetInputHandle(input_names[0]);
- input_t->Reshape(input_shape);
- input_t->CopyFromCpu(input.data());
-
- for (int i = 0; i < FLAGS_warmup; ++i)
- CHECK(predictor->Run());
-
- auto st = time();
- for (int i = 0; i < FLAGS_repeats; ++i) {
- auto input_names = predictor->GetInputNames();
- auto input_t = predictor->GetInputHandle(input_names[0]);
- input_t->Reshape(input_shape);
- input_t->CopyFromCpu(input.data());
-
- CHECK(predictor->Run());
-
- auto output_names = predictor->GetOutputNames();
- auto output_t = predictor->GetOutputHandle(output_names[0]);
-    std::vector<int> output_shape = output_t->shape();
-    output_t->ShareExternalData(out_data, out_shape, paddle_infer::PlaceType::kGPU);
- }
-
- LOG(INFO) << "[" << FLAGS_run_mode << " bs-" << FLAGS_batch_size << " ] run avg time is " << time_diff(st, time()) / FLAGS_repeats
- << " ms";
-}
-
-int main(int argc, char *argv[]) {
- google::ParseCommandLineFlags(&argc, &argv, true);
- auto predictor = InitPredictor();
-  std::vector<int> input_shape = {FLAGS_batch_size, 3, 640, 640};
- // float16
- using dtype = float16;
-  std::vector<dtype> input_data(FLAGS_batch_size * 3 * 640 * 640, dtype(1.0));
-
- dtype *out_data;
- int out_data_size = FLAGS_batch_size * 8400 * 85;
- cudaHostAlloc((void**)&out_data, sizeof(float) * out_data_size, cudaHostAllocMapped);
-
-  std::vector<int> out_shape{FLAGS_batch_size, 1, 8400, 85};
- run(predictor.get(), input_data, input_shape, out_data, out_shape);
- return 0;
-}
diff --git a/example/auto_compression/pytorch_yolov6/eval.py b/example/auto_compression/pytorch_yolov6/eval.py
deleted file mode 100644
index 62127b51..00000000
--- a/example/auto_compression/pytorch_yolov6/eval.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import numpy as np
-import argparse
-import paddle
-from ppdet.core.workspace import load_config, merge_config
-from ppdet.core.workspace import create
-from ppdet.metrics import COCOMetric, VOCMetric
-from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
-
-from post_process import YOLOv6PostProcess
-
-
-def argsparser():
- parser = argparse.ArgumentParser(description=__doc__)
- parser.add_argument(
- '--config_path',
- type=str,
- default=None,
- help="path of compression strategy config.",
- required=True)
- parser.add_argument(
- '--devices',
- type=str,
- default='gpu',
- help="which device used to compress.")
-
- return parser
-
-
-def reader_wrapper(reader, input_list):
- def gen():
- for data in reader:
- in_dict = {}
- if isinstance(input_list, list):
- for input_name in input_list:
- in_dict[input_name] = data[input_name]
- elif isinstance(input_list, dict):
- for input_name in input_list.keys():
- in_dict[input_list[input_name]] = data[input_name]
- yield in_dict
-
- return gen
-
-
-def convert_numpy_data(data, metric):
- data_all = {}
- data_all = {k: np.array(v) for k, v in data.items()}
- if isinstance(metric, VOCMetric):
- for k, v in data_all.items():
- if not isinstance(v[0], np.ndarray):
- tmp_list = []
- for t in v:
- tmp_list.append(np.array(t))
- data_all[k] = np.array(tmp_list)
- else:
- data_all = {k: np.array(v) for k, v in data.items()}
- return data_all
-
-
-def eval():
-
- place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace()
- exe = paddle.static.Executor(place)
-
- val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model(
- global_config["model_dir"],
- exe,
- model_filename=global_config["model_filename"],
- params_filename=global_config["params_filename"])
- print('Loaded model from: {}'.format(global_config["model_dir"]))
-
- metric = global_config['metric']
- for batch_id, data in enumerate(val_loader):
- data_all = convert_numpy_data(data, metric)
- data_input = {}
- for k, v in data.items():
- if isinstance(global_config['input_list'], list):
- if k in global_config['input_list']:
- data_input[k] = np.array(v)
- elif isinstance(global_config['input_list'], dict):
- if k in global_config['input_list'].keys():
- data_input[global_config['input_list'][k]] = np.array(v)
- outs = exe.run(val_program,
- feed=data_input,
- fetch_list=fetch_targets,
- return_numpy=False)
- res = {}
- if 'arch' in global_config and global_config['arch'] == 'YOLOv6':
- postprocess = YOLOv6PostProcess(
- score_threshold=0.001, nms_threshold=0.65, multi_label=True)
- res = postprocess(np.array(outs[0]), data_all['scale_factor'])
- else:
- for out in outs:
- v = np.array(out)
- if len(v.shape) > 1:
- res['bbox'] = v
- else:
- res['bbox_num'] = v
- metric.update(data_all, res)
- if batch_id % 100 == 0:
- print('Eval iter:', batch_id)
- metric.accumulate()
- metric.log()
- metric.reset()
-
-
-def main():
- global global_config
- all_config = load_slim_config(FLAGS.config_path)
- global_config = all_config["Global"]
- reader_cfg = load_config(global_config['reader_config'])
-
- dataset = reader_cfg['EvalDataset']
- global val_loader
- val_loader = create('EvalReader')(reader_cfg['EvalDataset'],
- reader_cfg['worker_num'],
- return_list=True)
- metric = None
- if reader_cfg['metric'] == 'COCO':
- clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
- anno_file = dataset.get_anno()
- metric = COCOMetric(
- anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox')
- elif reader_cfg['metric'] == 'VOC':
- metric = VOCMetric(
- label_list=dataset.get_label_list(),
- class_num=reader_cfg['num_classes'],
- map_type=reader_cfg['map_type'])
- else:
- raise ValueError("metric currently only supports COCO and VOC.")
- global_config['metric'] = metric
-
- eval()
-
-
-if __name__ == '__main__':
- paddle.enable_static()
- parser = argsparser()
- FLAGS = parser.parse_args()
-
- assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
- paddle.set_device(FLAGS.devices)
-
- main()
diff --git a/example/auto_compression/pytorch_yolov6/images/000000570688.jpg b/example/auto_compression/pytorch_yolov6/images/000000570688.jpg
deleted file mode 100644
index cb304bd56c4010c08611a30dcca58ea9140cea54..0000000000000000000000000000000000000000
GIT binary patch (138365-byte JPEG; binary data omitted)