From b020589bd3d6d3c7d49b4e0428974c063defc2fd Mon Sep 17 00:00:00 2001
From: zhouzj <41366441+zzjjay@users.noreply.github.com>
Date: Wed, 23 Nov 2022 11:12:33 +0800
Subject: [PATCH] [Cherry-pick] Rename ACT configs (#1546)

* [ACT] Rename 'Quantization' to 'QuantAware'. (#1536)

* Add automatic region division and fix per-tensor quant (#1517)

Co-authored-by: zhouzj <41366441+zzjjay@users.noreply.github.com>
Co-authored-by: gushiqiao <77222802+gushiqiao@users.noreply.github.com>
---
 .../quant/post_training_quantization.md       |   3 +
 example/auto_compression/README.md            |   2 +-
 .../detection/configs/picodet_s_qat_dis.yaml  |   2 +-
 .../detection/configs/ppyoloe_l_qat_dis.yaml  |   2 +-
 .../detection/configs/ppyoloe_s_qat_dis.yaml  |   2 +-
 .../configs/ssd_mbv1_voc_qat_dis.yaml         |   2 +-
 .../detection/configs/tinypose_qat_dis.yaml   |   2 +-
 .../configs/yolov3_mbv1_qat_dis.yaml          |   2 +-
 .../hyperparameter_tutorial.md                |   4 +-
 .../configs/EfficientNetB0/qat_dis.yaml       |   2 +-
 .../configs/GhostNet_x1_0/qat_dis.yaml        |   2 +-
 .../configs/InceptionV3/qat_dis.yaml          |   2 +-
 .../configs/MobileNetV1/qat_dis.yaml          |   2 +-
 .../MobileNetV3_large_x1_0/qat_dis.yaml       |   2 +-
 .../configs/PPHGNet_tiny/qat_dis.yaml         |   2 +-
 .../configs/PPLCNetV2_base/qat_dis.yaml       |   2 +-
 .../configs/PPLCNet_x1_0/qat_dis.yaml         |   2 +-
 .../configs/ResNet50_vd/qat_dis.yaml          |   2 +-
 .../configs/ShuffleNetV2_x1_0/qat_dis.yaml    |   2 +-
 .../configs/SqueezeNet1_0/qat_dis.yaml        |   2 +-
 .../qat_dis.yaml                              |   2 +-
 .../ocr/configs/ppocrv3_det_qat_dist.yaml     |   2 +-
 .../configs/yolov5s_qat_dis.yaml              |   2 +-
 .../configs/yolov6s_qat_dis.yaml              |   2 +-
 .../configs/yolov6s_v2_qat_dis.yaml           |   2 +-
 .../configs/yolov7_qat_dis.yaml               |   2 +-
 .../configs/yolov7_tiny_qat_dis.yaml          |   2 +-
 .../configs/BiSeNetV2/BiSeNetV2_qat.yaml      |   2 +-
 .../configs/deeplabv3/deeplabv3_qat.yaml      |   2 +-
 .../configs/hrnet/hrnet_qat.yaml              |   2 +-
 .../configs/pp_humanseg/pp_humanseg_qat.yaml  |   2 +-
 .../configs/pp_liteseg/pp_liteseg_qat.yaml    |   2 +-
 .../configs/unet/unet_qat.yaml                |   2 +-
 .../configs/mbv1_qat_dis.yaml                 |   2 +-
 .../configs/mobilenetv3_large_qat_dis.yaml    |   2 +-
 .../picodet/configs/picodet_npu.yaml          |   2 +-
 .../configs/picodet_npu_with_postprocess.yaml |   2 +-
 .../configs/yolov6s_fine_tune.yaml            |  24 +-
 .../pytorch_yolo_series/fine_tune.py          |  19 +-
 paddleslim/auto_compression/__init__.py       |   3 +-
 paddleslim/auto_compression/auto_strategy.py  |  12 +-
 paddleslim/auto_compression/compressor.py     |   4 +-
 .../auto_compression/strategy_config.py       |   8 +-
 .../quant/reconstruction_quantization.py      | 256 ++++++++++++++++--
 tests/act/qat_dist_train.yaml                 |   2 +-
 tests/test_reconstruct_quantization.py        | 123 +++++----
 46 files changed, 369 insertions(+), 159 deletions(-)
 mode change 100755 => 100644 paddleslim/quant/reconstruction_quantization.py

diff --git a/docs/zh_cn/tutorials/quant/post_training_quantization.md b/docs/zh_cn/tutorials/quant/post_training_quantization.md
index 0077569b..09d22d17 100644
--- a/docs/zh_cn/tutorials/quant/post_training_quantization.md
+++ b/docs/zh_cn/tutorials/quant/post_training_quantization.md
@@ -72,6 +72,9 @@ $$
 Notes:
 - To use bias_correction, set the `bias_correction` parameter of PaddleSlim's [post-training quantization API](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/api_cn/static/quant/quantization_api.rst#quant_post_static) to True; the default is False.
 - To use the Adaround method, set the `round_type` parameter of PaddleSlim's [post-training quantization API](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/api_cn/static/quant/quantization_api.rst#quant_post_static) to `adaround`; the default is `round`.
+- To use the BRECQ method, set the `recon_level` parameter of PaddleSlim's [reconstruction quantization API](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/api_cn/static/quant/quantization_api.rst#quant_post_static) to `region-wise`; the default is `layer-wise`.
+- To use the QDrop method, set the `simulate_activation_quant` parameter of PaddleSlim's [reconstruction quantization API](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/zh_cn/api_cn/static/quant/quantization_api.rst#quant_post_static) to `True`; the default is `False`.
+
 
 ### Results comparison
 
diff --git a/example/auto_compression/README.md b/example/auto_compression/README.md
index 67bb1c90..bf2a54ab 100644
--- a/example/auto_compression/README.md
+++ b/example/auto_compression/README.md
@@ -165,7 +165,7 @@ ac = AutoCompression(
     params_filename="inference.pdiparams",
     save_dir="MobileNetV1_quant",
     config={"QuantPost": {}, "HyperParameterOptimization": {'ptq_algo': ['avg'], 'max_quant_count': 3}},
-    ### config={"Quantization": {}, "Distillation": {}}, ### use this line of configuration instead if your system is Windows
+    ### config={"QuantAware": {}, "Distillation": {}}, ### use this line of configuration instead if your system is Windows
     train_dataloader=train_loader,
     eval_dataloader=train_loader)
 ac.compress()
diff --git a/example/auto_compression/detection/configs/picodet_s_qat_dis.yaml b/example/auto_compression/detection/configs/picodet_s_qat_dis.yaml
index 3b1b08e1..72de6102 100644
--- a/example/auto_compression/detection/configs/picodet_s_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/picodet_s_qat_dis.yaml
@@ -18,7 +18,7 @@ Distillation:
   - conv2d_154.tmp_1
   - tmp_8
 
-Quantization:
+QuantAware:
   use_pact: true
   activation_quantize_type: 'moving_average_abs_max'
   weight_bits: 8
diff --git a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
index 730fb14a..d1c28b06 100644
--- a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
@@ -11,7 +11,7 @@ Distillation:
   alpha: 1.0
   loss: soft_label
 
-Quantization:
+QuantAware:
   onnx_format: true
   use_pact: true
   activation_quantize_type: 'moving_average_abs_max'
diff --git a/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
index be324ac7..2090baba 100644
--- a/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/ppyoloe_s_qat_dis.yaml
@@ -11,7 +11,7 @@ Distillation:
   alpha: 1.0
   loss: soft_label
 
-Quantization:
+QuantAware:
   onnx_format: true
   use_pact: true
   activation_quantize_type: 'moving_average_abs_max'
diff --git a/example/auto_compression/detection/configs/ssd_mbv1_voc_qat_dis.yaml b/example/auto_compression/detection/configs/ssd_mbv1_voc_qat_dis.yaml
index fc532a0a..710a3a97 100644
--- a/example/auto_compression/detection/configs/ssd_mbv1_voc_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/ssd_mbv1_voc_qat_dis.yaml
@@ -13,7 +13,7 @@ Distillation:
   - concat_2.tmp_0
   - concat_1.tmp_0
 
-Quantization:
+QuantAware:
   use_pact: True
   weight_quantize_type: 'channel_wise_abs_max'
   activation_quantize_type: 'moving_average_abs_max'
diff --git a/example/auto_compression/detection/configs/tinypose_qat_dis.yaml b/example/auto_compression/detection/configs/tinypose_qat_dis.yaml
index 237f7364..7cf508fc 100644
--- a/example/auto_compression/detection/configs/tinypose_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/tinypose_qat_dis.yaml
@@ -12,7 +12,7 @@ Distillation:
   node:
   - conv2d_441.tmp_0
 
-Quantization:
+QuantAware:
   use_pact: true
   activation_quantize_type: 'moving_average_abs_max'
   weight_quantize_type: 'channel_wise_abs_max'  # 'abs_max' is layer wise quant
diff --git a/example/auto_compression/detection/configs/yolov3_mbv1_qat_dis.yaml b/example/auto_compression/detection/configs/yolov3_mbv1_qat_dis.yaml
index bc48a679..e0cf9a9b 100644
--- a/example/auto_compression/detection/configs/yolov3_mbv1_qat_dis.yaml
+++ b/example/auto_compression/detection/configs/yolov3_mbv1_qat_dis.yaml
@@ -13,7 +13,7 @@ Distillation:
   - conv2d_85.tmp_0
   - conv2d_86.tmp_0
 
-Quantization:
+QuantAware:
   activation_quantize_type: 'range_abs_max'
   quantize_op_types:
   - conv2d
diff --git a/example/auto_compression/hyperparameter_tutorial.md b/example/auto_compression/hyperparameter_tutorial.md
index 7a1218af..fc0f65f6 100644
--- a/example/auto_compression/hyperparameter_tutorial.md
+++ b/example/auto_compression/hyperparameter_tutorial.md
@@ -3,11 +3,11 @@
 
 ## 1.1 Hyperparameters of each compression method
 
-### 1.1.1 Quantization-aware training (quantization)
+### 1.1.1 Quantization-aware training (quant aware)
 
 The quantization options mainly set the quantization bit width and the op types to quantize; the quantizable ops include convolution layers (conv2d, depthwise_conv2d) and fully connected layers (mul, matmul_v2). The following example quantizes only the convolution layers:
 
 ```yaml
-Quantization:
+QuantAware:
     use_pact: false                               # whether to use the PACT method in quant-aware training
     weight_quantize_type: 'channel_wise_abs_max'  # weight quantization type
     quantize_op_types: [conv2d, depthwise_conv2d] # list of op types to quantize
diff --git a/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml b/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml
index 461f18e0..1bcc0e73 100644
--- a/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml
+++ b/example/auto_compression/image_classification/configs/EfficientNetB0/qat_dis.yaml
@@ -11,7 +11,7 @@ Distillation:
   node:
   - softmax_1.tmp_0
 
-Quantization:
+QuantAware:
   use_pact: true
   activation_bits: 8
   is_full_quantize: false
diff --git a/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml
index 71e2eeaf..0e91d4c0 100644
--- a/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml
+++ b/example/auto_compression/image_classification/configs/GhostNet_x1_0/qat_dis.yaml
@@ -10,7 +10,7 @@ Distillation:
   loss: l2
   node:
   - softmax_0.tmp_0
-Quantization:
+QuantAware:
   use_pact: true
   activation_bits: 8
   is_full_quantize: false
diff --git a/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml b/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml
index 6276f703..3b1e4084 100644
--- a/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml
+++ b/example/auto_compression/image_classification/configs/InceptionV3/qat_dis.yaml
@@ -12,7 +12,7 @@ Distillation:
   loss: l2
   node:
   - softmax_1.tmp_0
-Quantization:
+QuantAware:
   is_full_quantize: false
   activation_quantize_type: moving_average_abs_max
   weight_quantize_type: channel_wise_abs_max
diff --git a/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml b/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml
index 9c3c2b97..8f74d745 100644
--- a/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml
+++ b/example/auto_compression/image_classification/configs/MobileNetV1/qat_dis.yaml
@@ -10,7 +10,7 @@ Distillation:
   loss: l2
   node:
   - softmax_0.tmp_0
-Quantization:
+QuantAware:
   use_pact: true
   activation_bits: 8
   is_full_quantize: false
diff --git a/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml
b/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml index e6a2e104..2da27da8 100644 --- a/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/MobileNetV3_large_x1_0/qat_dis.yaml @@ -9,7 +9,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml b/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml index 64d57117..50eb9898 100644 --- a/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/PPHGNet_tiny/qat_dis.yaml @@ -11,7 +11,7 @@ Distillation: node: - softmax_1.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml b/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml index 00c05888..ae6f25b0 100644 --- a/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/PPLCNetV2_base/qat_dis.yaml @@ -11,7 +11,7 @@ Distillation: node: - softmax_1.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml index d588f8a9..f0e67260 100644 --- a/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/PPLCNet_x1_0/qat_dis.yaml @@ -10,7 +10,7 @@ Distillation: loss: l2 node: - softmax_1.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml b/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml index 078915aa..2d0ea1eb 100644 --- a/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/ResNet50_vd/qat_dis.yaml @@ -11,7 +11,7 @@ Distillation: node: - softmax_0.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml index 0c0ca531..31c618e4 100644 --- a/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/ShuffleNetV2_x1_0/qat_dis.yaml @@ -10,7 +10,7 @@ Distillation: loss: l2 node: - softmax_0.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml b/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml index 073f3872..4b9964af 100644 --- a/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/SqueezeNet1_0/qat_dis.yaml @@ -10,7 +10,7 @@ Distillation: loss: l2 node: - softmax_0.tmp_0 
-Quantization: +QuantAware: activation_bits: 8 is_full_quantize: false activation_quantize_type: moving_average_abs_max diff --git a/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml b/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml index ce8f746f..99f61b77 100644 --- a/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml +++ b/example/auto_compression/image_classification/configs/SwinTransformer_base_patch4_window7_224/qat_dis.yaml @@ -10,7 +10,7 @@ Distillation: loss: l2 node: - softmax_48.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/ocr/configs/ppocrv3_det_qat_dist.yaml b/example/auto_compression/ocr/configs/ppocrv3_det_qat_dist.yaml index 7b54d420..b753dd71 100644 --- a/example/auto_compression/ocr/configs/ppocrv3_det_qat_dist.yaml +++ b/example/auto_compression/ocr/configs/ppocrv3_det_qat_dist.yaml @@ -9,7 +9,7 @@ Distillation: alpha: 1.0 loss: l2 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml index c2b230d8..683f4a6f 100644 --- a/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov5s_qat_dis.yaml @@ -12,7 +12,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: onnx_format: true use_pact: true activation_quantize_type: 'moving_average_abs_max' diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml index 9a3f7af3..ded46306 100644 --- a/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_qat_dis.yaml @@ -12,7 +12,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: onnx_format: true activation_quantize_type: 'moving_average_abs_max' quantize_op_types: diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov6s_v2_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_v2_qat_dis.yaml index 4c775392..92acc3be 100644 --- a/example/auto_compression/pytorch_yolo_series/configs/yolov6s_v2_qat_dis.yaml +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov6s_v2_qat_dis.yaml @@ -13,7 +13,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: onnx_format: true activation_quantize_type: 'moving_average_abs_max' quantize_op_types: diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml index b7dcce83..29c92a99 100644 --- a/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov7_qat_dis.yaml @@ -12,7 +12,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: onnx_format: true activation_quantize_type: 'moving_average_abs_max' quantize_op_types: diff --git a/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml b/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml index 7359e0ee..84290237 100644 --- 
a/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml +++ b/example/auto_compression/pytorch_yolo_series/configs/yolov7_tiny_qat_dis.yaml @@ -12,7 +12,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: onnx_format: true activation_quantize_type: 'moving_average_abs_max' quantize_op_types: diff --git a/example/auto_compression/semantic_segmentation/configs/BiSeNetV2/BiSeNetV2_qat.yaml b/example/auto_compression/semantic_segmentation/configs/BiSeNetV2/BiSeNetV2_qat.yaml index 1de0705a..52700e2d 100644 --- a/example/auto_compression/semantic_segmentation/configs/BiSeNetV2/BiSeNetV2_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/BiSeNetV2/BiSeNetV2_qat.yaml @@ -11,7 +11,7 @@ Distillation: node: - conv2d_103.tmp_1 -Quantization: +QuantAware: onnx_format: True quantize_op_types: - conv2d diff --git a/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml b/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml index 36c4e34e..3a2e8c62 100644 --- a/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/deeplabv3/deeplabv3_qat.yaml @@ -11,7 +11,7 @@ Distillation: node: - conv2d_123.tmp_1 -Quantization: +QuantAware: onnx_format: True quantize_op_types: - conv2d diff --git a/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml b/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml index 1eec456e..8f852cdf 100644 --- a/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/hrnet/hrnet_qat.yaml @@ -10,7 +10,7 @@ Distillation: node: - conv2d_613.tmp_1 -Quantization: +QuantAware: onnx_format: True quantize_op_types: - conv2d diff --git a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml index 8893dc35..5b497a1e 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_humanseg/pp_humanseg_qat.yaml @@ -10,7 +10,7 @@ Distillation: node: - batch_norm_47.tmp_2 -Quantization: +QuantAware: onnx_format: True quantize_op_types: - conv2d diff --git a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml index 12eea7e2..f739354a 100644 --- a/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/pp_liteseg/pp_liteseg_qat.yaml @@ -10,7 +10,7 @@ Distillation: node: - conv2d_95.tmp_0 -Quantization: +QuantAware: onnx_format: True quantize_op_types: - conv2d diff --git a/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml b/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml index ff055e2b..c25033f9 100644 --- a/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml +++ b/example/auto_compression/semantic_segmentation/configs/unet/unet_qat.yaml @@ -10,7 +10,7 @@ Distillation: node: - conv2d_37.tmp_1 -Quantization: +QuantAware: onnx_format: True quantize_op_types: - conv2d diff --git a/example/auto_compression/tensorflow_mobilenet/configs/mbv1_qat_dis.yaml 
b/example/auto_compression/tensorflow_mobilenet/configs/mbv1_qat_dis.yaml index 359ac18d..eda30fa3 100644 --- a/example/auto_compression/tensorflow_mobilenet/configs/mbv1_qat_dis.yaml +++ b/example/auto_compression/tensorflow_mobilenet/configs/mbv1_qat_dis.yaml @@ -38,7 +38,7 @@ Distillation: - batch_norm_26.tmp_3 - conv2d_42.tmp_1 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/example/full_quantization/image_classification/configs/mobilenetv3_large_qat_dis.yaml b/example/full_quantization/image_classification/configs/mobilenetv3_large_qat_dis.yaml index 52c76219..8c72318b 100644 --- a/example/full_quantization/image_classification/configs/mobilenetv3_large_qat_dis.yaml +++ b/example/full_quantization/image_classification/configs/mobilenetv3_large_qat_dis.yaml @@ -9,7 +9,7 @@ Global: Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: use_pact: true activation_bits: 8 activation_quantize_type: moving_average_abs_max diff --git a/example/full_quantization/picodet/configs/picodet_npu.yaml b/example/full_quantization/picodet/configs/picodet_npu.yaml index 37f20d7b..9bfffab4 100644 --- a/example/full_quantization/picodet/configs/picodet_npu.yaml +++ b/example/full_quantization/picodet/configs/picodet_npu.yaml @@ -11,7 +11,7 @@ Distillation: alpha: 1.0 loss: l2 -Quantization: +QuantAware: # Auto Compression use_pact: true activation_quantize_type: 'moving_average_abs_max' weight_bits: 8 diff --git a/example/full_quantization/picodet/configs/picodet_npu_with_postprocess.yaml b/example/full_quantization/picodet/configs/picodet_npu_with_postprocess.yaml index 4064df0d..6a291be1 100644 --- a/example/full_quantization/picodet/configs/picodet_npu_with_postprocess.yaml +++ b/example/full_quantization/picodet/configs/picodet_npu_with_postprocess.yaml @@ -11,7 +11,7 @@ Distillation: alpha: 1.0 loss: l2 -Quantization: +QuantAware: # Auto Compression use_pact: true activation_quantize_type: 'moving_average_abs_max' weight_bits: 8 diff --git a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_fine_tune.yaml b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_fine_tune.yaml index 971a7376..ad606121 100755 --- a/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_fine_tune.yaml +++ b/example/post_training_quantization/pytorch_yolo_series/configs/yolov6s_fine_tune.yaml @@ -1,5 +1,5 @@ arch: YOLOv6 -model_dir: ./yolov6s.onnx +model_dir: ./yolov6s.onnx dataset_dir: /dataset/coco/ model_filename: model.pdmodel params_filename: model.pdiparams @@ -8,25 +8,3 @@ val_image_dir: val2017 train_anno_path: annotations/instances_train2017.json val_anno_path: annotations/instances_val2017.json skip_tensor_list: None -regions: [['x2paddle_image_arrays','relu_8.tmp_0'], - ['relu_8.tmp_0','relu_15.tmp_0'], - ['relu_15.tmp_0','relu_21.tmp_0'], - ['concat_1.tmp_0','relu_26.tmp_0'], - ['concat_2.tmp_0', 'relu_30.tmp_0'], - ['relu_30.tmp_0', 'concat_4.tmp_0'], - ['relu_30.tmp_0', 'relu_31.tmp_0'], - ['concat_3.tmp_0', 'relu_35.tmp_0'], - ['relu_35.tmp_0', 'relu_36.tmp_0'], - ['concat_5.tmp_0', 'concat_10.tmp_0'], - ['relu_35.tmp_0', 'concat_8.tmp_0']] -region_weights_names: [['conv2d_0.w_0','conv2d_1.w_0','conv2d_2.w_0','conv2d_3.w_0','conv2d_4.w_0','conv2d_5.w_0','conv2d_6.w_0','conv2d_7.w_0','conv2d_8.w_0'], - ['conv2d_9.w_0','conv2d_10.w_0','conv2d_11.w_0','conv2d_12.w_0','conv2d_13.w_0','conv2d_14.w_0','conv2d_15.w_0'], - 
['conv2d_16.w_0','conv2d_17.w_0','conv2d_18.w_0','conv2d_19.w_0','conv2d_20.w_0','conv2d_21.w_0'],
-                      ['conv2d_22.w_0','conv2d_23.w_0','conv2d_24.w_0','conv2d_25.w_0','conv2d_26.w_0'],
-                      ['conv2d_27.w_0','conv2d_28.w_0','conv2d_29.w_0','conv2d_30.w_0'],
-                      ['conv2d_32.w_0','conv2d_34.w_0','conv2d_35.w_0','conv2d_37.w_0','conv2d_38.w_0','conv2d_39.w_0'],
-                      ['conv2d_31.w_0'],
-                      ['conv2d_33.w_0','conv2d_36.w_0','conv2d_40.w_0','conv2d_41.w_0'],
-                      ['conv2d_42.w_0'],
-                      ['conv2d_44.w_0','conv2d_47.w_0','conv2d_51.w_0','conv2d_52.w_0','conv2d_53.w_0','conv2d_54.w_0','conv2d_55.w_0','conv2d_56.w_0','conv2d_57.w_0','conv2d_58.w_0'],
-                      ['conv2d_43.w_0','conv2d_45.w_0','conv2d_46.w_0','conv2d_49.w_0','conv2d_48.w_0','conv2d_50.w_0'],]
\ No newline at end of file
diff --git a/example/post_training_quantization/pytorch_yolo_series/fine_tune.py b/example/post_training_quantization/pytorch_yolo_series/fine_tune.py
index ea777a47..144cde3e 100755
--- a/example/post_training_quantization/pytorch_yolo_series/fine_tune.py
+++ b/example/post_training_quantization/pytorch_yolo_series/fine_tune.py
@@ -43,8 +43,6 @@ def argsparser():
         help="which device used to compress.")
     parser.add_argument(
         '--algo', type=str, default='avg', help="post quant algo.")
-    parser.add_argument(
-        '--round_type', type=str, default='adaround', help="round type.")
 
     parser.add_argument('--gpu', type=int, default=0, help='gpu index')
     parser.add_argument(
@@ -57,6 +55,12 @@ def argsparser():
         type=bool,
         default=False,
         help='simulate activation quant')
+    parser.add_argument(
+        '--epochs', type=int, default=20, help='number of epochs for reconstruction')
+    parser.add_argument(
+        '--lr', type=float, default=0.1, help='learning rate for reconstruction')
+    parser.add_argument(
+        '--limit', type=int, default=5, help='size of each region')
 
     return parser
 
@@ -102,12 +106,11 @@ def main():
         weight_quantize_type='channel_wise_abs_max',
         recon_level=FLAGS.recon_level,
         simulate_activation_quant=FLAGS.simulate_activation_quant,
-        regions=config['regions'],
-        region_weights_names=config['region_weights_names'],
-        skip_tensor_list=config['skip_tensor_list']
-        if 'skip_tensor_list' in config else None,
-        epochs=20,
-        lr=0.1)
+        regions=None,
+        region_weights_names=None,
+        epochs=FLAGS.epochs,
+        lr=FLAGS.lr,
+        limit=FLAGS.limit)
 
 
 if __name__ == '__main__':
diff --git a/paddleslim/auto_compression/__init__.py b/paddleslim/auto_compression/__init__.py
index cfc26259..bb0129b2 100644
--- a/paddleslim/auto_compression/__init__.py
+++ b/paddleslim/auto_compression/__init__.py
@@ -20,7 +20,8 @@ from .utils import *
 
 __all__ = [
     "AutoCompression",
-    "Quantization",
+    "QuantAware",
+    "QuantPost",
     "Distillation",
     "MultiTeacherDistillation",
     "HyperParameterOptimization",
diff --git a/paddleslim/auto_compression/auto_strategy.py b/paddleslim/auto_compression/auto_strategy.py
index 35b36e8d..cfad16b7 100644
--- a/paddleslim/auto_compression/auto_strategy.py
+++ b/paddleslim/auto_compression/auto_strategy.py
@@ -125,17 +125,17 @@ def create_strategy_config(strategy_str, model_type):
     ### Only Linux can use SMAC to do hyperparameter optimization;
     ### choose quant_aware to do quantization on other platforms.
     if platform.system().lower() == 'linux':
-        quant_config = Quantization(**default_quant_config)
+        quant_config = QuantAware(**default_quant_config)
         hpo_config = HyperParameterOptimization(**hpo_config_tester)
         configs.append({
             'QuantPost': quant_config,
             'HyperParameterOptimization': hpo_config
         })
     else:
-        quant_config = Quantization(**default_quant_config)
+        quant_config =
QuantAware(**default_quant_config) dis_config = Distillation() configs.append({ - 'Quantization': quant_config, + 'QuantAware': quant_config, 'Distillation': dis_config }) @@ -248,7 +248,7 @@ def get_final_quant_config(ptq_loss, model_type=None): return None ### if emd loss less than MAGIC_MAX_EMD_DISTANCE, select quant_post & hpo. elif ptq_loss < MAGIC_MAX_EMD_DISTANCE: - quant_config = Quantization(**default_quant_config) + quant_config = QuantAware(**default_quant_config) hpo_config = HyperParameterOptimization(**default_hpo_config) configs = [{ 'QuantPost': quant_config, @@ -257,9 +257,9 @@ def get_final_quant_config(ptq_loss, model_type=None): ### if emd loss greater than MAGIC_MAX_EMD_DISTANCE, select qat & dist. else: - quant_config = Quantization(**default_quant_config) + quant_config = QuantAware(**default_quant_config) dis_config = Distillation() - configs = [{'Quantization': quant_config, 'Distillation': dis_config}] + configs = [{'QuantAware': quant_config, 'Distillation': dis_config}] _logger.info("Start Quantization and Distillation Training.") return configs diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py index d51d2830..e86551c7 100644 --- a/paddleslim/auto_compression/compressor.py +++ b/paddleslim/auto_compression/compressor.py @@ -88,7 +88,7 @@ class AutoCompression: Only one strategy(quant_post with hyperparameter optimization) can set train_config to None. Default: None. strategy_config(dict, list(dict), optional): The strategy config. You can set single config to get multi-strategy config, such as - 1. set ``Quantization`` and ``Distillation`` to get quant_aware and distillation compress config. + 1. set ``QuantAware`` and ``Distillation`` to get quant_aware and distillation compress config. The Quantization config can reference `https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/auto_compression/strategy_config.py#L55`_ . The Distillation config can reference `https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/auto_compression/strategy_config.py#L107`_ . 2. set ``QuantPost`` and ``HyperParameterOptimization`` to get quant_post and hyperparameter optimization compress config. @@ -350,7 +350,7 @@ class AutoCompression: strategy = [] config = [] for strategy_c in strategy_config: - quant_config = strategy_c.get("Quantization", None) + quant_config = strategy_c.get("QuantAware", None) hpo_config = strategy_c.get("HyperParameterOptimization", None) ptq_config = strategy_c.get("QuantPost", None) prune_config = strategy_c.get("ChannelPrune", None) diff --git a/paddleslim/auto_compression/strategy_config.py b/paddleslim/auto_compression/strategy_config.py index 0ebc341e..02d92042 100644 --- a/paddleslim/auto_compression/strategy_config.py +++ b/paddleslim/auto_compression/strategy_config.py @@ -16,7 +16,7 @@ from collections import namedtuple __all__ = [ "BaseStrategy", - "Quantization", + "QuantAware", "Distillation", "MultiTeacherDistillation", "HyperParameterOptimization", @@ -33,7 +33,7 @@ __all__ = [ ] SUPPORTED_CONFIG = [ - "Quantization", + "QuantAware", "Distillation", "MultiTeacherDistillation", "HyperParameterOptimization", @@ -52,7 +52,7 @@ class BaseStrategy: self.name = name -class Quantization(BaseStrategy): +class QuantAware(BaseStrategy): def __init__(self, quantize_op_types=[ 'conv2d', 'depthwise_conv2d', 'conv2d_transpose', 'mul', @@ -87,7 +87,7 @@ class Quantization(BaseStrategy): onnx_format(bool): Whether to export the quantized model with format of ONNX. Default is False. 
is_full_quantize(bool): If True, 'quantize_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES. Default: False.
         """
-        super(Quantization, self).__init__("Quantization")
+        super(QuantAware, self).__init__("QuantAware")
         self.quantize_op_types = quantize_op_types
         self.weight_bits = weight_bits
         self.activation_bits = activation_bits
diff --git a/paddleslim/quant/reconstruction_quantization.py b/paddleslim/quant/reconstruction_quantization.py
old mode 100755
new mode 100644
index 60b3421e..f604b472
--- a/paddleslim/quant/reconstruction_quantization.py
+++ b/paddleslim/quant/reconstruction_quantization.py
@@ -166,8 +166,8 @@ class ReconstructionQuantization(PostTrainingQuantization):
             lr=self._config['lr'],
             bias_correction=self._bias_correction,
             epochs=self._config['epochs'],
-            scale_trainable=self._config['scale_trainable'])
-        self._program = reconstruction_quanter._run()
+            limit=self._config['limit'])
+        self._program, self._scale_dict = reconstruction_quanter._run()
 
     def _postprocessing(self):
         if self._algo == 'min_max':
@@ -220,8 +220,8 @@ class ReconstructionQuanter(object):
                  lr=0.1,
                  bias_correction=False,
                  epochs=20,
-                 scale_trainable=False,
-                 drop_prob=0.5):
+                 drop_prob=0.5,
+                 limit=5):
         '''
         Reconstruction Quanter, used to optimize the rounding policy
         by reconstructing the intermediate output.
@@ -259,20 +259,17 @@
             lr(float, optional): The learning rate of Reconstruction Quanter. Default is 0.1.
             bias_correction(bool, optional): If set as True, use the bias correction
                 method of https://arxiv.org/abs/1810.05723. Default is False.
-            scale_trainable: Wether weight‘s scale is trainable. Default is False.
-            drop_prob: The dropout probability of activation quantization, and it is valid only if
+            drop_prob(float, optional): The dropout probability of activation quantization, and it is valid only if
                 simulate_activation_quant is True. Default is 0.5.
+            limit(int, optional): The size of each region. Default is 5.
         Returns:
             None
         '''
         assert recon_level in [
             'layer-wise', 'region-wise'
-        ], "recon_level must be one of the ['layer-wise', 'region-wise'],but received: {}".format(
+        ], "recon_level must be one of the ['layer-wise', 'region-wise'], but received: {}".format(
             recon_level)
-        if recon_level == 'region-wise':
-            assert regions is not None, "The regions cannot be None."
-            assert region_weights_names is not None, "The region_weights_names cannot be None."
self._simulate_activation_quant = simulate_activation_quant self._program = fp32_program self._data_loader = data_loader @@ -292,11 +289,18 @@ class ReconstructionQuanter(object): self._regions = regions self._region_weights_names = region_weights_names self._bias_correction = bias_correction - if self._recon_level == 'layer-wise': + self._limit = limit + + if recon_level == 'region-wise' and regions is None: + builder = RegionBuilder(program=self._program) + _logger.info('Begin Region division') + self._regions, self._region_weights_names = builder._create_regions( + limit=self._limit) + _logger.info('End Region division') + elif self._recon_level == 'layer-wise': regions, region_weights_names = self._get_layers() self._regions = regions self._region_weights_names = region_weights_names - self._scale_trainable = scale_trainable self._drop_prob = drop_prob def _get_layers(self): @@ -321,6 +325,13 @@ class ReconstructionQuanter(object): return regions, region_weights_names def _preprocess(self): + + if self._weight_quantize_type == 'channel_wise_abs_max': + for name in self._weight_var_names: + for i, s in enumerate(self._scale_dict[name]): + if s == 0.0: + self._scale_dict[name][i] = 1e-8 + data_name_map = {} for name in self._feed_list: data_name_map[name] = name @@ -357,8 +368,10 @@ class ReconstructionQuanter(object): region_ = self._regions[k] tmp_program.global_block().var(region_[0]).stop_gradient = True quant_op_out_name = region_[1] + _logger.info(f"Region's input: {region_[0]} output: {region_[1]}") + names = self._region_weights_names[k] - _logger.info(f"Current weights: {names}") + _logger.info(f"Current quanted weights: {names}") loss_function = ReconstructionQuanterLoss( program=tmp_program, weight_region_names=names) update_params = [ @@ -407,6 +420,9 @@ class ReconstructionQuanter(object): sys.stdout.flush() if i + 1 == self._num_iterations: break + + if self._weight_quantize_type == 'channel_wise_abs_max': + self._update_scale() self._update_weights_to_int() if self._bias_correction: self._bias_correction_w() @@ -472,7 +488,7 @@ class ReconstructionQuanter(object): scale = np.array(scale) scale = scale.reshape(scale.shape[0], 1) if len(shape) == 2: - scale = scale.repeat(shape[0], axis=0) + scale = scale.repeat(shape[0], axis=1).T else: scale = scale.repeat(shape[1] * shape[2] * shape[3], axis=1) scale = scale.reshape(shape) @@ -614,6 +630,9 @@ class ReconstructionQuanter(object): op.input('X')[0].endswith('scale') ) or _type == 'sigmoid': _inputs = {'X': op.input('X')[0]} + elif (_type == 'scale' and + op.input('X')[0].endswith('copy')): + _inputs = {'X': var._var} else: _inputs = {'X': op.input('X')[0] + '.rounding'} elif func == "_drop_quant_dequant": @@ -806,6 +825,202 @@ class ReconstructionQuanterLoss(object): return total_loss, rec_loss, round_loss +class PriorityQueue: + def __init__(self): + self._data = [] + self._ops = set() + self._idx = 0 + self._lazy_tag = True + + def pop(self): + if not self._lazy_tag: + self._data = sorted(self._data, key=lambda x: x[0]) + self._lazy_tag = True + if self._idx >= len(self._data): raise IndexError('Index out of range!') + ele = self._data[self._idx] + self._idx += 1 + return ele + + def push(self, depth, op): + if op in self._ops: return + self._data.append((depth, op)) + self._ops.add(op) + self._lazy_tag = False + + def empty(self): + return self._idx >= len(self._data) + + +class RegionBuilder(object): + def __init__(self, program): + self._program = program + self._graph = GraphWrapper(self._program) + self._op_idx_map = 
{} + for op in self._graph.ops(): + self._op_idx_map[op.idx()] = op + self._depth = {} + self._init_depth() + self._cache = {} + self._regions = [] + self._region_weights_names = [] + + def _init_depth(self): + for op in self._graph.ops(): + if len(self._graph.pre_ops(op)) == 0: + self._depth[op.idx()] = 0 + continue + + depths_cache = [] + for up_op in self._graph.pre_ops(op): + assert up_op.idx() in self._depth + depths_cache.append(self._depth[up_op.idx()]) + self._depth[op.idx()] = max(depths_cache) + 1 + + def _build(self, op, limit): + def _find_multi_input_ep(op): + least_first_queue = PriorityQueue() + + for down_op in self._graph.next_ops(op): + least_first_queue.push(self._depth[down_op.idx()], + down_op.idx()) + + while not least_first_queue.empty(): + iter_op_idx = least_first_queue.pop()[-1] + iter_op = self._op_idx_map[iter_op_idx] + if (least_first_queue.empty() and + len(self._graph.pre_ops(iter_op)) > 1): + return iter_op + for down_op in self._graph.next_ops(iter_op): + least_first_queue.push(self._depth[down_op.idx()], + down_op.idx()) + return None + + def _find_coherent_ep(op): + ops = self._graph.next_ops(op) + if len(ops) == 1: + following_op = ops[0] + if following_op.type() == 'fetch': + return None + inps = op.all_inputs() + non_parameter_input = 0 + for var in inps: + if not var._var.persistable: + non_parameter_input += 1 + upstream_ops = len(self._graph.pre_ops(following_op)) + if non_parameter_input == 1 and upstream_ops == 1: + return ops[0] + return None + + sp, ep, future_ep = op, op, op + while future_ep is not None: + if len(self._graph.next_ops(ep)) <= 1: + future_ep = _find_coherent_ep(ep) + else: + future_ep = _find_multi_input_ep(ep) + + if future_ep is None or self._depth[future_ep.idx()] - self._depth[ + sp.idx()] >= limit: + return self._create_region(sp, ep) + ep = future_ep + + return self._create_region(sp=sp, ep=ep) + + def _opset_matching(self, sp, ep): + + if sp.idx() in self._cache: return self._cache[sp.idx()] + + ret_collection = set() + + following_ops = self._graph.next_ops(sp) + + if (len(following_ops)) == 0: + return ret_collection.add(sp.idx()) + + for op in following_ops: + if op == ep: + ret_collection.update([sp.idx(), op.idx()]) + else: + further_res = self._opset_matching(sp=op, ep=ep) + + if further_res is None: + return None + + if len(further_res) > 0: + ret_collection.update(further_res) + ret_collection.add(sp.idx()) + self._cache[sp.idx()] = ret_collection + return ret_collection + + def opset_matching(self, sp, ep): + + ret_collection, candidates = set(), set() + for op in self._graph.ops(): + if op == sp: + candidates.add(op.idx()) + for idx in candidates: + op = self._op_idx_map[idx] + partial_matchings = self._opset_matching(sp=op, ep=ep) + if partial_matchings is None: + return None + if len(partial_matchings) > 0: + ret_collection.update(partial_matchings) + self._cache.clear() + return ret_collection + + def _create_region(self, sp, ep): + rps = self.opset_matching(sp, ep) + return sp, ep, rps + + def _create_regions(self, limit): + visited = [] + for op in self._graph.ops(): + region = [] + region_weight_names = [] + if op.type() == 'fill_constant': continue + if op.type() == 'feed': continue + if op.type() == 'fetch': continue + if op.idx() in visited: continue + + sp, ep, rps = self._build(op=op, limit=limit) + if rps is None: + continue + ops = [self._op_idx_map[idx] for idx in rps] + + # add region's input var + inps = sp.all_inputs() + for var in inps: + if not var._var.persistable: + 
region.append(var._var.name)
+                break
+
+        # add region's output var
+        if ep.type() == 'batch_norm':
+            out_var = ep.outputs('Y')
+        else:
+            out_var = ep.all_outputs()
+        if not out_var[0]._var.persistable:
+            region.append(out_var[0]._var.name)
+
+        for idx in rps:
+            visited.append(idx)
+            op = self._op_idx_map[idx]
+            if op.type() not in [
+                    "conv2d", "depthwise_conv2d", "mul", "matmul",
+                    "matmul_v2"
+            ]:
+                continue
+            inps = op.all_inputs()
+            for var in inps:
+                if var._var.persistable:
+                    region_weight_names.append(var._var.name)
+
+        if len(region) < 2 or len(region_weight_names) < 1: continue
+        self._regions.append(region)
+        self._region_weights_names.append(region_weight_names)
+
+        return self._regions, self._region_weights_names
+
+
 def quant_recon_static(executor,
                        model_dir,
                        quantize_model_path,
@@ -846,7 +1061,8 @@ def quant_recon_static(executor,
                        epochs=20,
                        scale_trainable=False,
                        drop_prob=0.5,
-                       lr=0.1):
+                       lr=0.1,
+                       limit=6):
     """
     The function utilizes static post training quantization method to
     quantize the fp32 model. It uses calibrate data to calculate the
@@ -920,9 +1136,8 @@ def quant_recon_static(executor,
         skip_tensor_list(list): List of skip quant tensor name.
         is_use_cache_file(bool): This param is deprecated.
         cache_dir(str): This param is deprecated.
-        epochs: The number of steps in the reconstruction proces. Default is 20.
-        scale_trainable: Wether weight‘s scale is trainable. Default is False.
-        drop_prob: The dropout probability of activation quantization, and it is valid only if
+        epochs(int): The number of steps in the reconstruction process. Default is 20.
+        drop_prob(float): The dropout probability of activation quantization, and it is valid only if
             simulate_activation_quant is True. Default is 0.5.
         regions(list[list], optional): The list of some regions, each region is a subgraph of
             fp32 program and it will have exactly one input operation and one output operation. When
             recon_level is region-wise, regions can not be None. Default is None.
         region_weights_names(list[list], optional): The weight names inside every region.
             Default is None.
+        limit(int): The size of each region. Default is 6.
Returns: None """ @@ -965,8 +1181,8 @@ def quant_recon_static(executor, regions=regions, region_weights_names=region_weights_names, epochs=epochs, - scale_trainable=scale_trainable, - lr=lr) + lr=lr, + limit=limit) reconstruction_quantization = ReconstructionQuantization( PTQCollections=PTQCollections, RSQCollections=RSQCollections) diff --git a/tests/act/qat_dist_train.yaml b/tests/act/qat_dist_train.yaml index d0493c49..82266ed7 100644 --- a/tests/act/qat_dist_train.yaml +++ b/tests/act/qat_dist_train.yaml @@ -1,5 +1,5 @@ # For unittests -Quantization: +QuantAware: quantize_op_types: - conv2d - depthwise_conv2d diff --git a/tests/test_reconstruct_quantization.py b/tests/test_reconstruct_quantization.py index b1582c3c..d98cbbd4 100755 --- a/tests/test_reconstruct_quantization.py +++ b/tests/test_reconstruct_quantization.py @@ -14,44 +14,51 @@ import sys sys.path.append("../") import unittest +import tempfile import paddle from paddleslim.quant import quant_post_static from static_case import StaticCase sys.path.append("../demo") -from models import MobileNet +from models import * from layers import conv_bn_layer import paddle.dataset.mnist as reader import numpy as np from paddleslim.quant import quant_recon_static -class TestRoundingOptimizer(StaticCase): +class ReconPTQ(unittest.TestCase): def __init__(self, *args, **kwargs): - super(TestRoundingOptimizer, self).__init__(*args, **kwargs) + super(ReconPTQ, self).__init__(*args, **kwargs) paddle.enable_static() + self.tmpdir = tempfile.TemporaryDirectory(prefix="test_") self._gen_model() def _gen_model(self): - image = paddle.static.data( - name='image', shape=[None, 1, 28, 28], dtype='float32') - label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') - model = MobileNet() - out = model.net(input=image, class_dim=10) - cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label) - avg_cost = paddle.mean(x=cost) - acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1) - acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5) - optimizer = paddle.optimizer.Momentum( - momentum=0.9, - learning_rate=0.01, - weight_decay=paddle.regularizer.L2Decay(4e-5)) - optimizer.minimize(avg_cost) - main_prog = paddle.static.default_main_program() - val_prog = main_prog.clone(for_test=True) place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( ) else paddle.CPUPlace() exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard(main_program, startup_program): + image = paddle.static.data( + name='image', shape=[None, 1, 28, 28], dtype='float32') + label = paddle.static.data( + name='label', shape=[None, 1], dtype='int64') + model = MobileNetV2() + out = model.net(input=image, class_dim=10) + cost = paddle.nn.functional.loss.cross_entropy( + input=out, label=label) + avg_cost = paddle.mean(x=cost) + acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1) + acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5) + + val_program = main_program.clone(for_test=True) + optimizer = paddle.optimizer.Momentum( + momentum=0.9, + learning_rate=0.01, + weight_decay=paddle.regularizer.L2Decay(4e-5)) + optimizer.minimize(avg_cost) + exe.run(startup_program) def transform(x): return np.reshape(x, [1, 28, 28]) @@ -95,64 +102,66 @@ class TestRoundingOptimizer(StaticCase): 'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'. 
format(iter, cost, top1, top5)) - train(main_prog) + train(main_program) paddle.fluid.io.save_inference_model( - dirname='./test_rounding_optimizer', - feeded_var_names=[image.name, label.name], - target_vars=[avg_cost, acc_top1, acc_top5], - main_program=val_prog, + dirname=self.tmpdir.name, + feeded_var_names=[image.name], + target_vars=[out], + main_program=val_program, executor=exe, - model_filename='model', - params_filename='params') - + model_filename='model.pdmodel', + params_filename='params.pdiparams') + print(f"saved infer model to [{self.tmpdir.name}]") self.data_loader = sample_generator_creator() - self._regions = [['image', 'batch_norm_26.tmp_4']] - self._region_weights_names = [[ - 'conv1_weights', 'conv2_1_dw_weights', 'conv2_1_sep_weights', - 'conv2_2_dw_weights', 'conv2_2_sep_weights', 'conv3_1_dw_weights', - 'conv3_1_sep_weights', 'conv3_2_dw_weights', 'conv3_2_sep_weights', - 'conv4_1_dw_weights', 'conv4_1_sep_weights', 'conv4_2_dw_weights', - 'conv4_2_sep_weights', 'conv5_1_dw_weights', 'conv5_1_sep_weights', - 'conv5_2_dw_weights', 'conv5_2_sep_weights', 'conv5_3_dw_weights', - 'conv5_3_sep_weights', 'conv5_4_dw_weights', 'conv5_4_sep_weights', - 'conv5_5_dw_weights', 'conv5_5_sep_weights', 'conv5_6_dw_weights', - 'conv5_6_sep_weights', 'conv6_dw_weights', 'conv6_sep_weights' - ]] - - def test_qdrop(self): + def __del__(self): + self.tmpdir.cleanup() + + +class TestReconRegion(ReconPTQ): + def __init__(self, *args, **kwargs): + super(TestReconRegion, self).__init__(*args, **kwargs) + + def test_qdrop_region(self): place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( ) else paddle.CPUPlace() exe = paddle.static.Executor(place) quant_recon_static( exe, - './test_rounding_optimizer', - quantize_model_path='rsq_out', + self.tmpdir.name, + quantize_model_path='output_region', sample_generator=self.data_loader, - model_filename='model', - params_filename='params', - batch_nums=10, + model_filename='model.pdmodel', + params_filename='params.pdiparams', + batch_nums=1, + epochs=1, algo='abs_max', - regions=self._regions, - region_weights_names=self._region_weights_names, + regions=None, + region_weights_names=None, recon_level='region-wise', simulate_activation_quant=True) - def test_qdrop(self): + +class TestReconLayer(ReconPTQ): + def __init__(self, *args, **kwargs): + super(TestReconLayer, self).__init__(*args, **kwargs) + + def test_qdrop_layer(self): place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( ) else paddle.CPUPlace() exe = paddle.static.Executor(place) quant_recon_static( exe, - './test_rounding_optimizer', - quantize_model_path='rsq_out', + self.tmpdir.name, + quantize_model_path='output_layer', sample_generator=self.data_loader, - model_filename='model', - params_filename='params', - batch_nums=10, + model_filename='model.pdmodel', + params_filename='params.pdiparams', + batch_nums=1, + epochs=1, algo='KL', - regions=self._regions, - region_weights_names=self._region_weights_names, + regions=None, + region_weights_names=None, recon_level='layer-wise', simulate_activation_quant=True, bias_correction=True) -- GitLab
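With the rename in place, a quant-aware-training-plus-distillation run is requested through the `QuantAware` key, mirroring the README hunk in this patch. The sketch below is illustrative only: the model directory, the feed name `inputs`, and the random-data loader are placeholders, not part of the patch.

```python
# Minimal sketch of the renamed ACT config key (placeholder paths and data).
import numpy as np
from paddleslim.auto_compression import AutoCompression

def train_loader():
    # Placeholder training/calibration data; the feed name 'inputs' is assumed.
    for _ in range(8):
        yield {"inputs": np.random.rand(1, 3, 224, 224).astype("float32")}

ac = AutoCompression(
    model_dir="./MobileNetV1_infer",        # placeholder inference-model dir
    model_filename="inference.pdmodel",
    params_filename="inference.pdiparams",
    save_dir="MobileNetV1_quant",
    # Formerly: config={"Quantization": {}, "Distillation": {}}
    config={"QuantAware": {}, "Distillation": {}},
    train_dataloader=train_loader,
    eval_dataloader=train_loader)
ac.compress()
```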
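The BRECQ and QDrop switches documented in the first hunk map onto `quant_recon_static` as follows. A sketch under assumptions: the fp32 inference model lives in `./model_dir` with the file names used by the updated unit tests, and `sample_gen` is a calibration-sample generator you supply.

```python
import paddle
from paddleslim.quant import quant_recon_static

paddle.enable_static()
place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
exe = paddle.static.Executor(place)

quant_recon_static(
    exe,
    './model_dir',                      # placeholder fp32 inference-model dir
    quantize_model_path='./quant_out',
    sample_generator=sample_gen,        # placeholder calibration generator
    model_filename='model.pdmodel',
    params_filename='params.pdiparams',
    batch_nums=10,
    algo='abs_max',
    recon_level='region-wise',          # BRECQ-style region-wise reconstruction
    simulate_activation_quant=True,     # QDrop-style simulated activation quant
    regions=None,                       # None: regions are divided automatically
    region_weights_names=None,
    epochs=20,
    lr=0.1,
    limit=5)                            # depth budget of each auto-built region
```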
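When `regions=None` and `recon_level='region-wise'`, `ReconstructionQuanter` now builds the regions itself through `RegionBuilder`, walking ops in depth order and closing a region at a multi-input endpoint or once the `limit` depth budget is reached. `RegionBuilder._create_regions` is an internal helper, so the call below is a sketch for inspection only, assuming a saved static-graph model.

```python
import paddle
from paddleslim.quant.reconstruction_quantization import RegionBuilder

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())
# Placeholder: load the fp32 inference program whose graph will be divided.
program, feed_names, fetch_targets = paddle.static.load_inference_model(
    './model_dir', exe,
    model_filename='model.pdmodel', params_filename='params.pdiparams')

builder = RegionBuilder(program=program)
# Each region is [input_var_name, output_var_name]; the parallel list holds
# the conv/mul/matmul weight names that fall inside that region.
regions, region_weights_names = builder._create_regions(limit=5)
print(len(regions), regions[0], region_weights_names[0])
```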
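The per-tensor quant fix in `_preprocess` can be checked in isolation: for 2-D `mul`/`matmul` weights laid out `[in_features, out_features]`, per-output-channel scales must broadcast along columns, which is what the corrected `repeat(..., axis=1).T` produces; the previous `repeat(..., axis=0)` left a misaligned layout after the later reshape. A small numpy check, with shapes chosen purely for illustration:

```python
import numpy as np

in_c, out_c = 3, 4                        # illustrative [in, out] weight shape
shape = (in_c, out_c)
scale = np.arange(1.0, out_c + 1.0)       # one scale per output channel
scale = scale.reshape(scale.shape[0], 1)  # (out_c, 1), as in _preprocess

fixed = scale.repeat(shape[0], axis=1).T  # (in_c, out_c): column j uses scale j
assert fixed.shape == shape
assert np.allclose(fixed[:, 1], 2.0)      # every row shares the per-column scale
```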