Add dygraph quantization (#2129)

* add dygraph quant

Add dygraph quantization (#2129)
* add dygraph quant
942e8f27 · Bai Yifan · GitHub · 6b76b6fc · 942e8f27 · 942e8f27
5 changed file
--- a/dygraph/configs/slim/README.md
+++ b/dygraph/configs/slim/README.md
@@ -3,6 +3,7 @@
 在PaddleDetection中, 提供了基于[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)进行模型压缩的完整教程和benchmark。目前支持的方法：
 - [剪裁](prune)
+- [量化](quant)
 推荐您使用剪裁和蒸馏联合训练，或者使用剪裁和量化，进行检测模型压缩。 下面以YOLOv3为例，进行剪裁、蒸馏和量化实验。
@@ -17,6 +18,17 @@
 | YOLOv3-MobileNetV1      |  baseline | 24.13          |  93          |   608    | 289.9ms | 75.1       | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v1_270e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml)  |  -  |
 | YOLOv3-MobileNetV1      |  剪裁-l1_norm(sensity) | 15.78(-34.49%) |  66(-29%) |   608   | - | 77.6(+2.5) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/slim/yolov3_mobilenet_v1_voc_prune_l1_norm.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml)  |  [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/slim/prune/yolov3_prune_l1_norm.yml)  |
+### 量化
+#### COCO上benchmark
+| 模型               | 压缩策略     | 输入尺寸 |   Box AP    |                             下载                             |                         模型配置文件                         |                       压缩算法配置文件                       |
+| ------------------ | ------------ | -------- | :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3-MobileNetV1 | baseline     | 608      |    28.8     | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |                              -                               |
+| YOLOv3-MobileNetV1 | 普通在线量化 | 608      | 27.5 (-1.3) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/slim/yolov3_mobilenet_v1_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/quant/yolov3_mobilenet_v1_qat.yml) |
+| YOLOv3-MobileNetV3 | baseline     | 608      |    31.4     | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v3_large_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) |                              -                               |
+| YOLOv3-MobileNetV3 | PACT在线量化 | 608      | 29.0 (-2.4) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/slim/yolov3_mobilenet_v3_coco_qat.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/quant/yolov3_mobilenet_v3_qat.yml) |
 - SD855预测时延为使用PaddleLite部署，使用arm8架构并使用4线程(4 Threads)推理时延
 ## 实验环境

--- a/dygraph/configs/slim/quant/yolov3_mobilenet_v1_qat.yml
+++ b/dygraph/configs/slim/quant/yolov3_mobilenet_v1_qat.yml
+# Weights of yolov3_mobilenet_v1_coco
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v1_270e_coco.pdparams
+load_static_weights: False
+weight_type: resume
+slim: QAT
+QAT:
+  quant_config: {
+    'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+    'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+    'quantizable_layer_type': ['Conv2D', 'Linear']}
+  print_model: True
--- a/dygraph/configs/slim/quant/yolov3_mobilenet_v3_qat.yml
+++ b/dygraph/configs/slim/quant/yolov3_mobilenet_v3_qat.yml
+# Weights of yolov3_mobilenet_v3_coco
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v3_large_270e_coco.pdparams
+load_static_weights: False
+weight_type: resume
+slim: QAT
+QAT:
+  quant_config: {
+    'weight_preprocess_type': 'PACT',
+    'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+    'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+    'quantizable_layer_type': ['Conv2D', 'Linear']}
+  print_model: True
--- a/dygraph/ppdet/slim/__init__.py
+++ b/dygraph/ppdet/slim/__init__.py
@@ -13,5 +13,7 @@
 # limitations under the License.
 from . import prune
+from . import quant
 from .prune import *
+from .quant import *
--- a/dygraph/ppdet/slim/quant.py
+++ b/dygraph/ppdet/slim/quant.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+from paddle.utils import try_import
+from ppdet.core.workspace import register, serializable
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+@register
+@serializable
+class QAT(object):
+    def __init__(self, quant_config, print_model):
+        super(QAT, self).__init__()
+        self.quant_config = quant_config
+        self.print_model = print_model
+    def __call__(self, model):
+        paddleslim = try_import('paddleslim')
+        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
+        if self.print_model:
+            logger.info("Model before quant:")
+            logger.info(model)
+        self.quanter.quantize(model)
+        if self.print_model:
+            logger.info("Quantized model:")
+            logger.info(model)
+        return model