From e56acbf062cf500ab328cb5eb3d062141305202b Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Sun, 26 Sep 2021 10:33:57 +0800
Subject: [PATCH] add ptq demo and ptq api docs (#894)

---
 demo/dygraph/post_quant/README.md             | 103 ++++++++++
 demo/dygraph/post_quant/eval.py               | 138 ++++++++++++++
 demo/dygraph/post_quant/ptq.py                | 179 ++++++++++++++++++
 demo/dygraph/quant/train.py                   |   2 +-
 demo/models/__init__.py                       |   5 +-
 demo/models/dygraph/__init__.py               |   1 +
 .../quant => models/dygraph}/mobilenet_v3.py  |  16 +-
 docs/zh_cn/api_cn/dygraph/quanter/qat.rst     | 141 +++++++++++++-
 paddleslim/dygraph/quant/ptq.py               |  10 +-
 9 files changed, 584 insertions(+), 11 deletions(-)
 create mode 100644 demo/dygraph/post_quant/README.md
 create mode 100644 demo/dygraph/post_quant/eval.py
 create mode 100644 demo/dygraph/post_quant/ptq.py
 create mode 100644 demo/models/dygraph/__init__.py
 rename demo/{dygraph/quant => models/dygraph}/mobilenet_v3.py (95%)

diff --git a/demo/dygraph/post_quant/README.md b/demo/dygraph/post_quant/README.md
new file mode 100644
index 00000000..12ae0bd6
--- /dev/null
+++ b/demo/dygraph/post_quant/README.md
@@ -0,0 +1,103 @@
+# Post-Training Quantization for Dynamic Graphs
+
+This demo shows how to apply post-training (offline) quantization to dynamic-graph models, using the common MobileNetV1 and MobileNetV3 models as examples.
+
+
+## Post-training quantization workflow for classification models
+
+#### Prepare the data
+
+Create a ``data`` directory under the current directory and extract the ``ImageNet`` validation set into it. After extraction, the ``data/ILSVRC2012`` directory should contain:
+- a ``'val'`` directory holding the validation images
+- a ``'val_list.txt'`` file
+
+#### Prepare the model to quantize
+
+- For the [models](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/vision/models) supported by paddle vision (`[lenet, mobilenetv1, mobilenetv2, resnet, vgg]`), you can directly use the built-in model definitions and ImageNet pretrained weights; see the loading sketch after this list.
+- For models that paddle vision does not yet support, such as mobilenetv3, you need to define the model structure yourself and prepare the corresponding pretrained weights.
+  - This demo uses a mobilenetv3 model that reaches 75.0 Top-1 accuracy on ImageNet: [download the pretrained weights](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams)
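+
+The snippet below sketches how a model is typically loaded and prepared before quantization. It mirrors what `ptq.py` in this demo does; the weight path is illustrative:
+
+```python
+import paddle
+import paddle.vision.models as models
+
+# built-in model with ImageNet pretrained weights
+fp32_model = models.mobilenet_v1(pretrained=True)
+
+# or a custom definition plus downloaded weights (path is illustrative):
+# fp32_model = MobileNetV3_large_x1_0(skip_se_quant=True)
+# fp32_model.load_dict(paddle.load('MobileNetV3_large_x1_0_pretrained.pdparams'))
+
+fp32_model.eval()  # calibration runs the model in eval mode
+```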
+
+
+#### Initialize the post-training quantization API
+
+- Keep the default configuration:
+```python
+ptq = PTQ()
+```
+
+- For models such as mobilenetv3, the default configuration needs to be modified, as follows:
+```python
+ptq_config = {'activation_quantizer': 'HistQuantizer', 'upsample_bins': 127, 'hist_percent': 0.999}
+ptq = PTQ(**ptq_config)
+```
+
+
+#### Obtain the quantized model
+
+Call the ptq.quantize API to obtain the quantized model:
+
+```python
+quant_model = ptq.quantize(fp32_model)
+```
+
+To fuse layers before quantization, set `fuse=True`. `fuse_list` defaults to None, which fuses all eligible layers in the network; to fuse only specific layers, build a `fuse_list` as shown below. The fused model is smaller and may infer faster, with accuracy unchanged or slightly lower.
+```python
+count = 0
+fuse_list = []
+for name, layer in fp32_model.named_sublayers():
+    if isinstance(layer, nn.Conv2D):
+        fuse_list.append([name])
+    if isinstance(layer, nn.BatchNorm2D):
+        fuse_list[count].append(name)
+        count += 1
+quant_model = ptq.quantize(fp32_model, fuse=True, fuse_list=fuse_list)
+```
+
+#### Calibrate the model
+
+Run a few batches of validation data through the quantized model so the quantizers can collect activation statistics:
+
+```python
+calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
+          FLAGS.quant_batch_size)
+```
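+
+After calibration, export the model for inference deployment. A minimal sketch mirroring the save step in `ptq.py`; the output path is illustrative:
+
+```python
+import paddle
+
+input_spec = paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')
+ptq.save_quantized_model(quant_model,
+                         'output_ptq/mobilenet_v3/int8_infer/model',
+                         [input_spec])
+```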
+
+## Launch commands
+
+- MobileNetV1
+
+  ```bash
+  python3.7 ptq.py \
+        --data=dataset/ILSVRC2012/ \
+        --model=mobilenet_v1 \
+        --quant_batch_num=10 \
+        --quant_batch_size=10 \
+        --output_dir="output_ptq"
+  ```
+- MobileNetV3
+
+  For MobileNetV3, calibrating with the default configuration loses substantial accuracy. To reduce the quantization loss, the code sets `skip_se_quant=True` by default, which skips quantization of the `SE modules` (see the sketch after this list), and it uses a larger batch size and a different activation quantization method. The launch command is:
+
+  ```bash
+  python3.7 ptq.py \
+        --data=dataset/ILSVRC2012/ \
+        --model=mobilenet_v3 \
+        --pretrain_weight=MobileNetV3_large_x1_0_pretrained.pdparams \
+        --quant_batch_num=10 \
+        --quant_batch_size=32 \
+        --output_dir="output_ptq"
+  ```
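+
+Under the hood, `skip_se_quant=True` only marks the convolutions inside each SE block with a `skip_quant` attribute, which the quantization pass honors, leaving those layers in FP32 (see the `SEModule` change in `mobilenet_v3.py` later in this patch). A sketch of marking layers the same way by hand; the name filter is illustrative:
+
+```python
+import paddle.nn as nn
+
+for name, layer in fp32_model.named_sublayers():
+    if isinstance(layer, nn.Conv2D) and '_se' in name:
+        layer.skip_quant = True  # this layer stays in FP32
+```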
+
+## Evaluate accuracy
+
+```bash
+python3.7 eval.py --model_path=output_ptq/mobilenet_v3/int8_infer/ --data_dir=dataset/ILSVRC2012/ --use_gpu=True
+```
+
+- Evaluation also runs on CPU, without requiring TensorRT or MKL-DNN; see the command below.
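+
+For example, to evaluate on CPU, simply drop the `--use_gpu` flag; it defaults to False (note that with argparse's `type=bool`, passing `--use_gpu=False` would still parse as True):
+
+```bash
+python3.7 eval.py --model_path=output_ptq/mobilenet_v3/int8_infer/ --data_dir=dataset/ILSVRC2012/
+```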
+
+
+## Quantization results
+
+| Model       | FP32 accuracy (Top1/Top5) | Quantization method        | INT8 accuracy (Top1/Top5) |
+| ----------- | ------------------------- | -------------------------- | ------------------------- |
+| MobileNetV1 | 70.82/89.63               | post-training quantization | 70.49/89.41               |
+| MobileNetV3 | 74.98/92.13               | post-training quantization | 71.14/90.17               |
diff --git a/demo/dygraph/post_quant/eval.py b/demo/dygraph/post_quant/eval.py
new file mode 100644
index 00000000..6bc9149b
--- /dev/null
+++ b/demo/dygraph/post_quant/eval.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+import time
+import sys
+import argparse
+import math
+
+import paddle
+import paddle.inference as paddle_infer
+from ptq import ImageNetValDataset
+
+
+def eval():
+    # create predictor
+    model_file = os.path.join(FLAGS.model_path, FLAGS.model_filename)
+    params_file = os.path.join(FLAGS.model_path, FLAGS.params_filename)
+    config = paddle_infer.Config(model_file, params_file)
+    if FLAGS.use_gpu:
+        config.enable_use_gpu(1000, 0)
+    if not FLAGS.ir_optim:
+        config.switch_ir_optim(False)
+
+    predictor = paddle_infer.create_predictor(config)
+
+    input_names = predictor.get_input_names()
+    input_handle = predictor.get_input_handle(input_names[0])
+    output_names = predictor.get_output_names()
+    output_handle = predictor.get_output_handle(output_names[0])
+
+    # prepare data
+    val_dataset = ImageNetValDataset(FLAGS.data_dir)
+    eval_loader = paddle.io.DataLoader(
+        val_dataset, batch_size=FLAGS.batch_size, num_workers=5)
+
+    cost_time = 0.
+    total_num = 0.
+    correct_1_num = 0
+    correct_5_num = 0
+    for batch_id, data in enumerate(eval_loader()):
+        # set input
+        img_np = np.array([tensor.numpy() for tensor in data[0]])
+        label_np = np.array([tensor.numpy() for tensor in data[1]])
+
+        input_handle.reshape(img_np.shape)
+        input_handle.copy_from_cpu(img_np)
+
+        # run
+        t1 = time.time()
+        predictor.run()
+        t2 = time.time()
+        cost_time += (t2 - t1)
+
+        output_data = output_handle.copy_to_cpu()
+
+        # calculate accuracy
+        for i in range(len(label_np)):
+            label = label_np[i][0]
+            result = output_data[i, :]
+            index = result.argsort()
+            total_num += 1
+            if index[-1] == label:
+                correct_1_num += 1
+            if label in index[-5:]:
+                correct_5_num += 1
+
+        if batch_id % 10 == 0:
+            acc1 = correct_1_num / total_num
+            acc5 = correct_5_num / total_num
+            avg_time = cost_time / total_num
+            print(
+                "batch_id {}, acc1 {:.3f}, acc5 {:.3f}, avg time {:.5f} sec/img".
+                format(batch_id, acc1, acc5, avg_time))
+
+        if FLAGS.test_samples > 0 and \
+                (batch_id + 1) * FLAGS.batch_size >= FLAGS.test_samples:
+            break
+
+    acc1 = correct_1_num / total_num
+    acc5 = correct_5_num / total_num
+    avg_time = cost_time / total_num
+    print("End test: test image {}".format(total_num))
+    print("test_acc1 {:.4f}, test_acc5 {:.4f}, avg time {:.5f} sec/img".format(
+        acc1, acc5, avg_time))
+    print("\n")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--model_path', type=str, default="", help="The inference model path.")
+    parser.add_argument(
+        '--model_filename',
+        type=str,
+        default="model.pdmodel",
+        help="model filename")
+    parser.add_argument(
+        '--params_filename',
+        type=str,
+        default="model.pdiparams",
+        help="params filename")
+    parser.add_argument(
+        '--data_dir',
+        type=str,
+        default="dataset/ILSVRC2012/",
+        help="The ImageNet dataset root dir.")
+    parser.add_argument(
+        '--test_samples',
+        type=int,
+        default=-1,
+        help="Test sample number. If set to -1, all test samples are used.")
+    parser.add_argument(
+        '--batch_size', type=int, default=10, help="Batch size.")
+    parser.add_argument(
+        '--use_gpu', type=bool, default=False, help="Whether to use GPU.")
+    parser.add_argument(
+        '--ir_optim', type=bool, default=False, help="Enable IR optimization.")
+
+    FLAGS = parser.parse_args()
+
+    eval()
diff --git a/demo/dygraph/post_quant/ptq.py b/demo/dygraph/post_quant/ptq.py
new file mode 100644
index 00000000..7830cb3d
--- /dev/null
+++ b/demo/dygraph/post_quant/ptq.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import contextlib
+import os
+import time
+import math
+import numpy as np
+from PIL import Image
+
+import paddle
+from paddle.io import Dataset
+from paddle.vision.transforms import transforms
+import paddle.vision.models as models
+import paddle.nn as nn
+
+from paddleslim import PTQ
+
+import sys
+sys.path.append(os.path.dirname(__file__))
+sys.path.append(
+    os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
+from models.dygraph.mobilenet_v3 import MobileNetV3_large_x1_0
+
+
+class ImageNetValDataset(Dataset):
+    def __init__(self, data_dir, image_size=224, resize_short_size=256):
+        super(ImageNetValDataset, self).__init__()
+        train_file_list = os.path.join(data_dir, 'train_list.txt')
+        val_file_list = os.path.join(data_dir, 'val_list.txt')
+        test_file_list = os.path.join(data_dir, 'test_list.txt')
+        self.data_dir = data_dir
+
+        normalize = transforms.Normalize(
+            mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
+        self.transform = transforms.Compose([
+            transforms.Resize(resize_short_size),
+            transforms.CenterCrop(image_size), transforms.Transpose(), normalize
+        ])
+
+        with open(val_file_list) as flist:
+            lines = [line.strip() for line in flist]
+            self.data = [line.split() for line in lines]
+
+    def __getitem__(self, index):
+        img_path, label = self.data[index]
+        img_path = os.path.join(self.data_dir, img_path)
+        img = Image.open(img_path).convert('RGB')
+        label = np.array([label]).astype(np.int64)
+        return self.transform(img), label
+
+    def __len__(self):
+        return len(self.data)
+
+
+def calibrate(model, dataset, batch_num, batch_size):
+    data_loader = paddle.io.DataLoader(
+        dataset, batch_size=batch_size, num_workers=5)
+
+    for idx, data in enumerate(data_loader()):
+        img = data[0]
+        label = data[1]
+
+        out = model(img)
+
+        if (idx + 1) % 50 == 0:
+            print("idx:" + str(idx))
+        if (batch_num > 0) and (idx + 1 >= batch_num):
+            break
+
+
+def main():
+    # 1. load the model
+    model_list = [x for x in models.__dict__["__all__"]]
+    model_list.append('mobilenet_v3')
+    assert FLAGS.model in model_list, "Expected FLAGS.model in {}, but received {}".format(
+        model_list, FLAGS.model)
+    if FLAGS.model == 'mobilenet_v3':
+        fp32_model = MobileNetV3_large_x1_0(skip_se_quant=True)
+    else:
+        fp32_model = models.__dict__[FLAGS.model](pretrained=True)
+    if FLAGS.pretrain_weight:
+        info_dict = paddle.load(FLAGS.pretrain_weight)
+        fp32_model.load_dict(info_dict)
+        print('Finish loading model weights: {}'.format(FLAGS.pretrain_weight))
+    fp32_model.eval()
+    # print the network structure for inspection
+    for name, layer in fp32_model.named_sublayers():
+        print(name, layer)
+    # collect [conv, bn] name pairs to fuse; this assumes each BatchNorm2D
+    # directly follows its Conv2D in named_sublayers() order
+    count = 0
+    fuse_list = []
+    for name, layer in fp32_model.named_sublayers():
+        if isinstance(layer, nn.Conv2D):
+            fuse_list.append([name])
+        if isinstance(layer, nn.BatchNorm2D):
+            fuse_list[count].append(name)
+            count += 1
+    if FLAGS.model == 'resnet50':
+        fuse_list = None
+    val_dataset = ImageNetValDataset(FLAGS.data)
+
+    # 2. quantize
+    if FLAGS.model == 'mobilenet_v3':
+        ptq_config = {
+            'activation_quantizer': 'HistQuantizer',
+            'upsample_bins': 127,
+            'hist_percent': 0.999
+        }
+        ptq = PTQ(**ptq_config)
+    else:
+        ptq = PTQ()
+    quant_model = ptq.quantize(fp32_model, fuse=FLAGS.fuse, fuse_list=fuse_list)
+
+    print("Start calibrate...")
+    calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
+              FLAGS.quant_batch_size)
+
+    # 3. save both the quantized and the FP32 inference models
+    quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.model, "int8_infer",
+                                    "model")
+    input_spec = paddle.static.InputSpec(
+        shape=[None, 3, 224, 224], dtype='float32')
+    ptq.save_quantized_model(quant_model, quant_output_dir, [input_spec])
+
+    fp32_output_dir = os.path.join(FLAGS.output_dir, FLAGS.model, "fp32_infer",
+                                   "model")
+    paddle.jit.save(fp32_model, fp32_output_dir, [input_spec])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser("Quantization on ImageNet")
+
+    # model
+    parser.add_argument(
+        "--model", type=str, default='mobilenet_v3', help="model name")
+    parser.add_argument(
+        "--pretrain_weight",
+        type=str,
+        default=None,
+        help="pretrain weight path")
+    parser.add_argument(
+        "--output_dir", type=str, default='output', help="save dir")
+    parser.add_argument("--fuse", type=bool, default=False, help="fuse layers")
+
+    # data
+    parser.add_argument(
+        '--data',
+        default="/dataset/ILSVRC2012",
+        help='path to dataset (should have subdirectories named "train" and "val")'
+    )
+    parser.add_argument(
+        '--val_dir',
+        default="val",
+        help='the dir that saves val images for paddle.Model')
+
+    # train
+    parser.add_argument(
+        "--quant_batch_num", default=10, type=int, help="batch num for quant")
+    parser.add_argument(
+        "--quant_batch_size", default=10, type=int, help="batch size for quant")
+
+    FLAGS = parser.parse_args()
+    assert FLAGS.data, "error: must provide data path"
+
+    main()
diff --git a/demo/dygraph/quant/train.py b/demo/dygraph/quant/train.py
index e46f9a05..3c040d5f 100644
--- a/demo/dygraph/quant/train.py
+++ b/demo/dygraph/quant/train.py
@@ -33,11 +33,11 @@ from paddleslim.common import get_logger
 from paddleslim.dygraph.quant import QAT
 
 sys.path.append(os.path.join(os.path.dirname("__file__")))
-from mobilenet_v3 import MobileNetV3_large_x1_0
 from optimizer import create_optimizer
 sys.path.append(
     os.path.join(os.path.dirname("__file__"), os.path.pardir, os.path.pardir))
 from utility import add_arguments, print_arguments
+from models.dygraph.mobilenet_v3 import MobileNetV3_large_x1_0
 
 _logger = get_logger(__name__, level=logging.INFO)
diff --git a/demo/models/__init__.py b/demo/models/__init__.py
index b6771d70..4c9deb08 100644
--- a/demo/models/__init__.py
+++ b/demo/models/__init__.py
@@ -6,13 +6,14 @@ from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2
 from .pvanet import PVANet
 from .slimfacenet import SlimFaceNet_A_x0_60, SlimFaceNet_B_x0_75, SlimFaceNet_C_x0_75
 from .mobilenet_v3 import *
+from .dygraph import *
 __all__ = [
     "model_list", "MobileNet", "ResNet34", "ResNet50", "MobileNetV2", "PVANet",
     "ResNet50_vd", "ResNet101_vd", "MobileNetV2_x0_25"
 ]
 model_list = [
-    'MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2', 'PVANet',
-    'ResNet50_vd', "ResNet101_vd", "MobileNetV2_x0_25"
+    'MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2', 'PVANet', 'ResNet50_vd',
+    "ResNet101_vd", "MobileNetV2_x0_25"
 ]
 __all__ += mobilenet_v3.__all__
diff --git a/demo/models/dygraph/__init__.py b/demo/models/dygraph/__init__.py
new file mode 100644
index 00000000..c3961685
--- /dev/null
+++ b/demo/models/dygraph/__init__.py
@@ -0,0 +1 @@
+from __future__ import absolute_import
diff --git a/demo/dygraph/quant/mobilenet_v3.py b/demo/models/dygraph/mobilenet_v3.py
similarity index 95%
rename from demo/dygraph/quant/mobilenet_v3.py
rename to demo/models/dygraph/mobilenet_v3.py
index 64fd1a19..c719c149 100644
--- a/demo/dygraph/quant/mobilenet_v3.py
+++ b/demo/models/dygraph/mobilenet_v3.py
@@ -1,4 +1,4 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -50,8 +50,10 @@ class MobileNetV3(nn.Layer):
                  scale=1.0,
                  model_name="small",
                  dropout_prob=0.2,
-                 class_dim=1000):
+                 class_dim=1000,
+                 skip_se_quant=False):
         super(MobileNetV3, self).__init__()
+        self.skip_se_quant = skip_se_quant
         inplanes = 16
         if model_name == "large":
@@ -121,6 +123,7 @@ class MobileNetV3(nn.Layer):
                         stride=s,
                         use_se=se,
                         act=nl,
+                        skip_se_quant=self.skip_se_quant,
                         name="conv" + str(i + 2)))
                 self.block_list.append(block)
                 inplanes = make_divisible(scale * c)
@@ -229,6 +232,7 @@ class ResidualUnit(nn.Layer):
                  stride,
                  use_se,
                  act=None,
+                 skip_se_quant=False,
                  name=''):
         super(ResidualUnit, self).__init__()
         self.if_shortcut = stride == 1 and in_c == out_c
@@ -254,7 +258,8 @@ class ResidualUnit(nn.Layer):
             act=act,
             name=name + "_depthwise")
         if self.if_se:
-            self.mid_se = SEModule(mid_c, name=name + "_se")
+            self.mid_se = SEModule(
+                mid_c, skip_se_quant=skip_se_quant, name=name + "_se")
         self.linear_conv = ConvBNLayer(
             in_c=mid_c,
             out_c=out_c,
@@ -277,7 +282,7 @@
 
 
 class SEModule(nn.Layer):
-    def __init__(self, channel, reduction=4, name=""):
+    def __init__(self, channel, reduction=4, skip_se_quant=False, name=""):
         super(SEModule, self).__init__()
         self.avg_pool = AdaptiveAvgPool2D(1)
         self.conv1 = Conv2D(
@@ -296,6 +301,9 @@ class SEModule(nn.Layer):
             padding=0,
             weight_attr=ParamAttr(name + "_2_weights"),
             bias_attr=ParamAttr(name=name + "_2_offset"))
+        if skip_se_quant:
+            self.conv1.skip_quant = True
+            self.conv2.skip_quant = True
 
     def forward(self, inputs):
         outputs = self.avg_pool(inputs)
diff --git a/docs/zh_cn/api_cn/dygraph/quanter/qat.rst b/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
index bc9364d4..3100fd9c 100644
--- a/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
+++ b/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
@@ -1,9 +1,29 @@
 QAT
 ==================
 
+Model quantization covers three methods: post-training dynamic quantization, post-training static quantization, and quantization-aware training.
+
+The figure below shows how to choose a quantization method.
+
+
+.. image:: https://user-images.githubusercontent.com/52520497/83991261-cbe55800-a97e-11ea-880c-d83fb7924454.png
+   :scale: 80 %
+   :alt: Figure 1: choosing a model quantization method
+   :align: center
+
+The figure below compares the methods in terms of applicability, ease of use, accuracy loss, and expected benefit.
+
+.. image:: https://user-images.githubusercontent.com/52520497/83991268-cee04880-a97e-11ea-9ecd-2d0f04a15205.png
+   :scale: 80 %
+   :alt: Figure 2: comparison of model quantization methods
+   :align: center
+
+Quantization-Aware Training
+----------------------------
+
 .. py:class:: paddleslim.QAT(config=None, weight_preprocess=None, act_preprocess=None, weight_quantize=None, act_quantize=None)
 
-`Source code `_
+`Source code `_
 
 Quantization-aware training (QAT) produces a simulated-quantization model: fake-quant nodes are inserted before the operators to be quantized, so that the `quantize-dequantize` logic runs in advance on their activation and weight inputs.
@@ -96,7 +116,7 @@
 
    Export the quantized dynamic-graph model as a static-graph inference model for deployment.
 
-   The quantized inference model can be opened and visualized with `netron`. Like a normal FP32 inference model, it can be loaded for inference by PaddleLite and PaddleInference; see the `inference deployment` chapter for details.
+   The quantized inference model can be opened and visualized with `VisualDL`. Like a normal FP32 inference model, it can be loaded for inference by PaddleLite and PaddleInference; see the `inference deployment` chapter for details.
 
    **Parameters:**
@@ -131,3 +151,120 @@ QAT
 
 
 ..
+
+Post-Training Static Quantization
+---------------------------------
+
+.. py:class:: paddleslim.PTQ(activation_quantizer='KLQuantizer', weight_quantizer='PerChannelAbsmaxQuantizer', **kwargs)
+
+`Source code `_
+
+**Parameters:**
+
+- **activation_quantizer(str, Optional)** - The quantization method for activations. One of `KLQuantizer`, `HistQuantizer` and `AbsmaxQuantizer`. Default: `KLQuantizer`.
+
+- **weight_quantizer(str, Optional)** - The quantization method for weights. One of `AbsmaxQuantizer` and `PerChannelAbsmaxQuantizer`. Default: `PerChannelAbsmaxQuantizer`.
+
+**Returns:** a post-training quantization (PTQ) instance.
+
+**KLQuantizer parameters:**
+
+- **quant_bits(int)** - The number of quantization bits. Default: 8.
+- **bins(int)** - The number of histogram bins to collect. Default: 1024.
+- **upsample_bins(int)** - The number of bins used when upsampling the histogram. Default: 64.
+
+**HistQuantizer parameters:**
+
+- **quant_bits(int)** - The number of quantization bits. Default: 8.
+- **bins(int)** - The number of histogram bins to collect. Default: 1024.
+- **upsample_bins(int)** - The number of bins used when upsampling the histogram. Default: 64.
+- **hist_percent(float)** - The percentile of the histogram to keep. Default: 0.99999.
+
+**AbsmaxQuantizer parameters:**
+
+- **quant_bits(int)** - The number of quantization bits. Default: 8.
+
+
+**Example:**
+
+.. code-block:: python
+
+   from paddleslim import PTQ
+   ptq = PTQ()
+..
+
+To change the default configuration, pass a dict to PTQ(), for example:
+
+.. code-block:: python
+
+   from paddleslim import PTQ
+   ptq_config = {'activation_quantizer': 'HistQuantizer', 'upsample_bins': 127, 'hist_percent': 0.999}
+   ptq = PTQ(**ptq_config)
+..
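+
+Both quantizers can also be overridden at once; for example, per-tensor abs-max quantization for both activations and weights (extra kwargs are forwarded only to the activation quantizer, as the implementation at the end of this patch shows):
+
+.. code-block:: python
+
+   from paddleslim import PTQ
+   ptq = PTQ(activation_quantizer='AbsmaxQuantizer',
+             weight_quantizer='AbsmaxQuantizer')
+..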
+
+   .. py:method:: quantize(model, fuse=False, fuse_list=None)
+
+   Prepare the model for post-training quantization by inserting quantize-dequantize nodes.
+
+   **Parameters:**
+
+   - **model(paddle.nn.Layer)** - A paddle Layer instance; it must contain quantizable operators such as `Conv, Linear`.
+   - **fuse(bool)** - Whether to fuse layers in the model. Default: False.
+   - **fuse_list(list)** - When fusing, the layers to fuse. Default: None, which fuses all eligible layers when `fuse=True`.
+
+   **Example:**
+
+   .. code-block:: python
+
+      from paddleslim import PTQ
+      from paddle.vision.models import mobilenet_v1
+      fp32_model = mobilenet_v1(pretrained=True)
+      ptq = PTQ()
+      quant_model = ptq.quantize(fp32_model)
+   ..
+
+   To fuse specific layers, build a `fuse_list` as follows; currently fusing `Conv2D` with `BatchNorm2D` is supported. The fused model is smaller and may infer faster, with accuracy unchanged or slightly lower.
+
+   .. code-block:: python
+
+      count = 0
+      fuse_list = []
+      for name, layer in fp32_model.named_sublayers():
+          if isinstance(layer, nn.Conv2D):
+              fuse_list.append([name])
+          if isinstance(layer, nn.BatchNorm2D):
+              fuse_list[count].append(name)
+              count += 1
+      quant_model = ptq.quantize(fp32_model, fuse=True, fuse_list=fuse_list)
+   ..
+
+   .. py:method:: save_quantized_model(model, path, input_spec=None)
+
+   Export the quantized dynamic-graph model as a static-graph inference model for deployment.
+
+   The quantized inference model can be opened and visualized with `VisualDL`. Like a normal FP32 inference model, it can be loaded for inference by PaddleLite and PaddleInference; see the `inference deployment` chapter for details.
+
+   **Parameters:**
+
+   - **model(paddle.nn.Layer)** - The quantized model to export, produced by the `quantize` API after calibration.
+
+   - **path(str)** - The path where the inference model is saved; after export, the `model` and `params` files can be found there.
+
+   - **input_spec(list[InputSpec|Tensor], Optional)** - Describes the inputs of the stored model's forward method, via InputSpec or example Tensors. If None, all inputs of the original Layer's forward method are kept as inputs of the stored model. Default: None.
+
+
+   **Example:**
+
+
+   .. code-block:: python
+
+      import paddle
+      from paddleslim import PTQ
+      from paddle.vision.models import mobilenet_v1
+      fp32_model = mobilenet_v1(pretrained=True)
+      ptq = PTQ()
+      quant_model = ptq.quantize(fp32_model)
+
+      ptq.save_quantized_model(
+          quant_model,
+          './quant_model',
+          input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])
+   ..
+
+Post-Training Dynamic Quantization
+----------------------------------
+
+For post-training dynamic quantization, please refer to the `quant_post_dynamic` API.
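+
+A sketch of a typical invocation; the paths are illustrative, and the `quant_post_dynamic` documentation should be checked for the full argument list:
+
+.. code-block:: python
+
+   from paddleslim.quant import quant_post_dynamic
+
+   quant_post_dynamic(
+       model_dir='./fp32_infer',
+       save_model_dir='./int8_infer')
+..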
diff --git a/paddleslim/dygraph/quant/ptq.py b/paddleslim/dygraph/quant/ptq.py
index eedcdca6..2d8e47d8 100644
--- a/paddleslim/dygraph/quant/ptq.py
+++ b/paddleslim/dygraph/quant/ptq.py
@@ -41,15 +41,21 @@ class PTQ(object):
     """
 
     def __init__(self,
-                 activation_quantizer=Q.KLQuantizer(),
-                 weight_quantizer=Q.PerChannelAbsmaxQuantizer()):
+                 activation_quantizer='KLQuantizer',
+                 weight_quantizer='PerChannelAbsmaxQuantizer',
+                 **kwargs):
         """
         Args:
             activation_quantizer(Quantizer): The quantizer method for activation.
+                Can be set to `KLQuantizer`/`HistQuantizer`/`AbsmaxQuantizer`. Default: KLQuantizer.
             weight_quantizer(Quantizer): The quantizer method for weight.
+                Can be set to `AbsmaxQuantizer`/`PerChannelAbsmaxQuantizer`. Default: PerChannelAbsmaxQuantizer.
         """
+        # resolve the quantizer names; extra kwargs configure the activation quantizer
+        activation_quantizer = eval(activation_quantizer)(**kwargs)
+        weight_quantizer = eval(weight_quantizer)()
         assert isinstance(activation_quantizer, tuple(Q.SUPPORT_ACT_QUANTIZERS))
         assert isinstance(weight_quantizer, tuple(Q.SUPPORT_WT_QUANTIZERS))
-- 
GitLab