diff --git a/deploy/slim/quant/README.md b/deploy/slim/quant/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..90e9fe371be276855c5a1da11c1244c924ebe4d6
--- /dev/null
+++ b/deploy/slim/quant/README.md
@@ -0,0 +1,107 @@
+
+## 介绍
+复杂的模型有利于提高模型的性能,但也导致模型中存在一定冗余,模型量化将全精度缩减到定点数减少这种冗余,达到减少模型计算复杂度,提高模型推理性能的目的。
+模型量化可以在基本不损失模型的精度的情况下,将FP32精度的模型参数转换为Int8精度,减小模型参数大小并加速计算,使用量化后的模型在移动端等部署时更具备速度优势。
+
+本教程将介绍如何使用飞桨模型压缩库PaddleSlim做PaddleClas模型的压缩。
+[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) 集成了模型剪枝、量化(包括量化训练和离线量化)、蒸馏和神经网络搜索等多种业界常用且领先的模型压缩功能,如果您感兴趣,可以关注并了解。
+
+在开始本教程之前,建议先了解[PaddleClas模型的训练方法](../../../docs/zh_CN/tutorials/quick_start.md)以及[PaddleSlim](https://paddleslim.readthedocs.io/zh_CN/latest/index.html)
+
+
+## 快速开始
+量化多适用于轻量模型在移动端的部署,当训练出一个模型后,如果希望进一步的压缩模型大小并加速预测,可使用量化的方法压缩模型。
+
+模型量化主要包括五个步骤:
+1. 安装 PaddleSlim
+2. 准备训练好的模型
+3. 量化训练
+4. 导出量化推理模型
+5. 量化模型预测部署
+
+### 1. 安装PaddleSlim
+
+* 可以通过pip install的方式进行安装。
+
+```bash
+pip3.7 install paddleslim==2.0.0
+```
+
+* 如果要获取PaddleSlim的最新特性,可以从源码安装。
+
+```bash
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+cd PaddleSlim
+python3.7 setup.py install
+```
+
+### 2. 准备训练好的模型
+
+PaddleClas提供了一系列训练好的[模型](../../../docs/zh_CN/models/models_intro.md),如果待量化的模型不在列表中,需要按照[常规训练](../../../docs/zh_CN/tutorials/getting_started.md)方法得到训练好的模型。
+
+### 3. 
量化训练
+量化训练包括离线量化训练和在线量化训练,在线量化训练效果更好,需加载预训练模型,在定义好量化策略后即可对模型进行量化。
+
+
+量化训练的代码位于`deploy/slim/quant/quant.py` 中,训练指令如下:
+
+* CPU/单机单卡启动
+
+```bash
+python3.7 deploy/slim/quant/quant.py \
+    -c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
+    -o pretrained_model="./MobileNetV3_large_x1_0_pretrained"
+```
+
+* 单机单卡/单机多卡/多机多卡启动
+
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python3.7 -m paddle.distributed.launch \
+    --gpus="0,1,2,3,4,5,6,7" \
+    deploy/slim/quant/quant.py \
+    -c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
+    -o pretrained_model="./MobileNetV3_large_x1_0_pretrained"
+```
+
+
+* 下面是量化`MobileNetV3_large_x1_0`模型的训练示例脚本。
+
+```bash
+# 下载预训练模型
+wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams
+# 启动训练,这里如果因为显存限制,batch size无法设置过大,可以将batch size和learning rate同比例缩小。
+python3.7 -m paddle.distributed.launch \
+    --gpus="0,1,2,3,4,5,6,7" \
+    deploy/slim/quant/quant.py \
+    -c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
+    -o pretrained_model="./MobileNetV3_large_x1_0_pretrained" \
+    -o LEARNING_RATE.params.lr=0.13 \
+    -o epochs=100
+```
+如果要训练识别模型的量化,修改配置文件和加载的模型参数即可。
+
+### 4. 导出模型
+
+在得到量化训练保存的模型后,可以将其导出为inference model,用于预测部署:
+
+```bash
+python3.7 deploy/slim/quant/export_model.py \
+    -m MobileNetV3_large_x1_0 \
+    -p output/MobileNetV3_large_x1_0/best_model/ppcls \
+    -o ./MobileNetV3_large_x1_0_infer/ \
+    --img_size=224 \
+    --class_dim=1000
+```
+
+
+### 5. 
量化模型部署
+
+上述步骤导出的量化模型,参数精度仍然是FP32,但是参数的数值范围是int8,导出的模型可以通过PaddleLite的opt模型转换工具完成模型转换。
+量化模型部署可参考 [移动端模型部署](../../lite/readme.md)
+
+
+## 量化训练超参数建议
+
+* 量化训练时,建议加载常规训练得到的预训练模型,加速量化训练收敛。
+* 量化训练时,建议初始学习率修改为常规训练的`1/20~1/10`,同时将训练epoch数修改为常规训练的`1/5~1/2`,其他配置信息不建议修改。
diff --git a/deploy/slim/quant/export_model.py b/deploy/slim/quant/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e332c07990ceb8a6ed4ede275d07aea790df4a0
--- /dev/null
+++ b/deploy/slim/quant/export_model.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. 
+ +import argparse +import os +import sys +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..'))) +sys.path.append( + os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools'))) + +from ppcls.modeling import architectures +from ppcls.utils.save_load import load_dygraph_pretrain +import paddle +import paddle.nn.functional as F +from paddle.jit import to_static +from paddleslim.dygraph.quant import QAT + +from pact_helper import get_default_quant_config + + +def parse_args(): + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--model", type=str) + parser.add_argument("-p", "--pretrained_model", type=str) + parser.add_argument("-o", "--output_path", type=str, default="./inference") + parser.add_argument("--class_dim", type=int, default=1000) + parser.add_argument("--load_static_weights", type=str2bool, default=False) + parser.add_argument("--img_size", type=int, default=224) + + return parser.parse_args() + + +class Net(paddle.nn.Layer): + def __init__(self, net, class_dim, model=None): + super(Net, self).__init__() + self.pre_net = net(class_dim=class_dim) + self.model = model + + def forward(self, inputs): + x = self.pre_net(inputs) + if self.model == "GoogLeNet": + x = x[0] + x = F.softmax(x) + return x + + +def main(): + args = parse_args() + + net = architectures.__dict__[args.model] + model = Net(net, args.class_dim, args.model) + + # get QAT model + quant_config = get_default_quant_config() + # TODO(littletomatodonkey): add PACT for export model + # quant_config["activation_preprocess_type"] = "PACT" + quanter = QAT(config=quant_config) + quanter.quantize(model) + + load_dygraph_pretrain( + model.pre_net, + path=args.pretrained_model, + load_static_weights=args.load_static_weights) + model.eval() + + save_path = os.path.join(args.output_path, "inference") + 
quanter.save_quantized_model( + model, + save_path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 3, args.img_size, args.img_size], dtype='float32') + ]) + print('inference QAT model is saved to {}'.format(save_path)) + + +if __name__ == "__main__": + main() diff --git a/deploy/slim/quant/pact_helper.py b/deploy/slim/quant/pact_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..679ca21bc497ba3e44d962a2af42900169f5f26f --- /dev/null +++ b/deploy/slim/quant/pact_helper.py @@ -0,0 +1,41 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + + +def get_default_quant_config(): + quant_config = { + # weight preprocess type, default is None and no preprocessing is performed. + 'weight_preprocess_type': None, + # activation preprocess type, default is None and no preprocessing is performed. + 'activation_preprocess_type': None, + # weight quantize type, default is 'channel_wise_abs_max' + 'weight_quantize_type': 'channel_wise_abs_max', + # activation quantize type, default is 'moving_average_abs_max' + 'activation_quantize_type': 'moving_average_abs_max', + # weight quantize bit num, default is 8 + 'weight_bits': 8, + # activation quantize bit num, default is 8 + 'activation_bits': 8, + # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' + 'dtype': 'int8', + # window size for 'range_abs_max' quantization. 
default is 10000 + 'window_size': 10000, + # The decay coefficient of moving average, default is 0.9 + 'moving_rate': 0.9, + # for dygraph quantization, layers of type in quantizable_layer_type will be quantized + 'quantizable_layer_type': ['Conv2D', 'Linear'], + } + return quant_config diff --git a/deploy/slim/quant/quant.py b/deploy/slim/quant/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..4377fcd09f3814af863cb45e69994ebfc78937af --- /dev/null +++ b/deploy/slim/quant/quant.py @@ -0,0 +1,128 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os +import sys +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..'))) +sys.path.append( + os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools'))) + +import paddle +from paddleslim.dygraph.quant import QAT + +from ppcls.data import Reader +from ppcls.utils.config import get_config +from ppcls.utils.save_load import init_model, save_model +from ppcls.utils import logger +import program + +from pact_helper import get_default_quant_config + + +def parse_args(): + parser = argparse.ArgumentParser("PaddleClas train script") + parser.add_argument( + '-c', + '--config', + type=str, + default='configs/ResNet/ResNet50.yaml', + help='config file path') + parser.add_argument( + '-o', + '--override', + action='append', + default=[], + help='config options to be overridden') + args = parser.parse_args() + return args + + +def main(args): + paddle.seed(12345) + + config = get_config(args.config, overrides=args.override, show=True) + # assign the place + use_gpu = config.get("use_gpu", True) + place = paddle.set_device('gpu' if use_gpu else 'cpu') + + trainer_num = paddle.distributed.get_world_size() + use_data_parallel = trainer_num != 1 + config["use_data_parallel"] = use_data_parallel + + if config["use_data_parallel"]: + paddle.distributed.init_parallel_env() + + net = program.create_model(config.ARCHITECTURE, config.classes_num) + + # prepare to quant + quant_config = get_default_quant_config() + quant_config["activation_preprocess_type"] = "PACT" + quanter = QAT(config=quant_config) + quanter.quantize(net) + + optimizer, lr_scheduler = program.create_optimizer( + config, parameter_list=net.parameters()) + + init_model(config, net, optimizer) + + if config["use_data_parallel"]: + net = paddle.DataParallel(net) + + 
train_dataloader = Reader(config, 'train', places=place)() + + if config.validate: + valid_dataloader = Reader(config, 'valid', places=place)() + + last_epoch_id = config.get("last_epoch", -1) + best_top1_acc = 0.0 # best top1 acc record + best_top1_epoch = last_epoch_id + for epoch_id in range(last_epoch_id + 1, config.epochs): + net.train() + # 1. train with train dataset + program.run(train_dataloader, config, net, optimizer, lr_scheduler, + epoch_id, 'train') + + # 2. validate with validate dataset + if config.validate and epoch_id % config.valid_interval == 0: + net.eval() + with paddle.no_grad(): + top1_acc = program.run(valid_dataloader, config, net, None, + None, epoch_id, 'valid') + if top1_acc > best_top1_acc: + best_top1_acc = top1_acc + best_top1_epoch = epoch_id + model_path = os.path.join(config.model_save_dir, + config.ARCHITECTURE["name"]) + save_model(net, optimizer, model_path, "best_model") + message = "The best top1 acc {:.5f}, in epoch: {:d}".format( + best_top1_acc, best_top1_epoch) + logger.info(message) + + # 3. 
save the persistable model + if epoch_id % config.save_interval == 0: + model_path = os.path.join(config.model_save_dir, + config.ARCHITECTURE["name"]) + save_model(net, optimizer, model_path, epoch_id) + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/ppcls/utils/save_load.py b/ppcls/utils/save_load.py index 511814c60f0d7f49b1055e4cdbc04befcfda7606..d65b9954842d22b9422e6187e52499633b34ee97 100644 --- a/ppcls/utils/save_load.py +++ b/ppcls/utils/save_load.py @@ -105,7 +105,7 @@ def init_model(config, net, optimizer=None): load model from checkpoint or pretrained_model """ checkpoints = config.get('checkpoints') - if checkpoints: + if checkpoints and optimizer is not None: assert os.path.exists(checkpoints + ".pdparams"), \ "Given dir {}.pdparams not exist.".format(checkpoints) assert os.path.exists(checkpoints + ".pdopt"), \ @@ -114,7 +114,7 @@ def init_model(config, net, optimizer=None): opti_dict = paddle.load(checkpoints + ".pdopt") net.set_dict(para_dict) optimizer.set_state_dict(opti_dict) - logger.info("Finish initing model from {}".format(checkpoints)) + logger.info("Finish load checkpoints from {}".format(checkpoints)) return pretrained_model = config.get('pretrained_model') @@ -129,7 +129,7 @@ def init_model(config, net, optimizer=None): path=pretrained_model, load_static_weights=load_static_weights) logger.info( - logger.coloring("Finish initing model from {}".format( + logger.coloring("Finish load pretrained model from {}".format( pretrained_model), "HEADER"))