diff --git a/demo/quant/quant_post/README.md b/demo/quant/quant_post/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..20ee633e12bdef27aadb309b6a81631c6a5239aa
--- /dev/null
+++ b/demo/quant/quant_post/README.md
@@ -0,0 +1,96 @@
+# Post-training quantization demo
+
+This demo shows how to use the post-training quantization interface ``paddleslim.quant.quant_post`` to quantize a trained classification model. The interface produces a quantized model without any retraining, which reduces both the storage size and the memory footprint of the model. The interface and its parameters are described below, followed by a minimal usage sketch.
+
+## Interface
+```
+quant_post(executor,
+           model_dir,
+           quantize_model_path,
+           sample_generator,
+           model_filename=None,
+           params_filename=None,
+           batch_size=16,
+           batch_nums=None,
+           scope=None,
+           algo='KL',
+           quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"])
+```
+
+Parameters:
+- executor (fluid.Executor): the executor that runs the model, on either CPU or GPU.
+- model_dir(str): the directory of the model to be quantized.
+- quantize_model_path(str): the path where the quantized model is saved.
+- sample_generator(python generator): the data reader; yields one sample per call.
+- model_filename(str, optional): the model filename. If the model's topology is stored in a single file, set ``model_filename`` to that file's name; otherwise leave it as ``None``. Defaults to ``None``.
+- params_filename(str, optional): the parameters filename. If the model's parameters are stored in a single file, set ``params_filename`` to that file's name; otherwise leave it as ``None``. Defaults to ``None``.
+- batch_size(int): the number of images per batch. Defaults to 16.
+- batch_nums(int, optional): the number of iterations. If set to ``None``, calibration runs until ``sample_generator`` is exhausted; otherwise exactly ``batch_nums`` batches are consumed, i.e. ``batch_nums * batch_size`` samples are used to calibrate the quantization ``Scale`` values.
+- scope(fluid.Scope, optional): the scope used to read and write ``Variable``s. If set to ``None``, ``fluid.global_scope()`` is used.
+- algo(str): the algorithm used during quantization, either ``'KL'`` or ``'direct'``. This parameter only affects the quantization of activations, since weights are always quantized with ``'channel_wise_abs_max'``. With ``'direct'``, the ``Scale`` is computed by ``'abs_max'``; with ``'KL'``, the ``Scale`` is computed by minimizing the KL divergence. Defaults to ``'KL'``.
+- quantizable_op_type(list[str]): the list of ``op`` types to be quantized. Defaults to ``["conv2d", "depthwise_conv2d", "mul"]``.
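+
+A minimal sketch of a typical call (the paths are illustrative; the full runnable script is [quant_post.py](./quant_post.py) in this directory):
+```
+import paddle.fluid as fluid
+import imagenet_reader as reader  # the sample reader shipped with this demo
+from paddleslim.quant import quant_post
+
+exe = fluid.Executor(fluid.CPUPlace())
+quant_post(
+    executor=exe,
+    model_dir='./inference_model/MobileNet',
+    quantize_model_path='./quant_model/MobileNet',
+    sample_generator=reader.train(),
+    model_filename='model',
+    params_filename='weights',
+    batch_size=16,
+    batch_nums=10)  # 16 * 10 = 160 calibration samples
+```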
+
+## Post-training quantization of a classification model
+
+### Prepare the data
+
+Create a ``data`` folder under the current directory and extract the ``imagenet`` dataset into it. After extraction, the ``data`` folder should contain:
+- a ``'train'`` folder with the training images
+- a ``'train_list.txt'`` file
+- a ``'val'`` folder with the validation images
+- a ``'val_list.txt'`` file
+
+### Prepare the model to be quantized
+The post-training quantization interface can only load models saved with ``fluid.io.save_inference_model``, so a model saved through any other interface has to be converted first. This demo uses a classification model as an example.
+
+First, download the trained ``mobilenetv1`` model from the [imagenet classification models](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification#%E5%B7%B2%E5%8F%91%E5%B8%83%E6%A8%A1%E5%9E%8B%E5%8F%8A%E5%85%B6%E6%80%A7%E8%83%BD).
+
+Create a ``'pretrain'`` folder under the current directory and extract the ``mobilenetv1`` model into it; the extracted directory is ``pretrain/MobileNetV1_pretrained``.
+
+### Export the model
+Run the following command to convert the model into a format the post-training quantization interface can load:
+```
+python export_model.py --model "MobileNet" --pretrained_model ./pretrain/MobileNetV1_pretrained --data imagenet
+```
+The converted model is stored under ``inference_model/MobileNet/``, which now contains the two files ``'model'`` and ``'weights'``.
+
+### Post-training quantization
+Next, quantize the exported model with the script [quant_post.py](./quant_post.py), which calls the interface ``paddleslim.quant.quant_post``:
+```
+python quant_post.py --model_path ./inference_model/MobileNet --save_path ./quant_model_train/MobileNet --model_filename model --params_filename weights
+```
+
+- ``model_path``: the directory of the model to be quantized
+- ``save_path``: the path where the quantized model is saved
+- ``model_filename``: if the model's topology is stored in a single file, set this to that file's name; if it is stored in multiple files, leave it unset.
+- ``params_filename``: if the model's parameters are stored in a single file, set this to that file's name; if they are stored in multiple files, leave it unset.
+
+After running the command above, the quantized model and its parameter files can be found under ``${save_path}``.
+
+> The quantization algorithm used here is ``'KL'``, and 160 images from the training set are used to calibrate the quantization scales.
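+
+To give a rough intuition of what scale calibration does, the sketch below shows the ``'direct'`` (``'abs_max'``) variant for a single activation tensor. This is illustrative only, not PaddleSlim's internal implementation; the default ``'KL'`` algorithm is more involved, choosing the scale that minimizes the KL divergence between the original and the quantized activation distribution.
+```
+import numpy as np
+
+def abs_max_scale(activations):
+    # 'direct' calibration: the scale is the largest absolute activation
+    # value observed over the calibration samples.
+    return np.max(np.abs(activations))
+
+calib = np.random.randn(160, 1024).astype('float32')  # e.g. 160 samples
+print(abs_max_scale(calib))
+```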
+
+
+### Test the accuracy
+
+Use the script [eval.py](./eval.py) to test and compare the classification accuracy of the model before and after quantization.
+
+First, test the accuracy of the model before quantization:
+```
+python eval.py --model_path ./inference_model/MobileNet --model_name model --params_name weights
+```
+The accuracy output is:
+```
+top1_acc/top5_acc= [0.70913923 0.89548034]
+```
+
+Then test the accuracy of the quantized model:
+```
+python eval.py --model_path ./quant_model_train/MobileNet
+```
+The accuracy output is:
+```
+top1_acc/top5_acc= [0.70141864 0.89086477]
+```
+The comparison shows that post-training quantization of the ``mobilenet`` classification model on ``imagenet`` loses ``0.77%`` of ``top1`` accuracy and ``0.46%`` of ``top5`` accuracy.
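+
+These losses are the absolute differences between the two runs, in percentage points:
+```
+top1_fp32, top5_fp32 = 0.70913923, 0.89548034
+top1_int8, top5_int8 = 0.70141864, 0.89086477
+print((top1_fp32 - top1_int8) * 100)  # ~0.77
+print((top5_fp32 - top5_int8) * 100)  # ~0.46
+```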
diff --git a/demo/quant/quant_post/eval.py b/demo/quant/quant_post/eval.py
new file mode 100755
index 0000000000000000000000000000000000000000..8d5cfa003d8b7077224ae2f54194069aadc3dc90
--- /dev/null
+++ b/demo/quant/quant_post/eval.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import numpy as np
+import argparse
+import functools
+
+import paddle
+import paddle.fluid as fluid
+sys.path.append('../../')
+import imagenet_reader as reader
+from utility import add_arguments, print_arguments
+
+parser = argparse.ArgumentParser(description=__doc__)
+# yapf: disable
+add_arg = functools.partial(add_arguments, argparser=parser)
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model_path', str, "./pruning/checkpoints/resnet50/2/eval_model/", "The directory of the model to evaluate.")
+add_arg('model_name', str, None, "model filename for inference model")
+add_arg('params_name', str, None, "params filename for inference model")
+# yapf: enable
+
+
+def eval(args):
+    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    val_program, feed_target_names, fetch_targets = fluid.io.load_inference_model(
+        args.model_path,
+        exe,
+        model_filename=args.model_name,
+        params_filename=args.params_name)
+    val_reader = paddle.batch(reader.val(), batch_size=128)
+    feeder = fluid.DataFeeder(
+        place=place, feed_list=feed_target_names, program=val_program)
+
+    results = []
+    for batch_id, data in enumerate(val_reader()):
+        if len(feed_target_names) == 1:
+            # Evaluate an "inference model": the input is an image and the
+            # output is classification probabilities, so top1/top5 accuracy
+            # is computed here from the predictions and labels.
+            image = [[d[0]] for d in data]
+            label = [[d[1]] for d in data]
+            feed_data = feeder.feed(image)
+            pred = exe.run(val_program,
+                           feed=feed_data,
+                           fetch_list=fetch_targets)
+            pred = np.array(pred[0])
+            label = np.array(label)
+            sort_array = pred.argsort(axis=1)
+            top_1_pred = sort_array[:, -1:][:, ::-1]
+            top_1 = np.mean(label == top_1_pred)
+            top_5_pred = sort_array[:, -5:][:, ::-1]
+            acc_num = 0
+            for i in range(len(label)):
+                if label[i][0] in top_5_pred[i]:
+                    acc_num += 1
+            top_5 = float(acc_num) / len(label)
+            results.append([top_1, top_5])
+        else:
+            # Evaluate an "eval model": the inputs are image and label, and
+            # the outputs are already top1 and top5 accuracy.
+            result = exe.run(val_program,
+                             feed=feeder.feed(data),
+                             fetch_list=fetch_targets)
+            result = [np.mean(r) for r in result]
+            results.append(result)
+    result = np.mean(np.array(results), axis=0)
+    print("top1_acc/top5_acc= {}".format(result))
+    sys.stdout.flush()
+
+
+def main():
+    args = parser.parse_args()
+    print_arguments(args)
+    eval(args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/demo/quant/quant_post/export_model.py b/demo/quant/quant_post/export_model.py
new file mode 100755
index 0000000000000000000000000000000000000000..dbfeb2b042139ec85b390ccd6f242c0aa93e8835
--- /dev/null
+++ b/demo/quant/quant_post/export_model.py
@@ -0,0 +1,88 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+sys.path.append(sys.path[0] + "/../../../")
+from paddleslim.common import get_logger
+sys.path.append(sys.path[0] + "/../../")
+import models
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model', str, "MobileNet", "The target model.")
+add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretrained", "The path of the pretrained model.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('test_period', int, 10, "Test period in epochs.")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def export_model(args):
+    if args.data == "mnist":
+        import paddle.dataset.mnist as reader
+        train_reader = reader.train()
+        val_reader = reader.test()
+        class_dim = 10
+        image_shape = "1,28,28"
+    elif args.data == "imagenet":
+        import imagenet_reader as reader
+        train_reader = reader.train()
+        val_reader = reader.val()
+        class_dim = 1000
+        image_shape = "3,224,224"
+    else:
+        raise ValueError("{} is not supported.".format(args.data))
+
+    image_shape = [int(m) for m in image_shape.split(",")]
+    image = fluid.data(
+        name='image', shape=[None] + image_shape, dtype='float32')
+    assert args.model in model_list, "{} is not in lists: {}".format(
+        args.model, model_list)
+    # model definition
+    model = models.__dict__[args.model]()
+    out = model.net(input=image, class_dim=class_dim)
+    val_program = fluid.default_main_program().clone(for_test=True)
+    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    if args.pretrained_model:
+
+        def if_exist(var):
+            return os.path.exists(
+                os.path.join(args.pretrained_model, var.name))
+
+        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
+    else:
+        assert False, "args.pretrained_model must be set"
+
+    fluid.io.save_inference_model(
+        './inference_model/' + args.model,
+        feeded_var_names=[image.name],
+        target_vars=[out],
+        executor=exe,
+        main_program=val_program,
+        model_filename='model',
+        params_filename='weights')
+
+
+def main():
+    args = parser.parse_args()
+    print_arguments(args)
+    export_model(args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/demo/quant/quant_post/quant_post.py b/demo/quant/quant_post/quant_post.py
new file mode 100755
index 0000000000000000000000000000000000000000..5a2c1c834c82e125adad7a597f0d8667d8b19bfc
--- /dev/null
+++ b/demo/quant/quant_post/quant_post.py
@@ -0,0 +1,61 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+
+sys.path.append(sys.path[0] + "/../../../")
+from paddleslim.common import get_logger
+from paddleslim.quant import quant_post
+sys.path.append(sys.path[0] + "/../../")
+import imagenet_reader as reader
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 16, "Minibatch size.")
+add_arg('batch_num', int, 10, "Number of batches used for calibration.")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model_path', str, "./inference_model/MobileNet/", "The directory of the model to be quantized.")
+add_arg('save_path', str, "./quant_model/MobileNet/", "The directory to save the quantized model.")
+add_arg('model_filename', str, None, "model file name")
+add_arg('params_filename', str, None, "params file name")
+# yapf: enable
+
+
+def quantize(args):
+    val_reader = reader.train()
+
+    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+
+    assert os.path.exists(args.model_path), "args.model_path doesn't exist"
+    assert os.path.isdir(args.model_path), "args.model_path must be a dir"
+
+    exe = fluid.Executor(place)
+    quant_post(
+        executor=exe,
+        model_dir=args.model_path,
+        quantize_model_path=args.save_path,
+        sample_generator=val_reader,
+        model_filename=args.model_filename,
+        params_filename=args.params_filename,
+        batch_size=args.batch_size,
+        batch_nums=args.batch_num)
+
+
+def main():
+    args = parser.parse_args()
+    print_arguments(args)
+    quantize(args)
+
+
+if __name__ == '__main__':
+    main()
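+
+
+# --- Illustrative addendum (not called by main) ---
+# A minimal sketch of loading the quantized model afterwards for a smoke
+# test. Assumption: quant_post saves the quantized model with default
+# filenames, which is why the README evaluates the quantized model without
+# passing --model_name / --params_name.
+def check_quantized_model(save_path, use_gpu=False):
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    program, feed_names, fetch_targets = fluid.io.load_inference_model(
+        save_path, exe)
+    # Run one dummy batch through the quantized program.
+    image = np.random.random([1, 3, 224, 224]).astype('float32')
+    outs = exe.run(program,
+                   feed={feed_names[0]: image},
+                   fetch_list=fetch_targets)
+    print("output shape: {}".format(np.array(outs[0]).shape))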