From de9eb717d5e45542a0f2abdc16f6116ff2a05260 Mon Sep 17 00:00:00 2001
From: Liufang Sang
@@ -106,6 +94,30 @@ QuantizationFreezePass主要用于改变IrGraph中量化op和反量化op的顺 图4:应用TransformForMobilePass后的结果
+## 评估 + +### 每个epoch保存的评估模型 +因为量化的最终模型只有在end_epoch时保存一次,不能保证保存的模型是最好的,因此 +如果在配置文件中设置了`checkpoint_path`,则每个epoch会保存一个量化后的用于评估的模型, +该模型会保存在`${checkpoint_path}/${epoch_id}/eval_model/`路径下,包含`__model__`和`__params__`两个文件。 +其中,`__model__`用于保存模型结构信息,`__params__`用于保存参数(parameters)信息。模型结构和训练时一样。 + +如果不需要保存评估模型,可以在定义Compressor对象时,将`save_eval_model`选项设置为False(默认为True)。 + +脚本PaddleSlim/classification/eval.py中为使用该模型在评估数据集上做评估的示例。 + +在评估之后,选取效果最好的epoch的模型,可使用脚本 PaddleSlim/classification/quantization/freeze.py将该模型转化为以上介绍的三种模型:float模型,int8模型, +mobile模型,需要配置的参数为: + +- model_path, 加载的模型路径,为`${checkpoint_path}/${epoch_id}/eval_model/` +- weight_quant_type, 模型参数的量化方式,和配置文件中的类型保持一致 +- save_path, `float`, `int8`, `mobile`模型的保存路径,分别为 `${save_path}/float/`, `${save_path}/int8/`, `${save_path}/mobile/` + +### 最终评估模型 +最终使用的评估模型是float模型,脚本PaddleSlim/classification/eval.py中为使用该模型在评估数据集上做评估的示例。 + +## 预测 + ### python预测 float预测模型可直接使用原生PaddlePaddle Fluid预测方法进行预测。 @@ -139,7 +151,7 @@ fluid.optimizer.Momentum(momentum=0.9, values=[0.0001, 0.00001]), regularization=fluid.regularizer.L2Decay(1e-4)) ``` -batch size 1024 +8卡,batch size 1024,epoch 30, 选取其中效果最好的结果 ### MobileNetV2 @@ -171,6 +183,7 @@ fluid.optimizer.Momentum(momentum=0.9, values=[0.0001, 0.00001]), regularization=fluid.regularizer.L2Decay(1e-4)) ``` -batch size 1024 +8卡,batch size 1024,epoch 30, 选取其中效果最好的结果 + ## FAQ diff --git a/PaddleSlim/classification/quantization/compress.py b/PaddleSlim/classification/quantization/compress.py index f2914b57..4894684f 100644 --- a/PaddleSlim/classification/quantization/compress.py +++ b/PaddleSlim/classification/quantization/compress.py @@ -53,12 +53,12 @@ def compress(args): val_program = fluid.default_main_program().clone() # quantization usually use small learning rate - values = [1e-4, 1e-5, 1e-6] + values = [1e-4, 1e-5] opt = fluid.optimizer.Momentum( momentum=0.9, learning_rate=fluid.layers.piecewise_decay( - boundaries=[5000 * 30, 5000 * 60], values=values), - regularization=fluid.regularizer.L2Decay(4e-5)) +
boundaries=[5000 * 12], values=values), + regularization=fluid.regularizer.L2Decay(1e-4)) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/PaddleSlim/classification/quantization/configs/mobilenet_v1.yaml b/PaddleSlim/classification/quantization/configs/mobilenet_v1.yaml index 20ce8f65..2f88ec9c 100644 --- a/PaddleSlim/classification/quantization/configs/mobilenet_v1.yaml +++ b/PaddleSlim/classification/quantization/configs/mobilenet_v1.yaml @@ -3,7 +3,7 @@ strategies: quantization_strategy: class: 'QuantizationStrategy' start_epoch: 0 - end_epoch: 0 + end_epoch: 29 float_model_save_path: './output/mobilenet_v1/float' mobile_model_save_path: './output/mobilenet_v1/mobile' int8_model_save_path: './output/mobilenet_v1/int8' @@ -14,7 +14,7 @@ strategies: save_in_nodes: ['image'] save_out_nodes: ['fc_0.tmp_2'] compressor: - epoch: 1 + epoch: 30 checkpoint_path: './checkpoints/mobilenet_v1/' strategies: - quantization_strategy diff --git a/PaddleSlim/classification/quantization/configs/mobilenet_v2.yaml b/PaddleSlim/classification/quantization/configs/mobilenet_v2.yaml index 836dcb25..b3de9344 100644 --- a/PaddleSlim/classification/quantization/configs/mobilenet_v2.yaml +++ b/PaddleSlim/classification/quantization/configs/mobilenet_v2.yaml @@ -3,7 +3,7 @@ strategies: quantization_strategy: class: 'QuantizationStrategy' start_epoch: 0 - end_epoch: 0 + end_epoch: 29 float_model_save_path: './output/mobilenet_v2/float' mobile_model_save_path: './output/mobilenet_v2/mobile' int8_model_save_path: './output/mobilenet_v2/int8' @@ -14,7 +14,7 @@ strategies: save_in_nodes: ['image'] save_out_nodes: ['fc_0.tmp_2'] compressor: - epoch: 1 + epoch: 30 checkpoint_path: './checkpoints/mobilenet_v2/' strategies: - quantization_strategy diff --git a/PaddleSlim/classification/quantization/configs/resnet50.yaml b/PaddleSlim/classification/quantization/configs/resnet34.yaml similarity index 60% rename from 
PaddleSlim/classification/quantization/configs/resnet50.yaml rename to PaddleSlim/classification/quantization/configs/resnet34.yaml index c5105d03..5ff6eeb2 100644 --- a/PaddleSlim/classification/quantization/configs/resnet50.yaml +++ b/PaddleSlim/classification/quantization/configs/resnet34.yaml @@ -3,10 +3,10 @@ strategies: quantization_strategy: class: 'QuantizationStrategy' start_epoch: 0 - end_epoch: 0 - float_model_save_path: './output/resnet50/float' - mobile_model_save_path: './output/resnet50/mobile' - int8_model_save_path: './output/resnet50/int8' + end_epoch: 29 + float_model_save_path: './output/resnet34/float' + mobile_model_save_path: './output/resnet34/mobile' + int8_model_save_path: './output/resnet34/int8' weight_bits: 8 activation_bits: 8 weight_quantize_type: 'abs_max' @@ -14,7 +14,7 @@ strategies: save_in_nodes: ['image'] save_out_nodes: ['fc_0.tmp_2'] compressor: - epoch: 2 - checkpoint_path: './checkpoints/resnet50/' + epoch: 30 + checkpoint_path: './checkpoints/resnet34/' strategies: - quantization_strategy diff --git a/PaddleSlim/classification/quantization/freeze.py b/PaddleSlim/classification/quantization/freeze.py new file mode 100644 index 00000000..d1a547d5 --- /dev/null +++ b/PaddleSlim/classification/quantization/freeze.py @@ -0,0 +1,125 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
"""Evaluate a saved quantization eval model, then export float, int8 and mobile inference models."""

import os
import sys
import numpy as np
import argparse
import functools
import logging

import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
# NOTE: these paths are relative to the current working directory, so the
# script has to be launched from the directory that contains it.
sys.path.append("..")
import imagenet_reader as reader
sys.path.append("../../")
from utility import add_arguments, print_arguments

logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
_logger = logging.getLogger(__name__)
_logger.setLevel(logging.INFO)

parser = argparse.ArgumentParser(description=__doc__)
# yapf: disable
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
# NOTE(review): the default still points at a pruning/resnet50 checkpoint,
# which looks like a copy-paste leftover -- confirm the intended default.
add_arg('model_path', str, "./pruning/checkpoints/resnet50/2/eval_model/", "Directory of the eval model to load (must contain __model__ and __params__).")
add_arg('save_path', str, './output', 'Path to save inference model')
add_arg('weight_quant_type', str, 'abs_max', 'quantization type for weight')
# yapf: enable


def _evaluate(exe, place, program, feed_names, fetch_targets, batch_size=128):
    """Run `program` over the validation reader and average the fetched metrics.

    Each fetched tensor is reduced to its mean per batch; batches are then
    combined weighted by their sample count, so a smaller final batch does not
    skew the result.

    Returns:
        A numpy array with one averaged value per fetch target, or None when
        the reader yields no batches.
    """
    val_reader = paddle.batch(reader.val(), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        place=place, feed_list=feed_names, program=program)

    weighted_sum = None
    sample_count = 0
    for data in val_reader():
        # presumably top1_acc, top5_acc -- depends on the saved fetch targets
        fetched = exe.run(program,
                          feed=feeder.feed(data),
                          fetch_list=fetch_targets)
        batch_means = np.array([np.mean(r) for r in fetched])
        batch_len = len(data)
        contribution = batch_means * batch_len
        weighted_sum = (contribution if weighted_sum is None
                        else weighted_sum + contribution)
        sample_count += batch_len

    if sample_count == 0:
        return None
    return weighted_sum / sample_count


def _save_inference_model(save_dir, program, feed_names, fetch_targets, exe):
    """Save `program` to `save_dir` using the 'model' / 'params' file names."""
    fluid.io.save_inference_model(
        dirname=save_dir,
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=program,
        model_filename='model',
        params_filename='params')


def eval(args):
    """Evaluate the model at `args.model_path` and export three frozen variants.

    The name shadows the `eval` builtin inside this module; it is kept because
    `main` (and possibly external callers) use it.

    Args:
        args: parsed arguments providing `use_gpu`, `model_path`, `save_path`
            and `weight_quant_type`.

    Side effects:
        Prints the averaged metrics and writes inference models to
        `<save_path>/float`, `<save_path>/int8` and `<save_path>/mobile`.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    val_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        args.model_path,
        exe,
        model_filename="__model__",
        params_filename="__params__")

    result = _evaluate(exe, place, val_program, feed_names, fetch_targets)
    if result is None:
        _logger.warning(
            "validation reader yielded no batches; accuracy not reported")
    else:
        print("top1_acc/top5_acc= {}".format(result))
        sys.stdout.flush()

    # The passes below mutate `test_graph` in place, so their order matters:
    # freeze -> int8 conversion -> mobile transform, saving after each step.
    _logger.info("freeze the graph for inference")
    test_graph = IrGraph(core.Graph(val_program.desc), for_test=True)

    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=args.weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    _save_inference_model(
        os.path.join(args.save_path, 'float'), server_program, feed_names,
        fetch_targets, exe)

    _logger.info("convert the weights into int8 type")
    convert_int8_pass = ConvertToInt8Pass(
        scope=fluid.global_scope(),
        place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    _save_inference_model(
        os.path.join(args.save_path, 'int8'), server_int8_program, feed_names,
        fetch_targets, exe)

    _logger.info("convert the freezed pass to paddle-lite execution")
    mobile_pass = TransformForMobilePass()
    mobile_pass.apply(test_graph)
    mobile_program = test_graph.to_program()
    _save_inference_model(
        os.path.join(args.save_path, 'mobile'), mobile_program, feed_names,
        fetch_targets, exe)


def main():
    """Parse command-line arguments, echo them, and run `eval`."""
    args = parser.parse_args()
    print_arguments(args)
    eval(args)


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# The text below is not part of freeze.py: it is the header of the next file in
# the collapsed patch (run.sh), which shared a physical line with the code
# above. It is kept verbatim so no patch content is lost.
#
# diff --git a/PaddleSlim/classification/quantization/run.sh b/PaddleSlim/classification/quantization/run.sh
# index adf67f38..6c62fc5b 100644
# --- a/PaddleSlim/classification/quantization/run.sh
# +++ b/PaddleSlim/classification/quantization/run.sh
# @@ -4,7 +4,7 @@ root_url="http://paddle-imagenet-models-name.bj.bcebos.com"
# ---------------------------------------------------------------------------
MobileNetV1="MobileNetV1_pretrained.tar" MobileNetV2="MobileNetV2_pretrained.tar" -ResNet50="ResNet50_pretrained.tar" +ResNet34="ResNet34_pretrained.tar" pretrain_dir='../pretrain' if [ ! -d ${pretrain_dir} ]; then @@ -23,9 +23,9 @@ if [ ! -f ${MobileNetV2} ]; then tar xf ${MobileNetV2} fi -if [ ! -f ${ResNet50} ]; then - wget ${root_url}/${ResNet50} - tar xf ${ResNet50} +if [ ! -f ${ResNet34} ]; then + wget ${root_url}/${ResNet34} + tar xf ${ResNet34} fi cd - @@ -37,14 +37,14 @@ export FLAGS_eager_delete_tensor_gb=0.0 export CUDA_VISIBLE_DEVICES=0 ## for quantization for mobilenet_v1 -python -u compress.py \ - --model "MobileNet" \ - --use_gpu 1 \ - --batch_size 32 \ - --pretrained_model ../pretrain/MobileNetV1_pretrained \ - --config_file "./configs/mobilenet_v1.yaml" \ -> mobilenet_v1.log 2>&1 & -tailf mobilenet_v1.log +#python -u compress.py \ +# --model "MobileNet" \ +# --use_gpu 1 \ +# --batch_size 256 \ +# --pretrained_model ../pretrain/MobileNetV1_pretrained \ +# --config_file "./configs/mobilenet_v1.yaml" \ +#> mobilenet_v1.log 2>&1 & +#tailf mobilenet_v1.log ## for quantization of mobilenet_v2 #python -u compress.py \ @@ -56,12 +56,12 @@ tailf mobilenet_v1.log # > mobilenet_v2.log 2>&1 & #tailf mobilenet_v2.log -# for compression of resnet50 -#python -u compress.py \ -# --model "ResNet50" \ -# --use_gpu 1 \ -# --batch_size 32 \ -# --pretrained_model ../pretrain/ResNet50_pretrained \ -# --config_file "./configs/resnet50.yaml" \ -# > resnet50.log 2>&1 & -#tailf resnet50.log +# for compression of resnet34 +python -u compress.py \ + --model "ResNet34" \ + --use_gpu 1 \ + --batch_size 32 \ + --pretrained_model ../pretrain/ResNet34_pretrained \ + --config_file "./configs/resnet34.yaml" \ + > resnet34.log 2>&1 & +tailf resnet34.log -- GitLab