From 2c6f0b0d55f254d07eed400502eac207ae61f58c Mon Sep 17 00:00:00 2001 From: baiyfbupt Date: Tue, 15 Sep 2020 20:17:23 +0800 Subject: [PATCH] add slim quantization --- deploy/slim/quantization/README.md | 34 ++++ deploy/slim/quantization/export_model.py | 129 ++++++++++++++ deploy/slim/quantization/quant.py | 204 ++++++++++++++++++++++ ppocr/modeling/architectures/det_model.py | 6 +- ppocr/modeling/architectures/rec_model.py | 2 + ppocr/modeling/heads/rec_ctc_head.py | 13 +- tools/program.py | 4 +- 7 files changed, 384 insertions(+), 8 deletions(-) create mode 100755 deploy/slim/quantization/README.md create mode 100644 deploy/slim/quantization/export_model.py create mode 100755 deploy/slim/quantization/quant.py diff --git a/deploy/slim/quantization/README.md b/deploy/slim/quantization/README.md new file mode 100755 index 00000000..42d1ad1a --- /dev/null +++ b/deploy/slim/quantization/README.md @@ -0,0 +1,34 @@ +> 运行示例前请先安装1.2.0或更高版本PaddleSlim + +# 模型量化压缩教程 + +## 概述 + +该示例使用PaddleSlim提供的[量化压缩API](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/)对检测模型进行压缩。 +在阅读该示例前,建议您先了解以下内容: + +- [OCR模型的常规训练方法](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/detection.md) +- [PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/) + +## 安装PaddleSlim +可按照[PaddleSlim使用文档](https://paddlepaddle.github.io/PaddleSlim/)中的步骤安装PaddleSlim。 + + + +## 量化训练 + +进入PaddleOCR根目录,通过以下命令对模型进行量化: + +```bash +python deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrain_weights=det_mv3_db/best_accuracy Global.save_model_dir=./output/quant_model +``` + + + +## 评估并导出 + +在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署: + +```bash +python deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_model_dir=./output/quant_model +``` diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py new file mode 100644 index 
00000000..d0d08b30 --- /dev/null +++ b/deploy/slim/quantization/export_model.py @@ -0,0 +1,129 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +__dir__ = os.path.dirname(__file__) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..'))) +sys.path.append( + os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools'))) + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. +set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +import program +from paddle import fluid +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.utils.save_load import init_model, load_params +from ppocr.utils.character import CharacterOps +from ppocr.utils.utility import create_module +from ppocr.data.reader_main import reader_main + +from paddleslim.quant import quant_aware, convert +from paddle.fluid.layer_helper import LayerHelper +from eval_utils.eval_det_utils import eval_det_run +from eval_utils.eval_rec_utils import eval_rec_run + + +def main(): + # 1. 
quantization configs + quant_config = { + # weight quantize type, default is 'channel_wise_abs_max' + 'weight_quantize_type': 'channel_wise_abs_max', + # activation quantize type, default is 'moving_average_abs_max' + 'activation_quantize_type': 'moving_average_abs_max', + # weight quantize bit num, default is 8 + 'weight_bits': 8, + # activation quantize bit num, default is 8 + 'activation_bits': 8, + # ops of name_scope in not_quant_pattern list, will not be quantized + 'not_quant_pattern': ['skip_quant'], + # ops of type in quantize_op_types, will be quantized + 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], + # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' + 'dtype': 'int8', + # window size for 'range_abs_max' quantization. default is 10000 + 'window_size': 10000, + # The decay coefficient of moving average, default is 0.9 + 'moving_rate': 0.9, + } + + startup_prog, eval_program, place, config, alg_type = program.preprocess() + + feeded_var_names, target_vars, fetches_var_name = program.build_export( + config, eval_program, startup_prog) + + eval_program = eval_program.clone(for_test=True) + exe = fluid.Executor(place) + exe.run(startup_prog) + + eval_program = quant_aware( + eval_program, place, quant_config, scope=None, for_test=True) + + init_model(config, eval_program, exe) + + # 2. Convert the program before save inference program + # The dtype of eval_program's weights is float32, but in int8 range.
+ + eval_program = convert(eval_program, place, quant_config, scope=None) + + eval_fetch_name_list = fetches_var_name + eval_fetch_varname_list = [v.name for v in target_vars] + eval_reader = reader_main(config=config, mode="eval") + quant_info_dict = {'program':eval_program,\ + 'reader':eval_reader,\ + 'fetch_name_list':eval_fetch_name_list,\ + 'fetch_varname_list':eval_fetch_varname_list} + + if alg_type == 'det': + final_metrics = eval_det_run(exe, config, quant_info_dict, "eval") + else: + final_metrics = eval_rec_run(exe, config, quant_info_dict, "eval") + print(final_metrics) + + # 3. Save inference model + model_path = "./quant_model" + if not os.path.isdir(model_path): + os.makedirs(model_path) + + fluid.io.save_inference_model( + dirname=model_path, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + executor=exe, + main_program=eval_program, + model_filename=model_path + '/model', + params_filename=model_path + '/params') + print("model saved as {}".format(model_path)) + + +if __name__ == '__main__': + main() diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py new file mode 100755 index 00000000..70e15de7 --- /dev/null +++ b/deploy/slim/quantization/quant.py @@ -0,0 +1,204 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..'))) +sys.path.append( + os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools'))) + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. +set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +import tools.program as program +from paddle import fluid +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.data.reader_main import reader_main +from ppocr.utils.save_load import init_model +from paddle.fluid.contrib.model_stat import summary + +# quant dependencies +import paddle +import paddle.fluid as fluid +from paddleslim.quant import quant_aware, convert +from paddle.fluid.layer_helper import LayerHelper + + +def main(): + train_build_outputs = program.build( + config, train_program, startup_program, mode='train') + train_loader = train_build_outputs[0] + train_fetch_name_list = train_build_outputs[1] + train_fetch_varname_list = train_build_outputs[2] + train_opt_loss_name = train_build_outputs[3] + model_average = train_build_outputs[-1] + + eval_program = fluid.Program() + eval_build_outputs = program.build( + config, eval_program, startup_program, mode='eval') + eval_fetch_name_list = eval_build_outputs[1] + eval_fetch_varname_list = eval_build_outputs[2] + eval_program = eval_program.clone(for_test=True) + + train_reader = reader_main(config=config, mode="train") + train_loader.set_sample_list_generator(train_reader, places=place) + + eval_reader = reader_main(config=config, mode="eval") + + 
exe = fluid.Executor(place) + exe.run(startup_program) + + def pact(x, name=None): + helper = LayerHelper("pact", **locals()) + dtype = 'float32' + init_thres = 20 + u_param_attr = fluid.ParamAttr( + name=x.name + '_pact', + initializer=fluid.initializer.ConstantInitializer(value=init_thres), + regularizer=fluid.regularizer.L2Decay(0.0001), + learning_rate=1) + u_param = helper.create_parameter( + attr=u_param_attr, shape=[1], dtype=dtype) + x = fluid.layers.elementwise_sub( + x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param))) + x = fluid.layers.elementwise_add( + x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x))) + return x + + def get_optimizer(): + return fluid.optimizer.AdamOptimizer(0.001) + + # 1. quantization configs + quant_config = { + # weight quantize type, default is 'channel_wise_abs_max' + 'weight_quantize_type': 'channel_wise_abs_max', + # activation quantize type, default is 'moving_average_abs_max' + 'activation_quantize_type': 'moving_average_abs_max', + # weight quantize bit num, default is 8 + 'weight_bits': 8, + # activation quantize bit num, default is 8 + 'activation_bits': 8, + # ops of name_scope in not_quant_pattern list, will not be quantized + 'not_quant_pattern': ['skip_quant'], + # ops of type in quantize_op_types, will be quantized + 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], + # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' + 'dtype': 'int8', + # window size for 'range_abs_max' quantization. default is 10000 + 'window_size': 10000, + # The decay coefficient of moving average, default is 0.9 + 'moving_rate': 0.9, + } + + # 2. quantization transform programs (training aware) + # Make some quantization transforms in the graph before training and testing. + # According to the weight and activation quantization type, the graph will be added + # some fake quantize operators and fake dequantize operators.
+ act_preprocess_func = pact + optimizer_func = get_optimizer + executor = exe + + eval_program = quant_aware( + eval_program, + place, + quant_config, + scope=None, + act_preprocess_func=act_preprocess_func, + optimizer_func=optimizer_func, + executor=executor, + for_test=True) + quant_train_program = quant_aware( + train_program, + place, + quant_config, + scope=None, + act_preprocess_func=act_preprocess_func, + optimizer_func=optimizer_func, + executor=executor, + for_test=False, + return_program=True) + + # compile program for multi-devices + train_compile_program = program.create_multi_devices_program( + quant_train_program, train_opt_loss_name, for_quant=True) + + # dump model structure + if config['Global']['debug']: + if train_alg_type == 'rec' and 'attention' in config['Global'][ + 'loss_type']: + logger.warning('Does not suport dump attention...') + else: + summary(quant_train_program) + + init_model(config, quant_train_program, exe) + + train_info_dict = {'compile_program':train_compile_program,\ + 'train_program':quant_train_program,\ + 'reader':train_loader,\ + 'fetch_name_list':train_fetch_name_list,\ + 'fetch_varname_list':train_fetch_varname_list,\ + 'model_average': model_average} + + eval_info_dict = {'program':eval_program,\ + 'reader':eval_reader,\ + 'fetch_name_list':eval_fetch_name_list,\ + 'fetch_varname_list':eval_fetch_varname_list} + + if train_alg_type == 'det': + program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict) + else: + program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict) + + +def test_reader(): + logger.info(config) + train_reader = reader_main(config=config, mode="train") + import time + starttime = time.time() + count = 0 + try: + for data in train_reader(): + count += 1 + if count % 1 == 0: + batch_time = time.time() - starttime + starttime = time.time() + logger.info("reader:", count, len(data), batch_time) + except Exception as e: + logger.info(e) + logger.info("finish reader: {},
Success!".format(count)) + + +if __name__ == '__main__': + startup_program, train_program, place, config, train_alg_type = program.preprocess( + ) + main() +# test_reader() diff --git a/ppocr/modeling/architectures/det_model.py b/ppocr/modeling/architectures/det_model.py index 54d3a479..e4c32b8e 100644 --- a/ppocr/modeling/architectures/det_model.py +++ b/ppocr/modeling/architectures/det_model.py @@ -67,6 +67,7 @@ class DetModel(object): image = fluid.layers.data( name='image', shape=image_shape, dtype='float32') + image.stop_gradient = False if mode == "train": if self.algorithm == "EAST": h, w = int(image_shape[1] // 4), int(image_shape[2] // 4) @@ -108,7 +109,10 @@ class DetModel(object): name='tvo', shape=[9, 128, 128], dtype='float32') input_tco = fluid.layers.data( name='tco', shape=[3, 128, 128], dtype='float32') - feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco] + feed_list = [ + image, input_score, input_border, input_mask, input_tvo, + input_tco + ] labels = {'input_score': input_score,\ 'input_border': input_border,\ 'input_mask': input_mask,\ diff --git a/ppocr/modeling/architectures/rec_model.py b/ppocr/modeling/architectures/rec_model.py index f2e24abd..26146204 100755 --- a/ppocr/modeling/architectures/rec_model.py +++ b/ppocr/modeling/architectures/rec_model.py @@ -68,6 +68,7 @@ class RecModel(object): image_shape.insert(0, -1) if mode == "train": image = fluid.data(name='image', shape=image_shape, dtype='float32') + image.stop_gradient = False if self.loss_type == "attention": label_in = fluid.data( name='label_in', @@ -146,6 +147,7 @@ class RecModel(object): ) image_shape = deepcopy(self.image_shape) image = fluid.data(name='image', shape=image_shape, dtype='float32') + image.stop_gradient = False if self.loss_type == "srn": encoder_word_pos = fluid.data( name="encoder_word_pos", diff --git a/ppocr/modeling/heads/rec_ctc_head.py b/ppocr/modeling/heads/rec_ctc_head.py index 6b8635e4..84948c2b 100755 --- 
a/ppocr/modeling/heads/rec_ctc_head.py +++ b/ppocr/modeling/heads/rec_ctc_head.py @@ -35,12 +35,13 @@ class CTCPredict(object): self.fc_decay = params.get("fc_decay", 0.0004) def __call__(self, inputs, labels=None, mode=None): - encoder_features = self.encoder(inputs) - if self.encoder_type != "reshape": - encoder_features = fluid.layers.concat(encoder_features, axis=1) - name = "ctc_fc" - para_attr, bias_attr = get_para_bias_attr( - l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name) + with fluid.scope_guard("skip_quant"): + encoder_features = self.encoder(inputs) + if self.encoder_type != "reshape": + encoder_features = fluid.layers.concat(encoder_features, axis=1) + name = "ctc_fc" + para_attr, bias_attr = get_para_bias_attr( + l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name) predict = fluid.layers.fc(input=encoder_features, size=self.char_num + 1, param_attr=para_attr, diff --git a/tools/program.py b/tools/program.py index 56f6b699..be133ac2 100755 --- a/tools/program.py +++ b/tools/program.py @@ -225,10 +225,12 @@ def build_export(config, main_prog, startup_prog): return feeded_var_names, target_vars, fetches_var_name -def create_multi_devices_program(program, loss_var_name): +def create_multi_devices_program(program, loss_var_name, for_quant=False): build_strategy = fluid.BuildStrategy() build_strategy.memory_optimize = False build_strategy.enable_inplace = True + if for_quant: + build_strategy.fuse_all_reduce_ops = False exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_iteration_per_drop_scope = 1 compile_program = fluid.CompiledProgram(program).with_data_parallel( -- GitLab