From 9ab90c96a2789a1733869fa18e699a23fc2ad191 Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Thu, 4 Aug 2022 19:05:08 +0800
Subject: [PATCH] support quant onnx format in ACT (#1322)

---
 demo/quant/pact_quant_aware/train.py       | 30 +++++++++----------
 demo/quant/quant_aware/train.py            |  7 ++---
 .../configs/yolov5s_qat_dis.yaml           |  1 +
 paddleslim/auto_compression/compressor.py  | 13 ++++----
 .../auto_compression/strategy_config.py    |  3 ++
 paddleslim/quant/quanter.py                | 24 +++++++--------
 6 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/demo/quant/pact_quant_aware/train.py b/demo/quant/pact_quant_aware/train.py
index fb70c0fc..67945a45 100644
--- a/demo/quant/pact_quant_aware/train.py
+++ b/demo/quant/pact_quant_aware/train.py
@@ -65,6 +65,8 @@
 add_arg('use_pact', bool, True, "Whether to use PACT or not.")
 add_arg('analysis', bool, False, "Whether analysis variables distribution.")
+add_arg('onnx_format', bool, False,
+        "Whether use onnx format or not.")
 add_arg('ce_test', bool, False, "Whether to CE test.")
 # yapf: enable
@@ -257,6 +259,8 @@ def compress(args):
         'window_size': 10000,
         # The decay coefficient of moving average, default is 0.9
         'moving_rate': 0.9,
+        # Whether use onnx format or not
+        'onnx_format': args.onnx_format,
     }
 
     # 2. quantization transform programs (training aware)
@@ -298,9 +302,9 @@
         places,
         quant_config,
         scope=None,
-        act_preprocess_func=act_preprocess_func,
-        optimizer_func=optimizer_func,
-        executor=executor,
+        act_preprocess_func=None,
+        optimizer_func=None,
+        executor=None,
         for_test=True)
     compiled_train_prog = quant_aware(
         train_prog,
@@ -425,29 +429,23 @@
     # 3. Freeze the graph after training by adjusting the quantize
     # operators' order for the inference.
     # The dtype of float_program's weights is float32, but in int8 range.
-    float_program, int8_program = convert(val_program, places, quant_config, \
-                                          scope=None, \
-                                          save_int8=True)
+    model_path = os.path.join(quantization_model_save_dir, args.model)
+    if not os.path.isdir(model_path):
+        os.makedirs(model_path)
+    float_program = convert(val_program, places, quant_config)
     _logger.info("eval best_model after convert")
     final_acc1 = test(best_epoch, float_program)
     _logger.info("final acc:{}".format(final_acc1))
 
     # 4. Save inference model
-    model_path = os.path.join(quantization_model_save_dir, args.model,
-                              'act_' + quant_config['activation_quantize_type']
-                              + '_w_' + quant_config['weight_quantize_type'])
-    float_path = os.path.join(model_path, 'float')
-    if not os.path.isdir(model_path):
-        os.makedirs(model_path)
-
     paddle.fluid.io.save_inference_model(
-        dirname=float_path,
+        dirname=model_path,
         feeded_var_names=[image.name],
         target_vars=[out],
         executor=exe,
         main_program=float_program,
-        model_filename=float_path + '/model',
-        params_filename=float_path + '/params')
+        model_filename=model_path + '/model.pdmodel',
+        params_filename=model_path + '/model.pdiparams')
 
 
 def main():
diff --git a/demo/quant/quant_aware/train.py b/demo/quant/quant_aware/train.py
index abf6073e..7fc133a4 100644
--- a/demo/quant/quant_aware/train.py
+++ b/demo/quant/quant_aware/train.py
@@ -126,6 +126,8 @@ def compress(args):
         'window_size': 10000,
         # The decay coefficient of moving average, default is 0.9
         'moving_rate': 0.9,
+        # Whether use onnx format or not
+        'onnx_format': args.onnx_format,
     }
 
     pretrain = True
@@ -294,10 +296,7 @@
     # operators' order for the inference.
    # The dtype of float_program's weights is float32, but in int8 range.
     ############################################################################################################
-    float_program, int8_program = convert(val_program, places, quant_config, \
-                                          scope=None, \
-                                          save_int8=True,
-                                          onnx_format=args.onnx_format)
+    float_program = convert(val_program, places, quant_config)
     print("eval best_model after convert")
     final_acc1 = test(best_epoch, float_program)
     ############################################################################################################
diff --git a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml b/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
index ef9bf8b7..0f932a9e 100644
--- a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
+++ b/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
@@ -14,6 +14,7 @@ Distillation:
 
 Quantization:
   use_pact: true
+  onnx_format: False
   activation_quantize_type: 'moving_average_abs_max'
   quantize_op_types:
   - conv2d
diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py
index af16e6e9..5587e92d 100644
--- a/paddleslim/auto_compression/compressor.py
+++ b/paddleslim/auto_compression/compressor.py
@@ -787,15 +787,18 @@ class AutoCompression:
             os.remove(os.path.join(self.tmp_dir, 'best_model.pdopt'))
             os.remove(os.path.join(self.tmp_dir, 'best_model.pdparams'))
 
-        if 'qat' in strategy:
-            test_program, int8_program = convert(test_program, self._places, self._quant_config, \
-                                          scope=paddle.static.global_scope(), \
-                                          save_int8=True)
-
         model_dir = os.path.join(self.tmp_dir,
                                  'strategy_{}'.format(str(strategy_idx + 1)))
         if not os.path.exists(model_dir):
             os.makedirs(model_dir)
+
+        if 'qat' in strategy:
+            test_program = convert(
+                test_program,
+                self._places,
+                self._quant_config,
+                scope=paddle.static.global_scope())
+
         feed_vars = [
             test_program.global_block().var(name)
             for name in test_program_info.feed_target_names
diff --git a/paddleslim/auto_compression/strategy_config.py b/paddleslim/auto_compression/strategy_config.py
index 5226a7c8..aad5e23e 100644
--- a/paddleslim/auto_compression/strategy_config.py
+++ b/paddleslim/auto_compression/strategy_config.py
@@ -65,6 +65,7 @@ class Quantization(BaseStrategy):
                  window_size=10000,
                  moving_rate=0.9,
                  for_tensorrt=False,
+                 onnx_format=False,
                  is_full_quantize=False):
         """
         Quantization Config.
@@ -80,6 +81,7 @@
             window_size(int): Window size for 'range_abs_max' quantization. Default: 10000.
             moving_rate(float): The decay coefficient of moving average. Default: 0.9.
             for_tensorrt(bool): If True, 'quantize_op_types' will be TENSORRT_OP_TYPES. Default: False.
+            onnx_format(bool): Whether to export the quantized model with format of ONNX. Default is False.
             is_full_quantize(bool): If True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES. Default: False.
""" super(Quantization, self).__init__("Quantization") @@ -95,6 +97,7 @@ class Quantization(BaseStrategy): self.window_size = window_size self.moving_rate = moving_rate self.for_tensorrt = for_tensorrt + self.onnx_format = onnx_format self.is_full_quantize = is_full_quantize diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py index 9e07c03c..3ecff9bf 100755 --- a/paddleslim/quant/quanter.py +++ b/paddleslim/quant/quanter.py @@ -91,7 +91,9 @@ _quant_config_default = { # if True, 'quantize_op_types' will be TENSORRT_OP_TYPES 'for_tensorrt': False, # if True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES - 'is_full_quantize': False + 'is_full_quantize': False, + # if True, use onnx format to quant. + 'onnx_format': False, } @@ -222,7 +224,6 @@ def quant_aware(program, act_preprocess_func=None, optimizer_func=None, executor=None, - onnx_format=False, return_program=False, draw_graph=False): """Add quantization and dequantization operators to "program" @@ -236,7 +237,9 @@ def quant_aware(program, Default: None. scope(paddle.static.Scope): Scope records the mapping between variable names and variables, similar to brackets in programming languages. Usually users can use - `paddle.static.global_scope `_. When ``None`` will use `paddle.static.global_scope() `_ . Default: ``None``. + `paddle.static.global_scope `_. + When ``None`` will use `paddle.static.global_scope() `_ . + Default: ``None``. for_test(bool): If the 'program' parameter is a test program, this parameter should be set to ``True``. Otherwise, set to ``False``.Default: False weight_quantize_func(function): Function that defines how to quantize weight. Using this @@ -291,7 +294,8 @@ def quant_aware(program, elif op_type in QUANT_DEQUANT_PASS_OP_TYPES: quant_dequant_ops.append(op_type) if len(transform_pass_ops) > 0: - trannsform_func = 'QuantizationTransformPassV2' if onnx_format else 'QuantizationTransformPass' + trannsform_func = 'QuantizationTransformPassV2' if config[ + 'onnx_format'] else 'QuantizationTransformPass' transform_pass = eval(trannsform_func)( scope=scope, place=place, @@ -313,7 +317,8 @@ def quant_aware(program, transform_pass.apply(main_graph) if len(quant_dequant_ops) > 0: - qdq_func = 'AddQuantDequantPassV2' if onnx_format else 'AddQuantDequantPass' + qdq_func = 'AddQuantDequantPassV2' if config[ + 'onnx_format'] else 'AddQuantDequantPass' quant_dequant_pass = eval(qdq_func)( scope=scope, place=place, @@ -516,12 +521,7 @@ def quant_post_static( quant_post = quant_post_static -def convert(program, - place, - config=None, - scope=None, - save_int8=False, - onnx_format=False): +def convert(program, place, config=None, scope=None, save_int8=False): """ convert quantized and well-trained ``program`` to final quantized ``program``that can be used to save ``inference model``. @@ -560,7 +560,7 @@ def convert(program, _logger.info("convert config {}".format(config)) test_graph = IrGraph(core.Graph(program.desc), for_test=True) - if onnx_format: + if config['onnx_format']: quant_weight_pass = QuantWeightPass(scope, place) quant_weight_pass.apply(test_graph) else: -- GitLab