diff --git a/demo/quant/pact_quant_aware/train.py b/demo/quant/pact_quant_aware/train.py
index fb70c0fc22fae462249b54a02145749b2ef40a1b..67945a455d261681046f42caa19e8f1c18a37380 100644
--- a/demo/quant/pact_quant_aware/train.py
+++ b/demo/quant/pact_quant_aware/train.py
@@ -65,6 +65,8 @@
 add_arg('use_pact', bool, True, "Whether to use PACT or not.")
 add_arg('analysis', bool, False, "Whether analysis variables distribution.")
+add_arg('onnx_format', bool, False,
+        "Whether use onnx format or not.")
 add_arg('ce_test', bool, False, "Whether to CE test.")
 # yapf: enable
 
@@ -257,6 +259,8 @@ def compress(args):
         'window_size': 10000,
         # The decay coefficient of moving average, default is 0.9
         'moving_rate': 0.9,
+        # Whether use onnx format or not
+        'onnx_format': args.onnx_format,
     }
 
     # 2. quantization transform programs (training aware)
@@ -298,9 +302,9 @@ def compress(args):
         places,
         quant_config,
         scope=None,
-        act_preprocess_func=act_preprocess_func,
-        optimizer_func=optimizer_func,
-        executor=executor,
+        act_preprocess_func=None,
+        optimizer_func=None,
+        executor=None,
         for_test=True)
     compiled_train_prog = quant_aware(
         train_prog,
@@ -425,29 +429,23 @@ def compress(args):
     # 3. Freeze the graph after training by adjusting the quantize
     # operators' order for the inference.
     # The dtype of float_program's weights is float32, but in int8 range.
-    float_program, int8_program = convert(val_program, places, quant_config, \
-                                          scope=None, \
-                                          save_int8=True)
+    model_path = os.path.join(quantization_model_save_dir, args.model)
+    if not os.path.isdir(model_path):
+        os.makedirs(model_path)
+    float_program = convert(val_program, places, quant_config)
     _logger.info("eval best_model after convert")
     final_acc1 = test(best_epoch, float_program)
     _logger.info("final acc:{}".format(final_acc1))
 
     # 4. Save inference model
-    model_path = os.path.join(quantization_model_save_dir, args.model,
-                              'act_' + quant_config['activation_quantize_type']
-                              + '_w_' + quant_config['weight_quantize_type'])
-    float_path = os.path.join(model_path, 'float')
-    if not os.path.isdir(model_path):
-        os.makedirs(model_path)
-
     paddle.fluid.io.save_inference_model(
-        dirname=float_path,
+        dirname=model_path,
         feeded_var_names=[image.name],
         target_vars=[out],
         executor=exe,
         main_program=float_program,
-        model_filename=float_path + '/model',
-        params_filename=float_path + '/params')
+        model_filename=model_path + '/model.pdmodel',
+        params_filename=model_path + '/model.pdiparams')
 
 
 def main():
diff --git a/demo/quant/quant_aware/train.py b/demo/quant/quant_aware/train.py
index abf6073ec7bce5f870a5b3c1d0ca545351791833..7fc133a465f6e1f90cee34848a024897faeaf85d 100644
--- a/demo/quant/quant_aware/train.py
+++ b/demo/quant/quant_aware/train.py
@@ -126,6 +126,8 @@ def compress(args):
         'window_size': 10000,
         # The decay coefficient of moving average, default is 0.9
         'moving_rate': 0.9,
+        # Whether use onnx format or not
+        'onnx_format': args.onnx_format,
     }
 
     pretrain = True
@@ -294,10 +296,7 @@ def compress(args):
     # operators' order for the inference.
     # The dtype of float_program's weights is float32, but in int8 range.
     ############################################################################################################
-    float_program, int8_program = convert(val_program, places, quant_config, \
-                                          scope=None, \
-                                          save_int8=True,
-                                          onnx_format=args.onnx_format)
+    float_program = convert(val_program, places, quant_config)
     print("eval best_model after convert")
     final_acc1 = test(best_epoch, float_program)
     ############################################################################################################
diff --git a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml b/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
index ef9bf8b7cbfcfbca983af4f7ecc7a23ce6109af4..0f932a9e61641a9b609c77bd6f435faa31557e79 100644
--- a/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
+++ b/example/auto_compression/pytorch_yolov5/configs/yolov5s_qat_dis.yaml
@@ -14,6 +14,7 @@ Distillation:
 
 Quantization:
   use_pact: true
+  onnx_format: False
   activation_quantize_type: 'moving_average_abs_max'
   quantize_op_types:
   - conv2d
diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py
index af16e6e910ff4407596a14c5251d6d32fe2be322..5587e92d4ab6bba057251fdf3389a5ea13025309 100644
--- a/paddleslim/auto_compression/compressor.py
+++ b/paddleslim/auto_compression/compressor.py
@@ -787,15 +787,18 @@ class AutoCompression:
             os.remove(os.path.join(self.tmp_dir, 'best_model.pdopt'))
             os.remove(os.path.join(self.tmp_dir, 'best_model.pdparams'))
 
-        if 'qat' in strategy:
-            test_program, int8_program = convert(test_program, self._places, self._quant_config, \
-                                          scope=paddle.static.global_scope(), \
-                                          save_int8=True)
-
         model_dir = os.path.join(self.tmp_dir,
                                  'strategy_{}'.format(str(strategy_idx + 1)))
         if not os.path.exists(model_dir):
             os.makedirs(model_dir)
+
+        if 'qat' in strategy:
+            test_program = convert(
+                test_program,
+                self._places,
+                self._quant_config,
+                scope=paddle.static.global_scope())
+
         feed_vars = [
             test_program.global_block().var(name)
             for name in test_program_info.feed_target_names
diff --git a/paddleslim/auto_compression/strategy_config.py b/paddleslim/auto_compression/strategy_config.py
index 5226a7c843cc1d81d0bd4b3339b3e140a801b3b2..aad5e23e1a5d8dae0c39d9f392a885db1d64696a 100644
--- a/paddleslim/auto_compression/strategy_config.py
+++ b/paddleslim/auto_compression/strategy_config.py
@@ -65,6 +65,7 @@ class Quantization(BaseStrategy):
                  window_size=10000,
                  moving_rate=0.9,
                  for_tensorrt=False,
+                 onnx_format=False,
                  is_full_quantize=False):
         """
         Quantization Config.
@@ -80,6 +81,7 @@ class Quantization(BaseStrategy):
             window_size(int): Window size for 'range_abs_max' quantization. Default: 10000.
             moving_rate(float): The decay coefficient of moving average. Default: 0.9.
             for_tensorrt(bool): If True, 'quantize_op_types' will be TENSORRT_OP_TYPES. Default: False.
+            onnx_format(bool): Whether to export the quantized model with format of ONNX. Default is False.
             is_full_quantize(bool): If True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES. Default: False.
         """
         super(Quantization, self).__init__("Quantization")
@@ -95,6 +97,7 @@ class Quantization(BaseStrategy):
         self.window_size = window_size
         self.moving_rate = moving_rate
         self.for_tensorrt = for_tensorrt
+        self.onnx_format = onnx_format
         self.is_full_quantize = is_full_quantize
diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py
index 9e07c03c6ee5c1bb657393bfbc175d72ebd558fc..3ecff9bf31341d59b8f85ae9b087d0459b2ac8de 100755
--- a/paddleslim/quant/quanter.py
+++ b/paddleslim/quant/quanter.py
@@ -91,7 +91,9 @@ _quant_config_default = {
     # if True, 'quantize_op_types' will be TENSORRT_OP_TYPES
     'for_tensorrt': False,
     # if True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES
-    'is_full_quantize': False
+    'is_full_quantize': False,
+    # if True, use onnx format to quant.
+    'onnx_format': False,
 }
 
 
@@ -222,7 +224,6 @@ def quant_aware(program,
                 act_preprocess_func=None,
                 optimizer_func=None,
                 executor=None,
-                onnx_format=False,
                 return_program=False,
                 draw_graph=False):
     """Add quantization and dequantization operators to "program"
@@ -236,7 +237,9 @@ def quant_aware(program,
             Default: None.
         scope(paddle.static.Scope): Scope records the mapping between variable names and variables,
             similar to brackets in programming languages. Usually users can use
-            `paddle.static.global_scope `_. When ``None`` will use `paddle.static.global_scope() `_ . Default: ``None``.
+            `paddle.static.global_scope `_.
+            When ``None`` will use `paddle.static.global_scope() `_ .
+            Default: ``None``.
         for_test(bool): If the 'program' parameter is a test program, this parameter should be
             set to ``True``. Otherwise, set to ``False``.Default: False
         weight_quantize_func(function): Function that defines how to quantize weight. Using this
@@ -291,7 +294,8 @@ def quant_aware(program,
         elif op_type in QUANT_DEQUANT_PASS_OP_TYPES:
             quant_dequant_ops.append(op_type)
     if len(transform_pass_ops) > 0:
-        trannsform_func = 'QuantizationTransformPassV2' if onnx_format else 'QuantizationTransformPass'
+        trannsform_func = 'QuantizationTransformPassV2' if config[
+            'onnx_format'] else 'QuantizationTransformPass'
         transform_pass = eval(trannsform_func)(
             scope=scope,
             place=place,
@@ -313,7 +317,8 @@ def quant_aware(program,
         transform_pass.apply(main_graph)
 
     if len(quant_dequant_ops) > 0:
-        qdq_func = 'AddQuantDequantPassV2' if onnx_format else 'AddQuantDequantPass'
+        qdq_func = 'AddQuantDequantPassV2' if config[
+            'onnx_format'] else 'AddQuantDequantPass'
         quant_dequant_pass = eval(qdq_func)(
             scope=scope,
             place=place,
@@ -516,12 +521,7 @@ def quant_post_static(
 quant_post = quant_post_static
 
 
-def convert(program,
-            place,
-            config=None,
-            scope=None,
-            save_int8=False,
-            onnx_format=False):
+def convert(program, place, config=None, scope=None, save_int8=False):
     """
     convert quantized and well-trained ``program`` to final quantized
     ``program``that can be used to save ``inference model``.
@@ -560,7 +560,7 @@ def convert(program,
     _logger.info("convert config {}".format(config))
     test_graph = IrGraph(core.Graph(program.desc), for_test=True)
 
-    if onnx_format:
+    if config['onnx_format']:
         quant_weight_pass = QuantWeightPass(scope, place)
         quant_weight_pass.apply(test_graph)
     else: