From 837773c119ff01888bc30ee29d044281f82e0e2c Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Tue, 27 Sep 2022 14:29:00 +0800
Subject: [PATCH] update quantization new format (#1425)

---
 paddleslim/quant/quanter.py | 148 ++++++++++--------------------------
 1 file changed, 41 insertions(+), 107 deletions(-)

diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py
index 10a60e2c..c4ad8e18 100755
--- a/paddleslim/quant/quanter.py
+++ b/paddleslim/quant/quanter.py
@@ -43,9 +43,10 @@ try:
     from paddle.fluid.contrib.slim.quantization import QuantWeightPass
     from paddle.fluid.contrib.slim.quantization import AddQuantDequantPassV2
     from paddle.fluid.contrib.slim.quantization import PostTrainingQuantizationProgram
+    from paddle.fluid.contrib.slim.quantization import OutScaleForInferencePassV2
 except:
     _logger.warning(
-        "Some functions fail to import, please update PaddlePaddle version to 2.3+"
+        "Some functions fail to import, please update PaddlePaddle version to 2.4+"
     )
 
 WEIGHT_QUANTIZATION_TYPES = [
@@ -109,56 +110,6 @@ _quant_config_default = {
 }
 
 
-class OutScaleForInferencePassV2(object):
-    def __init__(self, scope=None):
-        """
-        This pass is used for setting output scales of some operators.
-        These output scales may be used by tensorRT or some other inference engines.
-
-        Args:
-            scope(fluid.Scope): The scope is used to initialize these new parameters.
-        """
-        self._scope = scope
-        self._teller_set = utils._out_scale_op_list
-
-    def apply(self, graph):
-        """
-        Get output scales from the scope and set these scales in op_descs
-        of operators in the teller_set.
-
-        Args:
-            graph(IrGraph): the target graph.
-        """
-        assert isinstance(graph,
-                          IrGraph), 'graph must be the instance of IrGraph.'
-        collect_dict = collections.OrderedDict()
-        op_nodes = graph.all_op_nodes()
-        for op_node in op_nodes:
-            if op_node.name() in self._teller_set:
-                var_names = utils._get_op_output_var_names(op_node)
-                for var_name in var_names:
-                    in_node = graph._find_node_by_name(op_node.outputs,
-                                                       var_name)
-                    if in_node.dtype() not in \
-                        [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
-                        continue
-
-                    collect_dict[var_name] = {}
-                    scale_name = self._scale_name(var_name)
-                    scale_var = self._scope.find_var(scale_name)
-                    assert scale_var is not None, \
-                        "Can not find {} variable in the scope".format(scale_name)
-                    scale_value = np.array(scale_var.get_tensor())[0]
-                    collect_dict[var_name]['scale'] = float(scale_value)
-        return graph, collect_dict
-
-    def _scale_name(self, var_name):
-        """
-        Return the scale name for the var named `var_name`.
-        """
-        return "%s@scale" % (var_name)
-
-
 def load_dict():
     with open(VARS_MAPPING_TABLE, 'r') as file:
         data = file.read()
@@ -515,35 +466,37 @@ def quant_aware(program,
     return quant_program
 
 
-def quant_post_static(
-        executor,
-        model_dir,
-        quantize_model_path,
-        batch_generator=None,
-        sample_generator=None,
-        data_loader=None,
-        model_filename=None,
-        params_filename=None,
-        save_model_filename='model.pdmodel',
-        save_params_filename='model.pdiparams',
-        batch_size=1,
-        batch_nums=None,
-        scope=None,
-        algo='hist',
-        round_type='round',
-        hist_percent=0.9999,
-        bias_correction=False,
-        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
-        is_full_quantize=False,
-        weight_bits=8,
-        activation_bits=8,
-        activation_quantize_type='range_abs_max',
-        weight_quantize_type='channel_wise_abs_max',
-        optimize_model=False,
-        onnx_format=False,
-        skip_tensor_list=None,
-        is_use_cache_file=False,
-        cache_dir="./temp_post_training"):
+def quant_post_static(executor,
+                      model_dir,
+                      quantize_model_path,
+                      batch_generator=None,
+                      sample_generator=None,
+                      data_loader=None,
+                      model_filename=None,
+                      params_filename=None,
+                      save_model_filename='model.pdmodel',
+                      save_params_filename='model.pdiparams',
+                      batch_size=1,
+                      batch_nums=None,
+                      scope=None,
+                      algo='hist',
+                      round_type='round',
+                      hist_percent=0.9999,
+                      bias_correction=False,
+                      quantizable_op_type=[
+                          "conv2d", "depthwise_conv2d", "mul", "matmul",
+                          "matmul_v2"
+                      ],
+                      is_full_quantize=False,
+                      weight_bits=8,
+                      activation_bits=8,
+                      activation_quantize_type='range_abs_max',
+                      weight_quantize_type='channel_wise_abs_max',
+                      optimize_model=False,
+                      onnx_format=False,
+                      skip_tensor_list=None,
+                      is_use_cache_file=False,
+                      cache_dir="./temp_post_training"):
     """
     The function utilizes static post training quantization method to
     quantize the fp32 model. It uses calibrate data to calculate the
@@ -676,22 +629,6 @@ def quant_post_static(
         quantize_model_path,
         model_filename=save_model_filename,
         params_filename=save_params_filename)
-    if onnx_format:
-        try:
-            collect_dict = post_training_quantization._calibration_scales
-            save_quant_table_path = os.path.join(quantize_model_path,
-                                                 'calibration_table.txt')
-            with open(save_quant_table_path, 'w') as txt_file:
-                for tensor_name in collect_dict.keys():
-                    write_line = '{} {}'.format(
-                        tensor_name, collect_dict[tensor_name]['scale']) + '\n'
-                    txt_file.write(write_line)
-            _logger.info("Quantization clip ranges of tensors is save in: {}".
-                         format(save_quant_table_path))
-        except:
-            _logger.warning(
-                "Unable to generate `calibration_table.txt`, please update PaddlePaddle >= 2.3.3"
-            )
 
 
 # We have changed the quant_post to quant_post_static.
@@ -748,17 +685,14 @@ def convert(program,
     if config['onnx_format']:
         quant_weight_pass = QuantWeightPass(scope, place)
         quant_weight_pass.apply(test_graph)
-        out_scale_infer_pass = OutScaleForInferencePassV2(scope=scope)
-        _, collect_dict = out_scale_infer_pass.apply(test_graph)
-        save_quant_table_path = os.path.join(save_clip_ranges_path,
-                                             'calibration_table.txt')
-        with open(save_quant_table_path, 'w') as txt_file:
-            for tensor_name in collect_dict.keys():
-                write_line = '{} {}'.format(
-                    tensor_name, collect_dict[tensor_name]['scale']) + '\n'
-                txt_file.write(write_line)
-        _logger.info("Quantization clip ranges of tensors is save in: {}".
-                     format(save_quant_table_path))
+        try:
+            out_scale_infer_pass = OutScaleForInferencePassV2(
+                scope=scope, place=place, quant_bits=config['activation_bits'])
+            out_scale_infer_pass.apply(test_graph)
+        except:
+            _logger.warning(
+                "Unable to convert quant model with onnx_format=True, please update PaddlePaddle >= 2.4.0"
+            )
     else:
         out_scale_infer_pass = OutScaleForInferencePass(scope=scope)
         out_scale_infer_pass.apply(test_graph)
--
GitLab
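The net effect of the patch is that the in-tree OutScaleForInferencePassV2 and the manual calibration_table.txt writing are dropped in favor of the pass shipped with PaddlePaddle 2.4+, so exporting the new ONNX-compatible quantization format comes down to calling the updated quant_post_static with onnx_format=True. Below is a minimal usage sketch, not part of the patch: the model paths, the random-data sample_generator, and the batch settings are illustrative assumptions; the keyword arguments themselves are taken from the signature in the diff above, and the call assumes PaddlePaddle >= 2.4.

import numpy as np
import paddle
from paddleslim.quant import quant_post_static

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

def sample_generator():
    # Hypothetical calibration reader: yields one sample per iteration,
    # shaped like the model's input. Replace with real calibration data.
    for _ in range(32):
        yield [np.random.random((3, 224, 224)).astype('float32')]

quant_post_static(
    executor=exe,
    model_dir='./fp32_inference_model',    # assumed path to the FP32 inference model
    quantize_model_path='./quant_model',   # assumed output directory
    sample_generator=sample_generator,
    model_filename='model.pdmodel',
    params_filename='model.pdiparams',
    batch_size=8,
    batch_nums=4,
    onnx_format=True)  # exports the new quant format; needs PaddlePaddle >= 2.4 after this patch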