diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
index abfe06a332689d27c438257b73e6efdd2de1643c..68b4cfdc661b4ac0f4a37db7f30089d6886520e2 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -515,6 +515,8 @@ class ImperativeCalcOutputScale(object):
                     self._out_scale_dict[ops_list[op_count]])
                 op_count += 1
 
+        self._set_skip_quant_attr(inference_program)
+
         # save the final quantized model that has output scales
         save_inference_model(
             dirname=dirname,
@@ -537,9 +539,12 @@ class ImperativeCalcOutputScale(object):
         Init the scale params for calculating output scales and save them in the
         target layer.
         After the users define the dygraph model, the hooks for calculating output
-        scales will not execute immediately. If the users load the checkpoint now,
-        the scale params have not been created, so them cann't be loaded.
-        Therefore, define the scale params in the beginning.
+        scales will not execute immediately. If the users load parameters from a
+        checkpoint and then save the quantized inference model right away, saving
+        fails, because dygraph_to_static requires that all parameters be created
+        in __init__, while the hooks run too late to create them there. To avoid
+        this mistake, we define the scale parameters at the beginning instead of
+        in the hooks.
         """
 
         def _create_param(in_layer, first_name, last_name, dtype):
@@ -587,6 +592,33 @@ class ImperativeCalcOutputScale(object):
             op_type = op_type.replace('relu', 're_lu')
         return op_type in layer_name
 
+    def _set_skip_quant_attr(self, program):
+        block = program.global_block()
+        for op in block.ops:
+            if self._is_skip_quant_op(block, op):
+                op._set_attr("skip_quant", True)
+
+    def _is_skip_quant_op(self, block, in_op):
+        """
+        Analyze whether the input op should skip quantization:
+        1. the type of the input op should be conv2d, depthwise_conv2d or matmul
+        2. the previous ops of the input op are not fake_quantize_dequantize ops
+        """
+
+        def _find_previous_op(block, var_name):
+            for op in block.ops:
+                if var_name in op.output_arg_names:
+                    return op
+
+        target_op_types = ["conv2d", "depthwise_conv2d", "matmul"]
+        if in_op.type not in target_op_types:
+            return False
+
+        previous_ops = [_find_previous_op(block, arg_name) \
+            for arg_name in in_op.input_arg_names]
+        return any(op is not None and op.type not in utils.fake_quantize_dequantize_types \
+            for op in previous_ops)
+
     def _calc_output_scale_hook(self, layer, input, output):
         """
         Create the MovingAverageAbsMaxScale layer for the target layer if needed.
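Note: the new `_is_skip_quant_op` helper marks a conv2d, depthwise_conv2d, or matmul op with `skip_quant` when at least one of its inputs is produced by an op other than a fake_quantize_dequantize op. The standalone sketch below is not part of the patch; the helper names are illustrative. It replays the same predicate on a tiny static program: the second conv2d is flagged because its input is produced by a plain conv2d, while the first is not, since its inputs (the feed var and the parameter) have no producing op in the block.

# Standalone sketch of the skip-quant rule (illustrative, not part of the patch).
import paddle

paddle.enable_static()

# Mirrors utils.fake_quantize_dequantize_types added in this patch.
fake_quantize_dequantize_types = [
    "fake_quantize_dequantize_abs_max",
    "fake_quantize_dequantize_channel_wise_abs_max",
    "fake_quantize_dequantize_moving_average_abs_max",
]

def find_previous_op(block, var_name):
    # Return the op in `block` that produces `var_name`, or None if the
    # variable is a graph input or a parameter.
    for op in block.ops:
        if var_name in op.output_arg_names:
            return op

def should_skip_quant(block, op):
    # Same predicate as _is_skip_quant_op: a target op is skipped when any
    # of its inputs is produced by a non fake_quantize_dequantize op.
    if op.type not in ["conv2d", "depthwise_conv2d", "matmul"]:
        return False
    previous_ops = [find_previous_op(block, name)
                    for name in op.input_arg_names]
    return any(p is not None and p.type not in fake_quantize_dequantize_types
               for p in previous_ops)

main = paddle.static.Program()
with paddle.static.program_guard(main):
    x = paddle.static.data(name="x", shape=[1, 3, 8, 8], dtype="float32")
    y = paddle.static.nn.conv2d(x, num_filters=4, filter_size=3, bias_attr=False)
    z = paddle.static.nn.conv2d(y, num_filters=4, filter_size=3, bias_attr=False)

block = main.global_block()
for op in block.ops:
    print(op.type, should_skip_quant(block, op))  # first False, second True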
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py
index 1ff4a408e051f9377e35499509d9861d654435ca..3bf655265c6f22d174a774ea61327677a56b5397 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py
@@ -52,6 +52,12 @@ supported_quant_layers_map = {
     'LayerNorm': paddle.nn.LayerNorm,
 }
 
+fake_quantize_dequantize_types = [
+    "fake_quantize_dequantize_abs_max",
+    "fake_quantize_dequantize_channel_wise_abs_max",
+    "fake_quantize_dequantize_moving_average_abs_max"
+]
+
 out_scale_layers_list = (
     paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.MaxPool2D,
     paddle.nn.BatchNorm, paddle.nn.BatchNorm2D, paddle.nn.SyncBatchNorm,
diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py
index 83ddac41965c516043d4e8074570c7c78b79d89f..ed29375d22bb9febc1771fb6ca4eaac661a4c75a 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py
@@ -393,12 +393,16 @@ class TestImperativeOutSclae(unittest.TestCase):
             if 'fake' in op.type:
                 static_ops.remove(op)
 
+        op_count = 0
         for i in range(len(dynamic_ops)):
             if dynamic_ops[i].has_attr("out_threshold"):
+                op_count += 1
                 self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
                 self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                                 static_ops[i].attr("out_threshold"))
 
+        self.assertTrue(op_count == 13)
+
 
 class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
     def test_save_quantized_model(self):
@@ -459,11 +463,14 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
             if 'fake' in op.type:
                 static_ops.remove(op)
 
+        op_count = 0
         for i in range(len(dynamic_ops)):
             if dynamic_ops[i].has_attr("out_threshold"):
+                op_count += 1
                 self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
                 self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                                 static_ops[i].attr("out_threshold"))
+        self.assertTrue(op_count == 13)
 
 
 class TestSaveQuantizedModel_Warning(unittest.TestCase):
diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py
index 0561055e6e05713b05095b6ad345426b8ac308bf..bda02769cea861908d90fa7ec44f64a696593987 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py
@@ -200,9 +200,12 @@ class TestImperativeOutSclae(unittest.TestCase):
                 params_filename="lenet" + INFER_PARAMS_SUFFIX))
 
         model_ops = inference_program.global_block().ops
-        conv2d_count, mul_count = 0, 0
+        conv2d_count, matmul_count = 0, 0
+        conv2d_skip_count, matmul_skip_count = 0, 0
         for i, op in enumerate(model_ops):
             if op.type == 'conv2d':
+                if op.has_attr("skip_quant"):
+                    conv2d_skip_count += 1
                 if conv2d_count > 0:
                     self.assertTrue(
                         'fake_quantize_dequantize' in model_ops[i - 1].type)
@@ -211,14 +214,19 @@ class TestImperativeOutSclae(unittest.TestCase):
                         'fake_quantize_dequantize' not in model_ops[i - 1].type)
                 conv2d_count += 1
 
-            if op.type == 'mul':
-                if mul_count > 0:
+            if op.type == 'matmul':
+                if op.has_attr("skip_quant"):
+                    matmul_skip_count += 1
+                if matmul_count > 0:
                     self.assertTrue(
                         'fake_quantize_dequantize' in model_ops[i - 1].type)
                 else:
                     self.assertTrue(
                         'fake_quantize_dequantize' not in model_ops[i - 1].type)
-                mul_count += 1
+                matmul_count += 1
+
+        self.assertTrue(conv2d_skip_count == 1)
+        self.assertTrue(matmul_skip_count == 1)
 
 
 if __name__ == '__main__':
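Note: a quick way to check the effect of `_set_skip_quant_attr` end to end, in the spirit of the updated test above, is to load the saved inference model and list the ops that carry the new attribute. A hypothetical snippet follows; the directory and file names are placeholders, not from this patch.

import paddle
import paddle.fluid as fluid

paddle.enable_static()
exe = fluid.Executor(fluid.CPUPlace())

# Load the inference model written by save_quantized_model; the path and
# file names below are placeholders for illustration.
[program, feed_names, fetch_targets] = fluid.io.load_inference_model(
    dirname="./quantized_lenet",
    executor=exe,
    model_filename="lenet.pdmodel",
    params_filename="lenet.pdiparams")

# Ops marked by _set_skip_quant_attr are excluded from quantization.
skipped = [op.type for op in program.global_block().ops
           if op.has_attr("skip_quant") and op.attr("skip_quant")]
print(skipped)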