diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
index 3d60e808951460f8bdab2b89a8b489a3da90c49b..49cb4ea4311eea98f5a97e2e74c75f0fc751f591 100644
--- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
+++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
@@ -262,7 +262,7 @@ class PostTrainingQuantization(object):
         ]
         self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max']
         self._support_algo_type = [
-            'KL', 'hist', 'avg', 'mse', 'emd', 'abs_max', 'min_max'
+            'KL', 'hist', 'avg', 'mse', 'emd', 'abs_max', 'min_max', 'ptf'
         ]
         assert round_type in ['adaround', 'round']
         self._round_type = round_type
@@ -284,7 +284,7 @@ class PostTrainingQuantization(object):
             "data_loader only accepts `paddle.io.DataLoader` or Generator instance."
         assert batch_size > 0, "The batch_size should be greater than 0."
         assert algo in self._support_algo_type, \
-            "The algo should be KL, hist, mse, avg, abs_max or min_max."
+            "The algo should be KL, hist, mse, avg, emd, abs_max, min_max or ptf."
         assert activation_quantize_type in self._support_activation_quantize_type, \
             "The activation_quantize_type ({}) should in ({}).".format(
                 activation_quantize_type, self._support_activation_quantize_type)
@@ -621,6 +621,8 @@ class PostTrainingQuantization(object):
             self._sample_mse()
         elif self._algo == "emd":
             self._sample_emd()
+        elif self._algo == "ptf":
+            self._sample_ptf()
         elif self._algo in ["KL", "hist"]:
             self._sample_histogram()
 
@@ -810,6 +812,58 @@ class PostTrainingQuantization(object):
             hist, _ = np.histogram(var_tensor_abs, bins=bins)
             self._sampling_act_histogram[var_name][0] += hist
 
+    def l2_loss(self, gt, pred):
+        return ((gt - pred)**2).mean()
+
+    def _sample_ptf(self):
+        """
+        The following code is modified from:
+        https://github.com/megvii-research/FQ-ViT/
+        """
+        if self._quantized_threshold == {}:
+            for var_name in self._quantized_weight_var_name:
+                var_tensor = utils.load_variable_data(self._scope, var_name)
+                if self._weight_quantize_type == "abs_max":
+                    abs_max_value = float(np.max(np.abs(var_tensor)))
+                elif self._weight_quantize_type == "channel_wise_abs_max":
+                    abs_max_value = []
+                    if self._weight_op_pairs[
+                            var_name] in utils._channelwise_quant_axis1_ops:
+                        for i in range(var_tensor.shape[1]):
+                            abs_max_value.append(
+                                float(np.max(np.abs(var_tensor[:, i]))))
+                    else:
+                        for i in range(var_tensor.shape[0]):
+                            abs_max_value.append(
+                                float(np.max(np.abs(var_tensor[i]))))
+                self._quantized_threshold[var_name] = abs_max_value
+
+        for var_name in self._quantized_act_var_name:
+            var_tensor = utils.load_variable_data(self._scope, var_name)
+            abs_max_value = float(np.max(np.abs(var_tensor)))
+            q_max = 2**(self._activation_bits - 1) - 1
+            scale8 = abs_max_value / q_max
+            scale4 = scale8 / 2
+            scale2 = scale4 / 2
+            scale1 = scale2 / 2
+            quant_dequant_var_scale1 = np.clip(np.round(var_tensor / scale1), 0,
+                                               q_max) * scale1
+            quant_dequant_var_scale2 = np.clip(np.round(var_tensor / scale2), 0,
+                                               q_max) * scale2
+            quant_dequant_var_scale4 = np.clip(np.round(var_tensor / scale4), 0,
+                                               q_max) * scale4
+            quant_dequant_var_scale8 = np.clip(np.round(var_tensor / scale8), 0,
+                                               q_max) * scale8
+            score1 = self.l2_loss(var_tensor, quant_dequant_var_scale1)
+            score2 = self.l2_loss(var_tensor, quant_dequant_var_scale2)
+            score4 = self.l2_loss(var_tensor, quant_dequant_var_scale4)
+            score8 = self.l2_loss(var_tensor, quant_dequant_var_scale8)
+            score = [score1, score2, score4, score8]
+            mask = 2**score.index(min(score))
+            scale = scale1 * mask
+            threshold = q_max * scale
+            self._quantized_threshold[var_name] = threshold
+
     def _save_input_threhold(self):
         '''
         Save input threshold to the quantized op.
@@ -1034,7 +1088,7 @@ class PostTrainingQuantization(object):
                             argname_index[0] + str(argname_index[1]) +
                             "_threshold", "post_hist")
 
-                elif self._algo in ["avg", "abs_max", "mse", "emd"]:
+                elif self._algo in ["avg", "abs_max", "mse", "emd", "ptf"]:
                     save_info(op_node, out_var_name, self._quantized_threshold,
                               "out_threshold", "post_" + str(self._algo))
                     save_info(
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index 25707d0c8c91ef7b4a39ae2862d64961a422bebb..70b04ebf5ef5e931514e7362d00958960a43ec25 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -454,5 +454,29 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
             onnx_format=onnx_format)
 
 
+class TestPostTrainingPtfForMobilenetv1(TestPostTrainingQuantization):
+
+    def test_post_training_ptf_mobilenetv1(self):
+        model = "MobileNet-V1"
+        algo = "ptf"
+        round_type = "round"
+        data_urls = [
+            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+        ]
+        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        quantizable_op_type = [
+            "conv2d",
+            "mul",
+        ]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = False
+        # The accuracy diff of post-training quantization (ptf) may be bigger
+        diff_threshold = 0.05
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)
+
+
 if __name__ == '__main__':
     unittest.main()
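
For reviewers unfamiliar with PTF (Power-of-Two Factor, from FQ-ViT): `_sample_ptf` keeps the usual abs_max thresholds for weights, and for each activation tensor it tries the plain abs_max scale halved zero to three times, scores each candidate by the L2 error of a quantize-dequantize round trip, and stores `q_max * best_scale` as the output threshold. Below is a minimal standalone sketch of that search, assuming 8-bit signed activations; `ptf_threshold` is an illustrative name, not part of the Paddle API.

```python
import numpy as np


def ptf_threshold(tensor, activation_bits=8):
    """Pick an activation threshold by the PTF scale search in this patch."""
    q_max = 2**(activation_bits - 1) - 1
    # scale8 in the patch is the plain abs_max scale; scale4/2/1 halve it.
    base_scale = float(np.max(np.abs(tensor))) / q_max
    candidates = [base_scale / 2**k for k in (3, 2, 1, 0)]
    # Score each candidate by the L2 error of a quantize-dequantize round
    # trip (clipping at 0, as the patch and the FQ-ViT reference do).
    scores = [
        np.mean((tensor - np.clip(np.round(tensor / s), 0, q_max) * s)**2)
        for s in candidates
    ]
    best_scale = candidates[int(np.argmin(scores))]
    # The saved threshold is the largest value representable at that scale.
    return q_max * best_scale


# Example: threshold for a non-negative activation tensor.
print(ptf_threshold(np.abs(np.random.randn(8, 16)).astype(np.float32)))
```

Because all four candidates differ from the coarsest scale only by a power of two, the chosen per-tensor factor can later be applied with integer shifts, which is what makes this search hardware-friendly in the FQ-ViT formulation.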