add new ptq method ptf (#44246)

* add new ptq method ptf * add post training quantization mobilenetv1 test for ptf * add post training quantization mobilenetv1 test for ptf test=allcases

add new ptq method ptf (#44246)
* add new ptq method ptf * add post training quantization mobilenetv1 test for ptf * add post training quantization mobilenetv1 test for ptf test=allcases
f4bc69ec · handiz · GitHub · f901f020 · f4bc69ec · f4bc69ec
2 changed file
--- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
+++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
@@ -262,7 +262,7 @@ class PostTrainingQuantization(object):
        ]
        self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max']
        self._support_algo_type = [
-            'KL', 'hist', 'avg', 'mse', 'emd', 'abs_max', 'min_max'
+            'KL', 'hist', 'avg', 'mse', 'emd', 'abs_max', 'min_max', 'ptf'
        ]
        assert round_type in ['adaround', 'round']
        self._round_type = round_type
@@ -284,7 +284,7 @@ class PostTrainingQuantization(object):
                "data_loader only accepts `paddle.io.DataLoader` or Generator instance."
        assert batch_size > 0, "The batch_size should be greater than 0."
        assert algo in self._support_algo_type, \
-            "The algo should be KL, hist, mse, avg, abs_max or min_max."
+            "The algo should be KL, hist, mse, avg, abs_max, min_max or ptf."
        assert activation_quantize_type in self._support_activation_quantize_type, \
            "The activation_quantize_type ({}) should in ({}).".format(
            activation_quantize_type, self._support_activation_quantize_type)
@@ -621,6 +621,8 @@ class PostTrainingQuantization(object):
            self._sample_mse()
        elif self._algo == "emd":
            self._sample_emd()
+        elif self._algo == "ptf":
+            self._sample_ptf()
        elif self._algo in ["KL", "hist"]:
            self._sample_histogram()
@@ -810,6 +812,58 @@ class PostTrainingQuantization(object):
            hist, _ = np.histogram(var_tensor_abs, bins=bins)
            self._sampling_act_histogram[var_name][0] += hist
+    def l2_loss(self, gt, pred):
+        return ((gt - pred)**2).mean()
+    def _sample_ptf(self):
+        """
+        The following code are modified from:
+        https://github.com/megvii-research/FQ-ViT/
+        """
+        if self._quantized_threshold == {}:
+            for var_name in self._quantized_weight_var_name:
+                var_tensor = utils.load_variable_data(self._scope, var_name)
+                if self._weight_quantize_type == "abs_max":
+                    abs_max_value = float(np.max(np.abs(var_tensor)))
+                elif self._weight_quantize_type == "channel_wise_abs_max":
+                    abs_max_value = []
+                    if self._weight_op_pairs[
+                            var_name] in utils._channelwise_quant_axis1_ops:
+                        for i in range(var_tensor.shape[1]):
+                            abs_max_value.append(
+                                float(np.max(np.abs(var_tensor[:, i]))))
+                    else:
+                        for i in range(var_tensor.shape[0]):
+                            abs_max_value.append(
+                                float(np.max(np.abs(var_tensor[i]))))
+                self._quantized_threshold[var_name] = abs_max_value
+        for var_name in self._quantized_act_var_name:
+            var_tensor = utils.load_variable_data(self._scope, var_name)
+            abs_max_value = float(np.max(np.abs(var_tensor)))
+            q_max = 2**(self._activation_bits - 1) - 1
+            scale8 = abs_max_value / q_max
+            scale4 = scale8 / 2
+            scale2 = scale4 / 2
+            scale1 = scale2 / 2
+            quant_dequant_var_scale1 = np.clip(np.round(var_tensor / scale1), 0,
+                                               q_max) * scale1
+            quant_dequant_var_scale2 = np.clip(np.round(var_tensor / scale2), 0,
+                                               q_max) * scale2
+            quant_dequant_var_scale4 = np.clip(np.round(var_tensor / scale4), 0,
+                                               q_max) * scale4
+            quant_dequant_var_scale8 = np.clip(np.round(var_tensor / scale8), 0,
+                                               q_max) * scale8
+            score1 = self.l2_loss(var_tensor, quant_dequant_var_scale1)
+            score2 = self.l2_loss(var_tensor, quant_dequant_var_scale2)
+            score4 = self.l2_loss(var_tensor, quant_dequant_var_scale4)
+            score8 = self.l2_loss(var_tensor, quant_dequant_var_scale8)
+            score = [score1, score2, score4, score8]
+            mask = 2**score.index(min(score))
+            scale = scale1 * mask
+            threshold = q_max * scale
+            self._quantized_threshold[var_name] = threshold
    def _save_input_threhold(self):
        '''
        Save input threshold to the quantized op.
@@ -1034,7 +1088,7 @@ class PostTrainingQuantization(object):
                    argname_index[0] + str(argname_index[1]) + "_threshold",
                    "post_hist")
-            elif self._algo in ["avg", "abs_max", "mse", "emd"]:
+            elif self._algo in ["avg", "abs_max", "mse", "emd", "ptf"]:
                save_info(op_node, out_var_name, self._quantized_threshold,
                          "out_threshold", "post_" + str(self._algo))
                save_info(

--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -454,5 +454,29 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
                      onnx_format=onnx_format)
+class TestPostTrainingPtfForMobilenetv1(TestPostTrainingQuantization):
+    def test_post_training_ptf_mobilenetv1(self):
+        model = "MobileNet-V1"
+        algo = "ptf"
+        round_type = "round"
+        data_urls = [
+            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+        ]
+        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        quantizable_op_type = [
+            "conv2d",
+            "mul",
+        ]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = False
+        # The accuracy diff of post-training quantization (abs_max) maybe bigger
+        diff_threshold = 0.05
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)
 if __name__ == '__main__':
    unittest.main()