From 8b74fc4fa78eb13aec82255acde8a55514a012c4 Mon Sep 17 00:00:00 2001
From: juncaipeng <52520497+juncaipeng@users.noreply.github.com>
Date: Thu, 19 Dec 2019 14:10:52 +0800
Subject: [PATCH] Fix post training quantization (#21745)

* fix the memory-constrained bug in post training quantization, and support inputs with different shapes, test=develop
---
 .../post_training_quantization.py             | 98 ++++++++++++++-----
 ..._post_training_quantization_mobilenetv1.py | 18 ++--
 ...est_post_training_quantization_resnet50.py |  5 +-
 3 files changed, 87 insertions(+), 34 deletions(-)

diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
index 117de330cd..cc5e87b22e 100644
--- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
+++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
+import os
+import re
 import logging
 import numpy as np
 from ....executor import global_scope
@@ -43,7 +45,9 @@ class PostTrainingQuantization(object):
                  scope=None,
                  algo="KL",
                  quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
-                 is_full_quantize=False):
+                 is_full_quantize=False,
+                 is_use_cache_file=False,
+                 cache_dir="./temp_post_training"):
         '''
         The class utilizes post training quantization method to quantize the
         fp32 model. It uses calibration data to calculate the scale factor of
@@ -78,9 +82,16 @@ class PostTrainingQuantization(object):
                 that will be quantized. Default is ["conv2d",
                 "depthwise_conv2d", "mul"].
             is_full_quantize(bool, optional): If set is_full_quantize as True,
-                apply quantization to all supported quantizable op type. If set
+                apply quantization to all supported quantizable op types. If set
                 is_full_quantize as False, only apply quantization to the op types
                 according to the input quantizable_op_type.
+            is_use_cache_file(bool, optional): If set is_use_cache_file as False,
+                all temp data will be saved in memory. If set is_use_cache_file
+                as True, temp data will be saved to disk. When the fp32 model is
+                complex or the amount of calibration data is large,
+                is_use_cache_file should be set as True. Default is False.
+            cache_dir(str, optional): When is_use_cache_file is True, set cache_dir
+                as the directory for saving temp data. Default is
+                ./temp_post_training.

         Returns:
             None
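For orientation before the implementation hunks: a minimal usage sketch of the two new flags. The executor and sample_generator argument names are assumptions (this diff does not show them), the import path is inferred from the file location above, and the model paths are illustrative.

```python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

def sample_reader():
    # Hypothetical calibration reader: yields one batch of feed data per call.
    ...

exe = fluid.Executor(fluid.CPUPlace())
ptq = PostTrainingQuantization(
    executor=exe,                      # assumed argument name
    sample_generator=sample_reader,    # assumed argument name
    model_dir="./fp32_model",          # illustrative path to the fp32 model
    algo="KL",
    is_full_quantize=False,
    is_use_cache_file=True,            # new: spill sampled activations to disk
    cache_dir="./temp_post_training")  # new: where the .npy cache files live
ptq.quantize()
ptq.save_quantized_model("./int8_model")
```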
@@ -129,6 +140,10 @@ class PostTrainingQuantization(object):
         self._batch_nums = batch_nums
         self._scope = global_scope() if scope == None else scope
         self._algo = algo
+        self._is_use_cache_file = is_use_cache_file
+        self._cache_dir = cache_dir
+        if self._is_use_cache_file and not os.path.exists(self._cache_dir):
+            os.mkdir(self._cache_dir)

         supported_quantizable_op_type = \
             QuantizationTransformPass._supported_quantizable_op_type + \
@@ -150,8 +165,8 @@ class PostTrainingQuantization(object):
         self._op_real_in_out_name = _op_real_in_out_name

         self._bit_length = 8
-        self._quantized_weight_var_name = []
-        self._quantized_act_var_name = []
+        self._quantized_weight_var_name = set()
+        self._quantized_act_var_name = set()
         self._sampling_data = {}
         self._quantized_var_scale_factor = {}
@@ -174,7 +189,8 @@ class PostTrainingQuantization(object):
                                   feed=data,
                                   fetch_list=self._fetch_list,
                                   return_numpy=False)
-            self._sample_data()
-            _logger.info("run batch: " + str(batch_id))
+            self._sample_data(batch_id)
+            if batch_id % 5 == 0:
+                _logger.info("run batch: " + str(batch_id))
             batch_id += 1
@@ -238,10 +254,9 @@ class PostTrainingQuantization(object):
             op_type = op.type
             if op_type in self._quantizable_op_type:
                 if op_type in ("conv2d", "depthwise_conv2d"):
-                    self._quantized_act_var_name.append(op.input("Input")[0])
-                    self._quantized_weight_var_name.append(
-                        op.input("Filter")[0])
-                    self._quantized_act_var_name.append(op.output("Output")[0])
+                    self._quantized_act_var_name.add(op.input("Input")[0])
+                    self._quantized_weight_var_name.add(op.input("Filter")[0])
+                    self._quantized_act_var_name.add(op.output("Output")[0])
                 elif op_type == "mul":
                     if self._is_input_all_not_persistable(
                             op, persistable_var_names):
@@ -249,9 +264,9 @@ class PostTrainingQuantization(object):
                         _logger.warning("Skip quantizing a mul op whose two "
                                         "input variables are not persistable")
                     else:
-                        self._quantized_act_var_name.append(op.input("X")[0])
-                        self._quantized_weight_var_name.append(op.input("Y")[0])
-                        self._quantized_act_var_name.append(op.output("Out")[0])
+                        self._quantized_act_var_name.add(op.input("X")[0])
+                        self._quantized_weight_var_name.add(op.input("Y")[0])
+                        self._quantized_act_var_name.add(op.output("Out")[0])
                 else:
                     # process other quantizable op types; all inputs must be non-persistable
                     if self._is_input_all_not_persistable(
@@ -260,10 +275,10 @@ class PostTrainingQuantization(object):
                         op_type]
                     for input_name in input_output_name_list[0]:
                         for var_name in op.input(input_name):
-                            self._quantized_act_var_name.append(var_name)
+                            self._quantized_act_var_name.add(var_name)
                     for output_name in input_output_name_list[1]:
                         for var_name in op.output(output_name):
-                            self._quantized_act_var_name.append(var_name)
+                            self._quantized_act_var_name.add(var_name)

         # set activation variables to be persistable, so we can obtain
         # the tensor data in _sample_data
@@ -271,7 +286,7 @@ class PostTrainingQuantization(object):
             if var.name in self._quantized_act_var_name:
                 var.persistable = True

-    def _sample_data(self):
+    def _sample_data(self, iter):
         '''
         Sample the tensor data of quantized variables,
         applied in every iteration.
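An aside on the hunks above: activations are flipped to persistable because only persistable variables survive in the scope after Executor.run, which is what lets _sample_data read their tensors back each batch. A self-contained sketch of that lookup, presumably close to what _load_var_value does; the variable name is hypothetical.

```python
import numpy as np
import paddle.fluid as fluid

scope = fluid.global_scope()
# find_var succeeds after Executor.run only if the variable is persistable.
var_tensor = scope.find_var("conv2d_0.tmp_0").get_tensor()  # hypothetical name
arr = np.array(var_tensor)     # copy the LoDTensor into numpy
abs_max = np.max(np.abs(arr))  # the kind of statistic sampled per batch
```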
@@ -281,11 +296,20 @@ class PostTrainingQuantization(object):
                 var_tensor = self._load_var_value(var_name)
                 self._sampling_data[var_name] = var_tensor

-        for var_name in self._quantized_act_var_name:
-            if var_name not in self._sampling_data:
-                self._sampling_data[var_name] = []
-            var_tensor = self._load_var_value(var_name)
-            self._sampling_data[var_name].append(var_tensor)
+        if self._is_use_cache_file:
+            for var_name in self._quantized_act_var_name:
+                var_tensor = self._load_var_value(var_name)
+                var_tensor = var_tensor.ravel()
+                save_path = os.path.join(self._cache_dir,
+                                         var_name + "_" + str(iter) + ".npy")
+                np.save(save_path, var_tensor)
+        else:
+            for var_name in self._quantized_act_var_name:
+                if var_name not in self._sampling_data:
+                    self._sampling_data[var_name] = []
+                var_tensor = self._load_var_value(var_name)
+                var_tensor = var_tensor.ravel()
+                self._sampling_data[var_name].append(var_tensor)

     def _calculate_scale_factor(self):
         '''
@@ -302,13 +326,33 @@ class PostTrainingQuantization(object):
                     var_name] = scale_factor_per_channel

         # calculate the scale factor of activations (KL or abs_max)
-        for var_name in self._quantized_act_var_name:
-            if self._algo == "KL":
-                self._quantized_var_scale_factor[var_name] = \
-                    self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name]))
-            else:
-                self._quantized_var_scale_factor[var_name] = \
-                    np.max(np.abs(self._sampling_data[var_name]))
+        if self._is_use_cache_file:
+            for var_name in self._quantized_act_var_name:
+                sampling_data = []
+                filenames = [f for f in os.listdir(self._cache_dir)
+                             if re.match(re.escape(var_name) + r'_[0-9]+\.npy$', f)]
+                for filename in filenames:
+                    file_path = os.path.join(self._cache_dir, filename)
+                    sampling_data.append(np.load(file_path))
+                    os.remove(file_path)
+                sampling_data = np.concatenate(sampling_data)
+
+                if self._algo == "KL":
+                    self._quantized_var_scale_factor[var_name] = \
+                        self._get_kl_scaling_factor(np.abs(sampling_data))
+                else:
+                    self._quantized_var_scale_factor[var_name] = \
+                        np.max(np.abs(sampling_data))
+        else:
+            for var_name in self._quantized_act_var_name:
+                self._sampling_data[var_name] = np.concatenate(
+                    self._sampling_data[var_name])
+                if self._algo == "KL":
+                    self._quantized_var_scale_factor[var_name] = \
+                        self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name]))
+                else:
+                    self._quantized_var_scale_factor[var_name] = \
+                        np.max(np.abs(self._sampling_data[var_name]))

     def _update_program(self):
         '''
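To make the disk-cache path above concrete: a standalone round trip mirroring _sample_data and _calculate_scale_factor. The ravel() call is what allows batches with different shapes to be concatenated later, which is the "support the input be different" part of this fix; the directory, variable name, and shapes are illustrative.

```python
import os
import re
import numpy as np

cache_dir = "./temp_post_training"
if not os.path.exists(cache_dir):
    os.mkdir(cache_dir)
var_name = "conv2d_0.tmp_0"  # hypothetical activation variable

# Sampling phase: one .npy file per batch, flattened with ravel() so batches
# of different shapes can be concatenated afterwards.
for batch_id, shape in enumerate([(1, 8, 4, 4), (2, 8, 4, 4)]):
    np.save(os.path.join(cache_dir, "%s_%d.npy" % (var_name, batch_id)),
            np.random.randn(*shape).ravel())

# Scale phase: match "<var_name>_<iter>.npy", load, delete, concatenate, reduce.
filenames = [f for f in os.listdir(cache_dir)
             if re.match(re.escape(var_name) + r'_[0-9]+\.npy$', f)]
sampling_data = []
for filename in filenames:
    file_path = os.path.join(cache_dir, filename)
    sampling_data.append(np.load(file_path))
    os.remove(file_path)
scale = np.max(np.abs(np.concatenate(sampling_data)))
print("abs_max scale for %s: %f" % (var_name, scale))
```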
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index 5180723da4..0cd804cc51 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -237,7 +237,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
     def generate_quantized_model(self,
                                  model_path,
                                  algo="KL",
-                                 is_full_quantize=False):
+                                 is_full_quantize=False,
+                                 is_use_cache_file=False):
         try:
             os.system("mkdir " + self.int8_model)
         except Exception as e:
@@ -259,11 +260,13 @@ class TestPostTrainingQuantization(unittest.TestCase):
             model_dir=model_path,
             algo=algo,
             quantizable_op_type=quantizable_op_type,
-            is_full_quantize=is_full_quantize)
+            is_full_quantize=is_full_quantize,
+            is_use_cache_file=is_use_cache_file)
         ptq.quantize()
         ptq.save_quantized_model(self.int8_model)

-    def run_test(self, model, algo, data_urls, data_md5s):
+    def run_test(self, model, algo, data_urls, data_md5s, is_full_quantize,
+                 is_use_cache_file):
         infer_iterations = self.infer_iterations
         batch_size = self.batch_size
         sample_iterations = self.sample_iterations
@@ -277,8 +280,8 @@ class TestPostTrainingQuantization(unittest.TestCase):

         print("Start INT8 post training quantization for {0} on {1} images ...".
               format(model, sample_iterations * batch_size))
-        self.generate_quantized_model(
-            model_cache_folder + "/model", algo=algo, is_full_quantize=True)
+        self.generate_quantized_model(model_cache_folder + "/model", algo,
+                                      is_full_quantize, is_use_cache_file)

         print("Start INT8 inference for {0} on {1} images ...".format(
             model, infer_iterations * batch_size))
@@ -305,7 +308,10 @@ class TestPostTrainingForMobilenetv1(TestPostTrainingQuantization):
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]
         data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
-        self.run_test(model, algo, data_urls, data_md5s)
+        is_full_quantize = True
+        is_use_cache_file = False
+        self.run_test(model, algo, data_urls, data_md5s, is_full_quantize,
+                      is_use_cache_file)


 if __name__ == '__main__':
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py
index e93d68e857..815f2e4332 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py
@@ -25,7 +25,10 @@ class TestPostTrainingForResnet50(TestPostTrainingQuantization):
             'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
         ]
         data_md5s = ['4a5194524823d9b76da6e738e1367881']
-        self.run_test(model, algo, data_urls, data_md5s)
+        is_full_quantize = False
+        is_use_cache_file = True
+        self.run_test(model, algo, data_urls, data_md5s, is_full_quantize,
+                      is_use_cache_file)


 if __name__ == '__main__':