PaddlePaddle / PaddleSlim
Commit 9fb9b6d2
Authored Nov 21, 2019 by wanghaoshuang
Merge branch 'quant_post' into 'develop'

add post training quantization api quant_post

See merge request !24
Parents: 7d0e73e8, e904a37c
Showing 1 changed file with 60 additions and 10 deletions

paddleslim/quant/quanter.py (+60, -10)
```diff
@@ -20,6 +20,7 @@ from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
 from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
 from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
 from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
+from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
 from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
 from paddle.fluid import core
@@ -186,19 +187,68 @@ def quant_aware(program, place, config, scope=None, for_test=False):
     return quant_program


-def quant_post(program, place, config, scope=None):
+def quant_post(executor,
+               model_dir,
+               quantize_model_path,
+               sample_generator,
+               model_filename=None,
+               params_filename=None,
+               batch_size=16,
+               batch_nums=None,
+               scope=None,
+               algo='KL',
+               quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"]):
     """
-    add quantization ops in program. the program returned is not trainable.
+    The function utilizes the post training quantization method to quantize
+    the fp32 model. It uses calibrate data to calculate the scale factor of
+    quantized variables, and inserts fake quant/dequant ops to obtain the
+    quantized model.

     Args:
-        program(fluid.Program): program
-        scope(fluid.Scope): the scope to store vars; it should be the value of the program's scope, usually fluid.global_scope().
-        place(fluid.CPUPlace or fluid.CUDAPlace): place
-        config(dict): configs for quantization, default values are in the quant_config_default dict.
-        for_test: is for test program.
-    Return:
-        fluid.Program: the quantization program is not trainable.
+        executor(fluid.Executor): The executor to load, run and save the
+            quantized model.
+        model_dir(str): The path of the fp32 model that will be quantized; the
+            model and params saved by fluid.io.save_inference_model
+            are under this path.
+        quantize_model_path(str): The path to save the quantized model, using
+            fluid.io.save_inference_model.
+        sample_generator(Python Generator): The sample generator provides
+            calibrate data for DataLoader, and it returns one sample at a time.
+        model_filename(str, optional): The name of the model file. If parameters
+            are saved in separate files, set it as 'None'. Default is 'None'.
+        params_filename(str, optional): The name of the params file.
+            When all parameters are saved in a single file, set it
+            as that filename. If parameters are saved in separate files,
+            set it as 'None'. Default is 'None'.
+        batch_size(int, optional): The batch size of DataLoader, default is 16.
+        batch_nums(int, optional): If batch_nums is not None, the number of calibrate
+            data is 'batch_size*batch_nums'. If batch_nums is None, all data
+            generated by sample_generator is used as calibrate data.
+        scope(fluid.Scope, optional): The scope to run the program, used to load
+            and save variables. If scope is None, fluid.global_scope() is used.
+        algo(str, optional): If algo='KL', use the KL-divergence method to
+            get a more precise scale factor. If algo='direct', use the
+            abs_max method to get the scale factor. Default is 'KL'.
+        quantizable_op_type(list[str], optional): The list of op types
+            that will be quantized. Default is ["conv2d", "depthwise_conv2d",
+            "mul"].
+    Returns:
+        None
     """
-    pass
+    post_training_quantization = PostTrainingQuantization(
+        executor=executor,
+        sample_generator=sample_generator,
+        model_dir=model_dir,
+        model_filename=model_filename,
+        params_filename=params_filename,
+        batch_size=batch_size,
+        batch_nums=batch_nums,
+        scope=scope,
+        algo=algo,
+        quantizable_op_type=quantizable_op_type,
+        is_full_quantize=False)
+    post_training_quantization.quantize()
+    post_training_quantization.save_quantized_model(quantize_model_path)


 def convert(program, place, config, scope=None, save_int8=False):
 ...
```
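Based on the signature added in this diff, a call to the new `quant_post` API might look like the sketch below. The calibration `sample_generator` is a plain Python generator that yields one sample per call, as the docstring describes; the model paths, input shape, and sample count here are illustrative assumptions, not part of the commit.

```python
import numpy as np

def sample_generator():
    # Illustrative calibration generator: yields one fake image-shaped
    # sample at a time, as quant_post's DataLoader expects.
    for _ in range(64):
        yield np.random.random((3, 224, 224)).astype('float32')

# The quant_post call itself needs Paddle installed and a saved fp32
# inference model on disk, so it is shown here as a commented sketch:
#
#   import paddle.fluid as fluid
#   from paddleslim.quant import quant_post
#
#   place = fluid.CPUPlace()
#   exe = fluid.Executor(place)
#   quant_post(
#       executor=exe,
#       model_dir='./fp32_model',            # illustrative path
#       quantize_model_path='./quant_model', # illustrative path
#       sample_generator=sample_generator,
#       batch_size=16,
#       batch_nums=4,   # 16 * 4 = 64 calibration samples
#       algo='KL')
```

With `batch_nums=None` instead, the generator would be exhausted and all 64 samples used for calibration.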