Commit 9fb9b6d2 authored by wanghaoshuang

Merge branch 'quant_post' into 'develop'

Add post-training quantization API quant_post

See merge request !24
@@ -20,6 +20,7 @@
 from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
 from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
 from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
 from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
+from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
 from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
 from paddle.fluid import core
@@ -186,19 +187,68 @@ def quant_aware(program, place, config, scope=None, for_test=False):
     return quant_program
 
-def quant_post(program, place, config, scope=None):
-    """
-    add quantization ops in program. the program returned is not trainable.
-    Args:
-        program(fluid.Program): program
-        scope(fluid.Scope): the scope to store var, it's should be the value of program's scope, usually it's fluid.global_scope().
-        place(fluid.CPUPlace or fluid.CUDAPlace): place
-        config(dict): configs for quantization, default values are in quant_config_default dict.
-        for_test: is for test program.
-    Return:
-        fluid.Program: the quantization program is not trainable.
-    """
-    pass
+def quant_post(executor,
+               model_dir,
+               quantize_model_path,
+               sample_generator,
+               model_filename=None,
+               params_filename=None,
+               batch_size=16,
+               batch_nums=None,
+               scope=None,
+               algo='KL',
+               quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"]):
+    """
+    Quantize a fp32 model with the post-training quantization method.
+    The function uses calibration data to calculate the scale factors of
+    the quantized variables, and inserts fake quant/dequant ops to obtain
+    the quantized model.
+
+    Args:
+        executor(fluid.Executor): The executor used to load, run and save
+            the quantized model.
+        model_dir(str): The path of the fp32 model to be quantized; the
+            model and params saved by fluid.io.save_inference_model are
+            under this path.
+        quantize_model_path(str): The path where the quantized model is
+            saved by fluid.io.save_inference_model.
+        sample_generator(Python Generator): The sample generator provides
+            calibration data for the DataLoader, and it yields only one
+            sample at a time.
+        model_filename(str, optional): The name of the model file. If the
+            model was saved with the default filename, set it as None.
+            Default is None.
+        params_filename(str, optional): The name of the params file. When
+            all parameters are saved in a single file, set it to that
+            filename. If parameters are saved in separate files, set it
+            as None. Default is None.
+        batch_size(int, optional): The batch size of the DataLoader.
+            Default is 16.
+        batch_nums(int, optional): If batch_nums is not None, the number
+            of calibration samples is batch_size * batch_nums. If
+            batch_nums is None, all data produced by sample_generator is
+            used for calibration.
+        scope(fluid.Scope, optional): The scope used to run the program
+            and to load and save variables. If scope is None,
+            fluid.global_scope() is used.
+        algo(str, optional): If algo is 'KL', use the KL-divergence method
+            to get more precise scale factors. If algo is 'direct', use
+            the abs_max method to get the scale factors. Default is 'KL'.
+        quantizable_op_type(list[str], optional): The list of op types
+            that will be quantized. Default is
+            ["conv2d", "depthwise_conv2d", "mul"].
+
+    Returns:
+        None
+    """
+    post_training_quantization = PostTrainingQuantization(
+        executor=executor,
+        sample_generator=sample_generator,
+        model_dir=model_dir,
+        model_filename=model_filename,
+        params_filename=params_filename,
+        batch_size=batch_size,
+        batch_nums=batch_nums,
+        scope=scope,
+        algo=algo,
+        quantizable_op_type=quantizable_op_type,
+        is_full_quantize=False)
+    post_training_quantization.quantize()
+    post_training_quantization.save_quantized_model(quantize_model_path)
 
 def convert(program, place, config, scope=None, save_int8=False):
...
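
For reference, a minimal usage sketch of the new API, not part of this commit: it assumes the function is exported as paddleslim.quant.quant_post, and the ./fp32_model and ./quant_model paths, the sample count, and the 3x224x224 input shape are all hypothetical placeholders.

import numpy as np
import paddle.fluid as fluid
from paddleslim.quant import quant_post  # assumed export path of this API

# Hypothetical calibration reader. Per the docstring contract, calling it
# returns a generator that yields one sample at a time; the DataLoader
# groups samples into batches of batch_size.
def sample_generator():
    for _ in range(160):
        # Replace the random array with real preprocessed inputs that
        # match the model's feed variables.
        yield np.random.random((3, 224, 224)).astype('float32')

place = fluid.CPUPlace()
exe = fluid.Executor(place)

quant_post(
    executor=exe,
    model_dir='./fp32_model',        # saved by fluid.io.save_inference_model
    quantize_model_path='./quant_model',
    sample_generator=sample_generator,
    model_filename=None,             # model saved with the default filename
    params_filename=None,            # params saved in separate files
    batch_size=16,
    batch_nums=10,                   # calibrate on 16 * 10 samples
    algo='KL')                       # or 'direct' for plain abs_max scales

The model saved under ./quant_model can then be loaded back with fluid.io.load_inference_model for evaluation.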