diff --git a/paddleslim/quant/quant_embedding.py b/paddleslim/quant/quant_embedding.py
index 46a81db65c55f91fdf5525bf0da25414598a0b71..de7c22879759fee6844c95a80a11a6a8a48fbe09 100755
--- a/paddleslim/quant/quant_embedding.py
+++ b/paddleslim/quant/quant_embedding.py
@@ -233,20 +233,23 @@ def _quant_embedding_abs_max(graph, scope, place, config):
 
 
 def quant_embedding(program, place, config, scope=None):
-    """
-    quant lookup_table op parameters
+    """quantize lookup_table op parameters
+
     Args:
         program(fluid.Program): infer program
-        scope(fluid.Scope): the scope to store var, when is None will use fluid.global_scope()
-        place(fluid.CPUPlace or fluid.CUDAPlace): place
-        config(dict): config to quant. The keys are 'params_name', 'quantize_type', \
+        scope(fluid.Scope): Scope records the mapping between variable names and variables, similar to the scope created by brackets in programming languages. Usually users can use ``fluid.global_scope()``. When set to ``None``, ``fluid.global_scope()`` will be used. Default: ``None``.
+        place(fluid.CPUPlace or fluid.CUDAPlace): The device on which the executor runs.
+        config(dict): config to quantize. The keys are 'params_name', 'quantize_type', \
             'quantize_bits', 'dtype', 'threshold'. \
-            'params_name': parameter name to quant, must be set.
-            'quantize_type': quantize type, supported types are ['abs_max']. default is "abs_max".
-            'quantize_bits': quantize bits, supported bits are [8]. default is 8.
-            'dtype': quantize dtype, supported dtype are ['int8']. default is 'int8'.
-            'threshold': threshold to clip tensor before quant. When threshold is not set, \
+            ``params_name`` is the name of the parameter to quantize; it must be set.
+            ``quantize_type`` is the quantize type; supported types are ['abs_max'], default is 'abs_max'.
+            ``quantize_bits`` supported bits are [8], default is 8.
+            ``dtype`` is the quantized dtype; supported dtypes are ['int8'], default is 'int8'.
+            ``threshold`` is the threshold used to clip the tensor before quantization. When threshold is not set, \
                 tensor will not be clipped.
+
+    Returns:
+        None
     """
     assert isinstance(config, dict), "config must be dict"
     config = _merge_config(copy.deepcopy(default_config), config)
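For orientation, here is a minimal usage sketch of ``quant_embedding`` as documented above. It assumes the PaddlePaddle 1.x ``fluid`` API; the vocabulary size, embedding width, and the parameter name 'emb' are illustrative, not part of the patch:

    import paddle.fluid as fluid
    from paddleslim.quant import quant_embedding

    # Build a small inference program containing a lookup_table (embedding) op.
    infer_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(infer_program, startup_program):
        input_word = fluid.data(name='input_word', shape=[None, 1], dtype='int64')
        emb = fluid.embedding(
            input=input_word,
            size=[100, 128],  # illustrative vocab size and embedding width
            param_attr=fluid.ParamAttr(name='emb'))

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)  # initialize the 'emb' parameter in the scope

    # Quantize the embedding parameter 'emb' with the abs_max strategy;
    # quantize_bits and dtype fall back to their documented defaults (8, 'int8').
    config = {'params_name': 'emb', 'quantize_type': 'abs_max'}
    quant_embedding(infer_program, place, config)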
diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py
index b726ec329fa90c5aca61bf4b495bacdde72999d8..ce99f8607250fc9aab56ed8fcdbf458f3a0e9366 100755
--- a/paddleslim/quant/quanter.py
+++ b/paddleslim/quant/quanter.py
@@ -158,17 +158,23 @@ def _parse_configs(user_config):
 
 
 def quant_aware(program, place, config=None, scope=None, for_test=False):
-    """
-    add trainable quantization ops in program.
+    """Add quantization and dequantization operators to ``program``
+    for quantization training or testing.
+
     Args:
-        program(fluid.Program): program to quant
-        place(fluid.CPUPlace or fluid.CUDAPlace): CPU or CUDA device
-        config(dict, optional): configs for quantization. if None, will use default config. Default is None.
-        scope(fluid.Scope): the scope to store var, it should be program's scope. if None, will use fluid.global_scope().
-        default is None.
-        for_test(bool): if program is test program, set True when program is for test, False when program is for train. Default is False.
-    Return:
-        fluid.Program: user can finetune this quantization program to enhance the accuracy.
+        program(fluid.Program): training or testing ``program``.
+        place(fluid.CPUPlace or fluid.CUDAPlace): The device on which
+            the executor runs.
+        config(dict, optional): configs for quantization. If None, the default
+            config is used. Default: None.
+        scope(fluid.Scope): Scope records the mapping between variable names and variables,
+            similar to the scope created by brackets in programming languages. Usually users can use
+            ``fluid.global_scope()``. When set to ``None``, ``fluid.global_scope()`` will be used. Default: ``None``.
+        for_test(bool): If the ``program`` parameter is a test program, this parameter should be set to ``True``.
+            Otherwise, set it to ``False``. Default: ``False``.
+
+    Returns:
+        fluid.CompiledProgram | fluid.Program: Program with quantization and dequantization operators.
     """
     scope = fluid.global_scope() if not scope else scope
@@ -237,25 +243,25 @@ def quant_post(executor,
     """
     The function utilizes post training quantization method to quantize the
     fp32 model. It uses calibrate data to calculate the scale factor of
-    quantized variables, and inserts fake quant/dequant op to obtain the
-    quantized model.
+    quantized variables, and inserts fake quantization and dequantization
+    operators to obtain the quantized model.
 
     Args:
         executor(fluid.Executor): The executor to load, run and save the
             quantized model.
         model_dir(str): The path of fp32 model that will be quantized, and
-            the model and params that saved by fluid.io.save_inference_model
+            the model and params that saved by ``fluid.io.save_inference_model``
             are under the path.
         quantize_model_path(str): The path to save quantized model using api
-            fluid.io.save_inference_model.
+            ``fluid.io.save_inference_model``.
         sample_generator(Python Generator): The sample generator provides
             calibrate data for DataLoader, and it only returns a sample every time.
         model_filename(str, optional): The name of model file. If parameters
-            are saved in separate files, set it as 'None'. Default is 'None'.
+            are saved in separate files, set it as 'None'. Default: 'None'.
         params_filename(str, optional): The name of params file.
             When all parameters are saved in a single file, set it
            as filename. If parameters are saved in separate files,
-            set it as 'None'. Default is 'None'.
+            set it as 'None'. Default: 'None'.
        batch_size(int, optional): The batch size of DataLoader, default is 16.
        batch_nums(int, optional): If batch_nums is not None, the number of calibrate
            data is 'batch_size*batch_nums'. If batch_nums is None, use all data
@@ -264,15 +270,16 @@ def quant_post(executor,
            and save variables. If scope is None, will use fluid.global_scope().
        algo(str, optional): If algo=KL, use KL-divergenc method to get the more precise
            scale factor. If algo='direct', use
-            abs_max method to get the scale factor. Default is 'KL'.
+            abs_max method to get the scale factor. Default: 'KL'.
        quantizable_op_type(list[str], optional): The list of op types
-            that will be quantized. Default is ["conv2d", "depthwise_conv2d",
+            that will be quantized. Default: ["conv2d", "depthwise_conv2d",
            "mul"].
        is_full_quantize(bool): if True, apply quantization to all supported quantizable op type.
            If False, only apply quantization to the input quantizable_op_type. Default is False.
        is_use_cache_file(bool): If False, all temp data will be saved in memory. If True,
-            all temp data will be saved to disk. Defalut is False.
+            all temp data will be saved to disk. Default: False.
        cache_dir(str): When 'is_use_cache_file' is True, temp data will be save in 'cache_dir'. Default is './temp_post_training'.
+
    Returns:
        None
    """
@@ -296,22 +303,23 @@ def convert(program, place, config=None, scope=None, save_int8=False):
     """
-    change quantization ops order in program. return program that can used by Paddle-Lite.
+    convert a quantized and well-trained ``program`` to a final quantized ``program`` that can be used to save an ``inference model``.
+
     Args:
-        program(fluid.Program): program that returned by quant_aware
-        place(fluid.CPUPlace or fluid.CUDAPlace): CPU or CUDA device
-        scope(fluid.Scope, optional): the scope to store var, it should be program's scope. if None, will use fluid.global_scope().
-        default is None.
-        config(dict, optional): configs for convert. if set None, will use default config. Default is None.\
-        It must be same with config that used in 'quant_aware'.
-        save_int8: if return int8 freezed program. Int8 program can only be used to check size of model weights. \
-        It cannot be used in Fluid or Paddle-Lite.
-    Return:
-        freezed_program(fluid.Program): freezed program which can be used for inference.
-        parameters is float32 type, but it's value in int8 range.
-        freezed_program_int8(fluid.Program): freezed int8 program.
-        when save_int8 is False, return freezed_program.
-        when save_int8 is True, return freezed_program and freezed_program_int8
+        program(fluid.Program): quantized and well-trained test ``program``.
+        place(fluid.CPUPlace or fluid.CUDAPlace): The device on which the executor runs.
+        config(dict, optional): configs for convert. If set to None, the default config is used.
+            It must be the same config that was used in 'quant_aware'. Default: None.
+        scope(fluid.Scope, optional): Scope records the mapping between variable names and variables,
+            similar to the scope created by brackets in programming languages. Usually users can use
+            ``fluid.global_scope()``. When set to ``None``, ``fluid.global_scope()`` will be used. Default: ``None``.
+        save_int8(bool): Whether to also return a ``program`` whose model parameters' dtype is ``int8``.
+            That program can only be used to inspect model size. Default: ``False``.
+
+    Returns:
+        Tuple: the freezed program(s) that can be used for inference.
+            When ``save_int8`` is False, returns ``freezed_program(fluid.Program)``.
+            When ``save_int8`` is True, returns ``freezed_program(fluid.Program)`` and ``freezed_program_int8(fluid.Program)``.
     """
     scope = fluid.global_scope() if not scope else scope
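Taken together, the quanter.py docstrings above describe a quantization-aware training workflow: insert fake quantization/dequantization operators with quant_aware, fine-tune, then freeze with convert. A minimal sketch of the quant_aware step, assuming the PaddlePaddle 1.x ``fluid`` API (the network, shapes, and learning rate are illustrative, not part of the patch):

    import paddle.fluid as fluid
    from paddleslim.quant import quant_aware

    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        image = fluid.data(name='image', shape=[None, 1, 28, 28], dtype='float32')
        label = fluid.data(name='label', shape=[None, 1], dtype='int64')
        out = fluid.layers.fc(input=image, size=10, act='softmax')
        avg_cost = fluid.layers.mean(
            fluid.layers.cross_entropy(input=out, label=label))

    # Clone the test program before appending optimizer ops, as usual in fluid.
    val_program = main_program.clone(for_test=True)
    with fluid.program_guard(main_program, startup_program):
        fluid.optimizer.SGD(learning_rate=0.001).minimize(avg_cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    # config=None falls back to the default quantization config.
    quant_train_program = quant_aware(main_program, place, config=None, for_test=False)
    quant_eval_program = quant_aware(val_program, place, config=None, for_test=True)

The returned programs then replace the originals for fine-tuning and evaluation until accuracy recovers.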
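quant_post, by contrast, needs no retraining: it only requires a saved fp32 inference model and a calibration reader. A hedged sketch, in which the model paths, input shape, and the random reader are placeholders for real data:

    import numpy as np
    import paddle.fluid as fluid
    from paddleslim.quant import quant_post

    def sample_generator():
        # Placeholder calibration reader: yields one sample at a time,
        # shaped like the model's input; substitute real calibration data.
        for _ in range(160):
            yield np.random.random((1, 28, 28)).astype('float32')

    exe = fluid.Executor(fluid.CPUPlace())

    quant_post(
        executor=exe,
        model_dir='./fp32_model',              # assumed path of the saved fp32 model
        quantize_model_path='./quant_post_model',
        sample_generator=sample_generator,
        model_filename=None,                   # params saved in separate files
        params_filename=None,
        batch_size=16,
        batch_nums=10)                         # 16 * 10 = 160 calibration samples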
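Finally, once the quant-aware programs have been fine-tuned, convert freezes the test program for inference; with save_int8=True it additionally returns the int8-weight program, which per the docstring is only useful for checking model size. Continuing the quant_aware sketch above:

    from paddleslim.quant import convert

    quant_infer_program, int8_program = convert(
        quant_eval_program, place, config=None, save_int8=True)

    # The freezed float program can be saved as an ordinary inference model.
    fluid.io.save_inference_model(
        dirname='./quant_inference_model',
        feeded_var_names=['image'],
        target_vars=[out],
        executor=exe,
        main_program=quant_infer_program)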