Unverified · Commit 7c9fa0dd authored by Liufang Sang, committed by GitHub

update quantization en api format (#97)

Parent 2c73ed17
@@ -233,20 +233,23 @@ def _quant_embedding_abs_max(graph, scope, place, config):
 def quant_embedding(program, place, config, scope=None):
-    """
-    quant lookup_table op parameters
+    """quantize lookup_table op parameters
     Args:
         program(fluid.Program): infer program
-        scope(fluid.Scope): the scope to store var, when is None will use fluid.global_scope()
-        place(fluid.CPUPlace or fluid.CUDAPlace): place
-        config(dict): config to quant. The keys are 'params_name', 'quantize_type', \
+        scope(fluid.Scope): Scope records the mapping between variable names and variables, similar to the scoping brackets in programming languages. If ``None``, `fluid.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_ will be used. Default: ``None``.
+        place(fluid.CPUPlace or fluid.CUDAPlace): The device on which the executor runs.
+        config(dict): config to quantize. The keys are 'params_name', 'quantize_type', \
         'quantize_bits', 'dtype', 'threshold'. \
-        'params_name': parameter name to quant, must be set.
-        'quantize_type': quantize type, supported types are ['abs_max']. default is "abs_max".
-        'quantize_bits': quantize bits, supported bits are [8]. default is 8.
-        'dtype': quantize dtype, supported dtype are ['int8']. default is 'int8'.
-        'threshold': threshold to clip tensor before quant. When threshold is not set, \
+        ``params_name`` is the parameter name to quantize; it must be set.
+        ``quantize_type`` is the quantization type; supported types are ['abs_max']. Default is 'abs_max'.
+        ``quantize_bits`` is the number of quantization bits; supported bits are [8]. Default is 8.
+        ``dtype`` is the quantized data type; supported dtypes are ['int8']. Default is 'int8'.
+        ``threshold`` is the threshold used to clip the tensor before quantization. When it is not set, \
         the tensor will not be clipped.
     Returns:
         None
     """
     assert isinstance(config, dict), "config must be dict"
     config = _merge_config(copy.deepcopy(default_config), config)
......
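A minimal usage sketch for ``quant_embedding`` (an editor's addition, not part of this commit). The toy network and the parameter name 'emb_w' are assumptions for illustration; 'params_name' must match the embedding parameter in your own program:

```python
import paddle.fluid as fluid
from paddleslim.quant import quant_embedding

# Toy inference program containing one lookup_table (embedding) op.
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    ids = fluid.data(name='ids', shape=[None, 1], dtype='int64')
    emb = fluid.embedding(
        ids, size=[100, 8],
        param_attr=fluid.ParamAttr(name='emb_w'))  # 'emb_w' is a made-up name

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)  # parameters must exist in the scope before quantizing

config = {
    'params_name': 'emb_w',       # must be set
    'quantize_type': 'abs_max',   # only 'abs_max' is supported
    'quantize_bits': 8,
    'dtype': 'int8',
}
quant_embedding(main_program, place, config)
```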
@@ -158,17 +158,23 @@ def _parse_configs(user_config):
 def quant_aware(program, place, config=None, scope=None, for_test=False):
-    """
-    add trainable quantization ops in program.
+    """Add quantization and dequantization operators to ``program``
+    for quantization training or testing.
     Args:
-        program(fluid.Program): program to quant
-        place(fluid.CPUPlace or fluid.CUDAPlace): CPU or CUDA device
-        config(dict, optional): configs for quantization. if None, will use default config. Default is None.
-        scope(fluid.Scope): the scope to store var, it should be program's scope. if None, will use fluid.global_scope().
-            default is None.
-        for_test(bool): if program is test program, set True when program is for test, False when program is for train. Default is False.
-    Return:
-        fluid.Program: user can finetune this quantization program to enhance the accuracy.
+        program(fluid.Program): training or testing ``program``.
+        place(fluid.CPUPlace or fluid.CUDAPlace): The device on which the executor runs.
+        config(dict, optional): configs for quantization. If ``None``, the default config is used.
+            Default: ``None``.
+        scope(fluid.Scope): Scope records the mapping between variable names and variables, similar to the scoping brackets in programming languages. If ``None``, `fluid.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_ will be used. Default: ``None``.
+        for_test(bool): If the ``program`` parameter is a test program, set it to ``True``; otherwise, set it to ``False``. Default: ``False``.
+    Returns:
+        fluid.CompiledProgram | fluid.Program: a program with quantization and dequantization operators added; the user can fine-tune it to improve accuracy.
     """
     scope = fluid.global_scope() if not scope else scope
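For orientation, a sketch of quantization-aware training with ``quant_aware`` (an editor's addition; the toy convolutional network is an assumption):

```python
import paddle.fluid as fluid
from paddleslim.quant import quant_aware

train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
    image = fluid.data(name='image', shape=[None, 1, 28, 28], dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
    conv = fluid.layers.conv2d(image, num_filters=16, filter_size=3)
    fc = fluid.layers.fc(conv, size=10, act='softmax')
    loss = fluid.layers.mean(fluid.layers.cross_entropy(fc, label))
    fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)

# config=None selects the default quantization config; for_test=False because
# this is the training program. Fine-tune the returned program as usual.
quant_train_program = quant_aware(train_program, place, config=None, for_test=False)
```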
@@ -237,25 +243,25 @@ def quant_post(executor,
"""
The function utilizes post training quantization method to quantize the
fp32 model. It uses calibrate data to calculate the scale factor of
quantized variables, and inserts fake quant/dequant op to obtain the
quantized model.
quantized variables, and inserts fake quantization and dequantization
operators to obtain the quantized model.
Args:
executor(fluid.Executor): The executor to load, run and save the
quantized model.
model_dir(str): The path of fp32 model that will be quantized, and
the model and params that saved by fluid.io.save_inference_model
the model and params that saved by ``fluid.io.save_inference_model``
are under the path.
quantize_model_path(str): The path to save quantized model using api
fluid.io.save_inference_model.
``fluid.io.save_inference_model``.
sample_generator(Python Generator): The sample generator provides
calibrate data for DataLoader, and it only returns a sample every time.
model_filename(str, optional): The name of model file. If parameters
are saved in separate files, set it as 'None'. Default is 'None'.
are saved in separate files, set it as 'None'. Default: 'None'.
params_filename(str, optional): The name of params file.
When all parameters are saved in a single file, set it
as filename. If parameters are saved in separate files,
set it as 'None'. Default is 'None'.
set it as 'None'. Default : 'None'.
batch_size(int, optional): The batch size of DataLoader, default is 16.
batch_nums(int, optional): If batch_nums is not None, the number of calibrate
data is 'batch_size*batch_nums'. If batch_nums is None, use all data
@@ -264,15 +270,16 @@ def quant_post(executor,
             and save variables. If scope is None, will use fluid.global_scope().
         algo(str, optional): If algo='KL', use the KL-divergence method to
             get a more precise scale factor. If algo='direct', use the
-            abs_max method to get the scale factor. Default is 'KL'.
+            abs_max method to get the scale factor. Default: 'KL'.
         quantizable_op_type(list[str], optional): The list of op types
-            that will be quantized. Default is ["conv2d", "depthwise_conv2d",
+            that will be quantized. Default: ["conv2d", "depthwise_conv2d",
             "mul"].
         is_full_quantize(bool): If True, apply quantization to all supported quantizable op types.
             If False, only apply quantization to the op types in quantizable_op_type. Default is False.
         is_use_cache_file(bool): If False, all temp data will be saved in memory. If True,
-            all temp data will be saved to disk. Defalut is False.
+            all temp data will be saved to disk. Default: False.
         cache_dir(str): When 'is_use_cache_file' is True, temp data will be saved in 'cache_dir'. Default is './temp_post_training'.
     Returns:
         None
     """
@@ -296,22 +303,23 @@ def quant_post(executor,
 def convert(program, place, config=None, scope=None, save_int8=False):
     """
-    change quantization ops order in program. return program that can used by Paddle-Lite.
+    convert a quantized and well-trained ``program`` into a final quantized ``program`` that can be used to save an ``inference model``.
     Args:
-        program(fluid.Program): program that returned by quant_aware
-        place(fluid.CPUPlace or fluid.CUDAPlace): CPU or CUDA device
-        scope(fluid.Scope, optional): the scope to store var, it should be program's scope. if None, will use fluid.global_scope().
-            default is None.
-        config(dict, optional): configs for convert. if set None, will use default config. Default is None.\
-            It must be same with config that used in 'quant_aware'.
-        save_int8: if return int8 freezed program. Int8 program can only be used to check size of model weights. \
-            It cannot be used in Fluid or Paddle-Lite.
-    Return:
-        freezed_program(fluid.Program): freezed program which can be used for inference.
-            parameters is float32 type, but it's value in int8 range.
-        freezed_program_int8(fluid.Program): freezed int8 program.
-        when save_int8 is False, return freezed_program.
-        when save_int8 is True, return freezed_program and freezed_program_int8
+        program(fluid.Program): quantized and well-trained ``test program``.
+        place(fluid.CPUPlace or fluid.CUDAPlace): The device on which the executor runs.
+        config(dict, optional): configs for convert. If ``None``, the default config is used.
+            It must be the same config that was used in 'quant_aware'. Default: ``None``.
+        scope(fluid.Scope, optional): Scope records the mapping between variable names and variables, similar to the scoping brackets in programming languages. If ``None``, `fluid.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_ will be used. Default: ``None``.
+        save_int8(bool): Whether to also return a ``program`` whose model parameters are of dtype ``int8``. Such a program can only be used to check model size. Default: ``False``.
+    Returns:
+        fluid.Program | tuple: the freezed program, which can be used for inference;
+            its parameters are of float32 type, but their values are in the int8 range.
+            When ``save_int8`` is False, return ``freezed_program(fluid.Program)``.
+            When ``save_int8`` is True, return ``freezed_program(fluid.Program)`` and ``freezed_program_int8(fluid.Program)``.
     """
     scope = fluid.global_scope() if not scope else scope
......
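Finally, a sketch of freezing a trained quantization program with ``convert`` (an editor's addition). It continues the ``quant_aware`` sketch above, so ``train_program``, ``place``, ``exe``, and ``fc`` refer to the names defined there:

```python
import paddle.fluid as fluid
from paddleslim.quant import quant_aware, convert

# Build the test-mode twin of the network and add quant/dequant operators.
# (In practice, clone for_test before calling minimize() on the train program.)
val_program = train_program.clone(for_test=True)
quant_test_program = quant_aware(val_program, place, config=None, for_test=True)

# ... fine-tune the quantization training program, then freeze ...
freezed_program = convert(quant_test_program, place, config=None)

# The freezed program can be saved as a normal inference model.
fluid.io.save_inference_model(
    dirname='./quant_infer_model',
    feeded_var_names=['image'],
    target_vars=[fc],
    executor=exe,
    main_program=freezed_program)
```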