[Quantization] Save output threshold by argname_index (#25272)

* Save output threshold by argname_index, test=develop

[Quantization] Save output threshold by argname_index (#25272)
* Save output threshold by argname_index, test=develop
d8f4714b · cc · GitHub · 64b46122 · d8f4714b · d8f4714b
2 changed files
--- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
+++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
@@ -28,6 +28,7 @@ from .quantization_pass import AddQuantDequantPass
 from .quantization_pass import _out_scale_op_list
 from .quantization_pass import _get_op_input_var_names
 from .quantization_pass import _get_op_output_var_names
+from .quantization_pass import _get_output_name_index

 __all__ = ['PostTrainingQuantization', 'WeightQuantization']

@@ -405,6 +406,10 @@ class PostTrainingQuantization(object):
                                    model_filename=self._model_filename,
                                    params_filename=self._params_filename)

+        if self._program.num_blocks > 1:
+            _logger.error("The post training quantization requires that the "
+                          "program only has one block.")
+
        if self._optimize_model:
            self._optimize_fp32_model()

@@ -450,6 +455,9 @@ class PostTrainingQuantization(object):
        persistable_var_names = _all_persistable_var_names(self._program)
        for op in self._program.global_block().ops:
            op_type = op.type
+            if self._is_full_quantize and \
+                op_type not in self._quantizable_op_type:
+                _logger.warning(op_type + " is not supported for quantization.")
            # For quantized ops, sample inputs and outputs
            if op_type in self._quantizable_op_type:
                collect_var_name(
@@ -685,13 +693,25 @@ class PostTrainingQuantization(object):
                op._set_attr("quantization_type", quantized_type)

        def analysis_and_save_info(op_node, out_var_name):
+            argname_index = _get_output_name_index(op_node, out_var_name)
+            assert argname_index is not None, \
+                out_var_name + " is not the output of the op"
            if self._algo == "KL":
+                # For compatibility, we save output threshold by two methods.
                save_info(op_node, out_var_name,
                          self._quantized_var_kl_threshold, "out_threshold",
                          "post_kl")
+                save_info(
+                    op_node, out_var_name, self._quantized_var_kl_threshold,
+                    argname_index[0] + str(argname_index[1]) + "_threshold",
+                    "post_kl")
            elif self._algo == "abs_max":
                save_info(op_node, out_var_name, self._quantized_var_abs_max,
                          "out_threshold", "post_abs_max")
+                # Tag with "post_abs_max" (not "post_kl") to match the call above.
+                save_info(op_node, out_var_name, self._quantized_var_abs_max,
+                          argname_index[0] + str(argname_index[1]) +
+                          "_threshold", "post_abs_max")
            elif self._algo == "min_max":
                save_info(op_node, out_var_name, self._quantized_var_min,
                          "out_min", "post_min_max")

--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -127,6 +127,22 @@ def _get_op_output_var_names(op):
    return var_names


+def _get_output_name_index(op, output_var_name):
+    """Return the (argname, index) slot of output_var_name in op, or None.
+
+    If the variable fills several output slots, the last one found wins.
+    """
+    assert isinstance(op, (IrNode, Operator)), \
+        "The input op should be IrNode or Operator."
+    op_type = op.name() if isinstance(op, IrNode) else op.type
+    found = None
+    for argname in _op_real_in_out_name[op_type][1]:
+        for position, candidate in enumerate(op.output(argname)):
+            if candidate == output_var_name:
+                found = (argname, position)
+    return found
+
+
 def _init_var_node(var_node, value, scope, place):
    assert isinstance(value,
                      np.ndarray), 'The type of value should be numpy array.'
@@ -1528,13 +1544,19 @@ class OutScaleForInferencePass(object):
        op_nodes = graph.all_op_nodes()
        for op_node in op_nodes:
            if op_node.name() in self._teller_set:
-                output_var_name = _get_op_output_var_names(op_node)
-                assert len(output_var_name) == 1, "Only support collecting " \
-                    "output for op that only has an activation output for now."
-                scale_name = self._scale_name(output_var_name[0])
+                var_names = _get_op_output_var_names(op_node)
+                for var_name in var_names:
+                    # For compatibility, we save output threshold by two methods.
+                    scale_name = self._scale_name(var_name)
                    scale_v = np.array(
                        self._scope.find_var(scale_name).get_tensor())[0]
                    op_node.op()._set_attr("out_threshold", float(scale_v))
+
+                    argname_index = _get_output_name_index(op_node, var_name)
+                    assert argname_index is not None, \
+                        var_name + " is not the output of the op"
+                    op_node.op()._set_attr(argname_index[0] + str(argname_index[1]) \
+                        + "_threshold", float(scale_v))
        graph.resolve_hazard()
        return graph