From 12b9b03ef898487eef89f196729eb1cc0e47e618 Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Mon, 31 Oct 2022 13:58:15 +0800
Subject: [PATCH] [cherry-pick] update dygraph PTQ export_model api (#47415)

* update dygraph PTQ export_model api

* remove postprocess
---
 .../slim/quantization/imperative/ptq.py       | 178 +++++++++++-------
 .../quantization/imperative/ptq_registry.py   |  35 ++--
 2 files changed, 132 insertions(+), 81 deletions(-)

diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
index cccc5d90fba..febdacdf43e 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
@@ -31,9 +31,9 @@ from .ptq_registry import PTQRegistry
 
 __all__ = ['ImperativePTQ']
 
-_logger = get_logger(__name__,
-                     logging.INFO,
-                     fmt='%(asctime)s-%(levelname)s: %(message)s')
+_logger = get_logger(
+    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
+)
 
 
 class ImperativePTQ(object):
@@ -75,17 +75,20 @@ class ImperativePTQ(object):
         Return
             quantized_model(paddle.nn.Layer): The quantized model.
         """
-        assert isinstance(model, paddle.nn.Layer), \
-            "The model must be the instance of paddle.nn.Layer."
+        assert isinstance(
+            model, paddle.nn.Layer
+        ), "The model must be the instance of paddle.nn.Layer."
         if not inplace:
             model = copy.deepcopy(model)
         if fuse:
             model.eval()
             model = fuse_utils.fuse_layers(model, fuse_list)
         for name, layer in model.named_sublayers():
-            if PTQRegistry.is_supported_layer(layer) \
-                and utils.is_leaf_layer(layer) \
-                and not self._is_skip_layer(layer):
+            if (
+                PTQRegistry.is_supported_layer(layer)
+                and utils.is_leaf_layer(layer)
+                and not self._is_skip_layer(layer)
+            ):
 
                 # Add quant config
                 quant_config = copy.deepcopy(self._quant_config)
@@ -98,7 +101,8 @@ class ImperativePTQ(object):
                 quant_hook_handle = layer.register_forward_post_hook(hook)
                 quant_config.quant_hook_handle = quant_hook_handle
                 layer._forward_post_hooks.move_to_end(
-                    quant_hook_handle._hook_id, last=False)
+                    quant_hook_handle._hook_id, last=False
+                )
 
         return model
 
@@ -110,14 +114,14 @@ class ImperativePTQ(object):
 
         Args:
             model (Layer): The model to be saved.
-            path (str): The path prefix to save model. The format is 
+            path (str): The path prefix to save model. The format is
                 ``dirname/file_prefix`` or ``file_prefix``.
             input_spec (list[InputSpec|Tensor], optional): Describes the input
                 of the saved model's forward method, which can be described by
-                InputSpec or example Tensor. If None, all input variables of 
+                InputSpec or example Tensor. If None, all input variables of
                 the original Layer's forward method would be the inputs of
                 the saved model. Default None.
-            **configs (dict, optional): Other save configuration options for
+            **config (dict, optional): Other save configuration options for
                 compatibility. We do not recommend using these configurations,
                 they may be removed in the future. If not necessary, DO NOT use
                 them. Default None.
@@ -125,16 +129,17 @@ class ImperativePTQ(object):
                 (1) output_spec (list[Tensor]): Selects the output targets of
                 the saved model. By default, all return variables of original
                 Layer's forward method are kept as the output of the saved model.
-                If the provided ``output_spec`` list is not all output variables, 
+                If the provided ``output_spec`` list is not all output variables,
                 the saved model will be pruned according to the given
-                ``output_spec`` list. 
+                ``output_spec`` list.
 
         Returns:
             None
         """
 
-        assert isinstance(model, paddle.nn.Layer), \
-            "The model must be the instance of paddle.nn.Layer."
+        assert isinstance(
+            model, paddle.nn.Layer
+        ), "The model must be the instance of paddle.nn.Layer."
 
         # Convert and save dygraph quantized model
         self._convert(model)
@@ -156,12 +161,16 @@ class ImperativePTQ(object):
         model_filename = basename + INFER_MODEL_SUFFIX
         params_filename = basename + INFER_PARAMS_SUFFIX
 
-        [infer_program, feed_target_names,
-         fetch_targets] = (paddle.fluid.io.load_inference_model(
-             dirname=dirname,
-             executor=exe,
-             model_filename=model_filename,
-             params_filename=params_filename))
+        [
+            infer_program,
+            feed_target_names,
+            fetch_targets,
+        ] = paddle.fluid.io.load_inference_model(
+            dirname=dirname,
+            executor=exe,
+            model_filename=model_filename,
+            params_filename=params_filename,
+        )
 
         # Process inference program
         self._clean_up(infer_program)
@@ -169,13 +178,15 @@ class ImperativePTQ(object):
         self._remove_scale_op(infer_program)
 
         # Save final program
-        paddle.fluid.io.save_inference_model(dirname=dirname,
-                                             feeded_var_names=feed_target_names,
-                                             target_vars=fetch_targets,
-                                             executor=exe,
-                                             main_program=infer_program.clone(),
-                                             model_filename=model_filename,
-                                             params_filename=params_filename)
+        paddle.fluid.io.save_inference_model(
+            dirname=dirname,
+            feeded_var_names=feed_target_names,
+            target_vars=fetch_targets,
+            executor=exe,
+            main_program=infer_program.clone(),
+            model_filename=model_filename,
+            params_filename=params_filename,
+        )
 
         if is_dynamic_mode:
             paddle.disable_static()
@@ -213,8 +224,9 @@ class ImperativePTQ(object):
         Returns:
             None
         """
-        assert isinstance(model, paddle.nn.Layer), \
-            "The input model must be the instance of paddle.nn.Layer."
+        assert isinstance(
+            model, paddle.nn.Layer
+        ), "The input model must be the instance of paddle.nn.Layer."
 
         total_num = 0
         cur_num = 0
@@ -226,8 +238,9 @@ class ImperativePTQ(object):
             if self._is_quant_layer(sub_layer):
                 cur_num += 1
                 if cur_num % 5 == 0:
-                    _logger.info("Process the %s / %s layer" %
-                                 (cur_num, total_num))
+                    _logger.info(
+                        "Process the %s / %s layer" % (cur_num, total_num)
+                    )
 
                 quant_config = sub_layer._quant_config
 
@@ -236,7 +249,7 @@ class ImperativePTQ(object):
                 quant_config.out_act_quantizer.cal_thresholds()
 
                 if PTQRegistry.is_simulated_quant_layer(sub_layer):
-                    weights = (sub_layer.weight, )
+                    weights = (sub_layer.weight,)
                     quant_config.wt_quantizer.sample_data(sub_layer, weights)
                     quant_config.wt_quantizer.cal_thresholds()
 
@@ -250,18 +263,25 @@ class ImperativePTQ(object):
         Returns:
             None
         """
-        assert isinstance(sub_layer, paddle.nn.Layer), \
-            "The input model must be the instance of paddle.nn.Layer."
+        assert isinstance(
+            sub_layer, paddle.nn.Layer
+        ), "The input model must be the instance of paddle.nn.Layer."
 
         layer_info = PTQRegistry.layer_info(sub_layer)
 
         output_names = layer_info.output_names
         output_thresholds = quant_config.out_act_quantizer.thresholds
         assert len(output_names) == 1
-        assert len(output_thresholds) == 1
-        save_name = output_names[0] + str(0) + "_threshold"
-        sub_layer._set_op_attrs({save_name: output_thresholds[0]})
-        sub_layer._set_op_attrs({"out_threshold": output_thresholds[0]})
+        if len(output_thresholds) == 1:
+            save_name = output_names[0] + str(0) + "_threshold"
+            sub_layer._set_op_attrs({save_name: output_thresholds[0]})
+            sub_layer._set_op_attrs({"out_threshold": output_thresholds[0]})
+        else:
+            _logger.warning(
+                "output_thresholds shape of {} need to be 1, but received {}".format(
+                    output_names[0], len(output_thresholds)
+                )
+            )
 
     def _wrap_simulated_layers(self, model):
         """
@@ -272,12 +292,14 @@ class ImperativePTQ(object):
         Returns:
             None
         """
-        assert isinstance(model, paddle.nn.Layer), \
-            "The input model must be the instance of paddle.nn.Layer."
+        assert isinstance(
+            model, paddle.nn.Layer
+        ), "The input model must be the instance of paddle.nn.Layer."
 
         for name, sub_layer in model.named_sublayers():
-            if self._is_quant_layer(sub_layer) \
-                and PTQRegistry.is_simulated_quant_layer(sub_layer):
+            if self._is_quant_layer(
+                sub_layer
+            ) and PTQRegistry.is_simulated_quant_layer(sub_layer):
 
                 quant_config = sub_layer._quant_config
                 assert quant_config.enable_in_act_quantizer == True
@@ -303,36 +325,44 @@ class ImperativePTQ(object):
                     "activation_bits": in_act_quantizer.quant_bits,
                 }
 
-                quant_layer = quant_layers.__dict__[quant_layer_name](sub_layer,
-                                                                      **kwargs)
+                quant_layer = quant_layers.__dict__[quant_layer_name](
+                    sub_layer, **kwargs
+                )
 
                 # save the input thresholds
                 assert hasattr(quant_layer, "_fake_quant_input")
                 assert hasattr(quant_layer._fake_quant_input, "_scale")
-                assert len(in_act_quantizer.thresholds) == 1
-                input_threshold = np.array([in_act_quantizer.thresholds[0]],
-                                           dtype=np.float32)
-                quant_layer._fake_quant_input._scale.set_value(input_threshold)
+                if len(in_act_quantizer.thresholds) == 1:
+                    input_threshold = np.array(
+                        [in_act_quantizer.thresholds[0]], dtype=np.float32
+                    )
+                    quant_layer._fake_quant_input._scale.set_value(
+                        input_threshold
+                    )
 
                 assert hasattr(quant_layer, "_fake_quant_weight")
                 assert hasattr(quant_layer._fake_quant_weight, "_scale")
                 assert len(wt_quantizer.thresholds) == 1
                 weight_threshold = wt_quantizer.thresholds[0]
                 if isinstance(weight_threshold, list):
-                    weight_threshold = np.array(weight_threshold,
-                                                dtype=np.float32)
+                    weight_threshold = np.array(
+                        weight_threshold, dtype=np.float32
+                    )
                 else:
-                    weight_threshold = np.array([weight_threshold],
-                                                dtype=np.float32)
+                    weight_threshold = np.array(
+                        [weight_threshold], dtype=np.float32
+                    )
                 quant_layer._fake_quant_weight._scale.set_value(
-                    weight_threshold)
+                    weight_threshold
+                )
 
                 # save the output thresholds
                 self._save_output_thresholds(quant_layer, quant_config)
 
                 # replace the layer
-                parent_layer, sub_name = \
-                    utils.find_parent_layer_and_sub_name(model, name)
+                parent_layer, sub_name = utils.find_parent_layer_and_sub_name(
+                    model, name
+                )
                 setattr(parent_layer, sub_name, quant_layer)
 
     def _gather_input_thresholds(self, program, scope):
@@ -351,30 +381,37 @@ class ImperativePTQ(object):
                 if previous_op is None:
                     continue
 
-                if "quantize_dequantize" in previous_op.type or \
-                    previous_op.type == "moving_average_abs_max_scale":
+                if (
+                    "quantize_dequantize" in previous_op.type
+                    or previous_op.type == "moving_average_abs_max_scale"
+                ):
                     attr_name = previous_op.output('OutScale')[0]
                     in_threshold = utils.load_variable_data(scope, attr_name)
                     in_threshold = utils.fp_numpy_to_naive(in_threshold)
                     argname, index = utils._get_input_name_index(
-                        op, in_var_name)
-                    op._set_attr(argname + str(index) + "_threshold",
-                                 in_threshold)
+                        op, in_var_name
+                    )
+                    op._set_attr(
+                        argname + str(index) + "_threshold", in_threshold
+                    )
                     op._set_attr("with_quant_attr", True)
                 else:
                     for out_var_name in utils._get_op_output_var_names(
-                            previous_op):
+                        previous_op
+                    ):
                         if out_var_name != in_var_name:
                             continue
                         argname, index = utils._get_output_name_index(
-                            previous_op, out_var_name)
+                            previous_op, out_var_name
+                        )
                         attr_name = argname + str(index) + "_threshold"
                         if not previous_op.has_attr(attr_name):
                             continue
                         threshold = previous_op.attr(attr_name)
 
                         argname, index = utils._get_input_name_index(
-                            op, in_var_name)
+                            op, in_var_name
+                        )
                         attr_name = argname + str(index) + "_threshold"
                         op._set_attr(attr_name, threshold)
                         op._set_attr("with_quant_attr", True)
@@ -390,8 +427,11 @@ class ImperativePTQ(object):
         """
 
         def _helper(op, next_op, old_attr_name, new_attr_name):
-            if op.has_attr(old_attr_name) and next_op.has_attr(old_attr_name) \
-                and op.attr(old_attr_name) == next_op.attr(old_attr_name):
+            if (
+                op.has_attr(old_attr_name)
+                and next_op.has_attr(old_attr_name)
+                and op.attr(old_attr_name) == next_op.attr(old_attr_name)
+            ):
                 threshold = op.attr(old_attr_name)
                 op._remove_attr(old_attr_name)
                 next_op._remove_attr(old_attr_name)
@@ -417,8 +457,8 @@ class ImperativePTQ(object):
                 old_attr_name = argname + str(index) + "_threshold"
 
                 argname, index = utils._get_output_name_index(
-                    next_op,
-                    next_op.output("Out")[0])
+                    next_op, next_op.output("Out")[0]
+                )
                 new_attr_name = argname + str(index) + "_threshold"
 
                 _helper(op, next_op, old_attr_name, new_attr_name)
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
index a6b8033bc78..e7b6a243abe 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
@@ -41,6 +41,7 @@ PTQ_LAYERS_INFO = [
     LayerInfo(paddle.nn.ReLU, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.ReLU6, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Hardswish, ['X'], [], ['Out']),
+    LayerInfo(paddle.nn.Swish, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Sigmoid, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Softmax, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Tanh, ['X'], [], ['Out']),
@@ -48,10 +49,15 @@ PTQ_LAYERS_INFO = [
 ]
 
 QUANT_LAYERS_INFO = [
-    LayerInfo(paddle.nn.quant.quant_layers.QuantizedConv2D, ['Input'],
-              ['Filter'], ['Output']),
-    LayerInfo(paddle.nn.quant.quant_layers.QuantizedLinear, ['X'], ['Y'],
-              ['Out']),
+    LayerInfo(
+        paddle.nn.quant.quant_layers.QuantizedConv2D,
+        ['Input'],
+        ['Filter'],
+        ['Output'],
+    ),
+    LayerInfo(
+        paddle.nn.quant.quant_layers.QuantizedLinear, ['X'], ['Y'], ['Out']
+    ),
 ]
 
 SIMULATED_LAYERS = [paddle.nn.Conv2D, paddle.nn.Linear]
@@ -61,6 +67,7 @@ class PTQRegistry(object):
     """
     Register the supported layers for PTQ and provide layers info.
     """
+
     supported_layers_map = {}
     registered_layers_map = {}
     is_inited = False
@@ -89,8 +96,9 @@ class PTQRegistry(object):
             flag(bool): Whther the layer is supported.
         """
         cls._init()
-        return layer in cls.supported_layers_map or \
-            isinstance(layer, tuple(cls.supported_layers_map.keys()))
+        return layer in cls.supported_layers_map or isinstance(
+            layer, tuple(cls.supported_layers_map.keys())
+        )
 
     @classmethod
     def is_registered_layer(cls, layer):
@@ -102,8 +110,9 @@ class PTQRegistry(object):
             flag(bool): Wether the layer is register layer_info.
         """
         cls._init()
-        return layer in cls.registered_layers_map or \
-            isinstance(layer, tuple(cls.registered_layers_map.keys()))
+        return layer in cls.registered_layers_map or isinstance(
+            layer, tuple(cls.registered_layers_map.keys())
+        )
 
     @classmethod
     def is_simulated_quant_layer(cls, layer):
@@ -114,8 +123,9 @@ class PTQRegistry(object):
         Returns:
             flag(bool): Whther the layer is supported.
         """
-        return layer in SIMULATED_LAYERS or \
-            isinstance(layer, tuple(SIMULATED_LAYERS))
+        return layer in SIMULATED_LAYERS or isinstance(
+            layer, tuple(SIMULATED_LAYERS)
+        )
 
     @classmethod
     def layer_info(cls, layer):
@@ -126,8 +136,9 @@ class PTQRegistry(object):
         Returns:
             layer_info(LayerInfo): The layer info of the input layer.
         """
-        assert cls.is_registered_layer(layer), \
-            "The input layer is not register."
+        assert cls.is_registered_layer(
+            layer
+        ), "The input layer is not register."
 
         for layer_key, layer_info in cls.registered_layers_map.items():
             if layer == layer_key or isinstance(layer, layer_key):
-- 
GitLab