From 6fa4ff18399f440b966f4657db297a0a543915e5 Mon Sep 17 00:00:00 2001
From: whs
Date: Fri, 17 Jun 2022 14:51:44 +0800
Subject: [PATCH] Make ACT support inputs with implicit dimensions (#1175)

---
 paddleslim/analysis/latency_predictor.py      |  3 +-
 paddleslim/auto_compression/compressor.py     | 96 ++++++++++++++++---
 paddleslim/auto_compression/utils/fake_ptq.py |  1 -
 paddleslim/auto_compression/utils/predict.py  |  4 +-
 4 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/paddleslim/analysis/latency_predictor.py b/paddleslim/analysis/latency_predictor.py
index 1a72c1aa..c6747511 100644
--- a/paddleslim/analysis/latency_predictor.py
+++ b/paddleslim/analysis/latency_predictor.py
@@ -239,6 +239,7 @@ class TableLatencyPredictor(LatencyPredictor):
             data_type=data_type)
         features = get_features_from_paramkey(param_key, op_type, data_type)
         latency = nearest_interpolate(features, data)
-        assert latency != None, f'{param_key} is not in the table.'
+        if latency is None:
+            return 0.
         return latency
 
diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py
index 4d47ee10..66ca9eb5 100644
--- a/paddleslim/auto_compression/compressor.py
+++ b/paddleslim/auto_compression/compressor.py
@@ -30,6 +30,7 @@ from ..analysis import TableLatencyPredictor
 from .create_compressed_program import build_distill_program, build_quant_program, build_prune_program, remove_unused_var_nodes
 from .strategy_config import ProgramInfo, merge_config
 from .auto_strategy import prepare_strategy, get_final_quant_config, create_strategy_config, create_train_config
+from .utils.predict import with_variable_shape
 
 _logger = get_logger(__name__, level=logging.INFO)
 
@@ -47,6 +48,7 @@ class AutoCompression:
                  params_filename,
                  save_dir,
                  train_dataloader,
+                 input_shapes=None,
                  train_config=None,
                  strategy_config=None,
                  target_speedup=None,
@@ -71,6 +73,13 @@ class AutoCompression:
             train_data_loader(Python Generator, Paddle.io.DataLoader): The
                 Generator or Dataloader provides train data, and it could
                 return a batch every time.
+            input_shapes(dict|tuple|list): Used when the model has implicit dimensions other than the batch size.
+                If it is a dict, each key is an input name and each value is the shape to set.
+                For example, if input "X" has shape [-1, 3, -1, -1], meaning the batch size, height
+                and width are variable, input_shapes can be set to {"X": [-1, 3, 512, 512]}.
+                If it is a list or tuple, the model must have exactly one input, whose shape
+                will be set to input_shapes. None means keeping the original shapes; in that case,
+                the search for compression strategies may be skipped. Default: None.
             train_config(dict, optional): The train config in the compression process, the key can
                 reference ``_ .
                Only one strategy(quant_post with hyperparameter optimization) can set train_config
@@ -122,18 +131,13 @@ class AutoCompression:
         self.train_dataloader = train_dataloader
         self.target_speedup = target_speedup
         self.eval_function = eval_callback
+        self.deploy_hardware = deploy_hardware
 
         if eval_dataloader is None:
             eval_dataloader = self._get_eval_dataloader(train_dataloader)
         self.eval_dataloader = eval_dataloader
 
         paddle.enable_static()
-
-        if deploy_hardware in TableLatencyPredictor.hardware_list:
-            self.deploy_hardware = deploy_hardware
-        else:
-            self.deploy_hardware = None
-
         self._exe, self._places = self._prepare_envs()
         self.model_type = self._get_model_type(self._exe, model_dir,
                                                model_filename, params_filename)
@@ -141,6 +145,19 @@ class AutoCompression:
         if self.train_config is not None and self.train_config.use_fleet:
             fleet.init(is_collective=True)
 
+        if with_variable_shape(
+                self.model_dir,
+                model_filename=model_filename,
+                params_filename=params_filename) and input_shapes is not None:
+
+            infer_shape_model = self.create_tmp_dir(
+                self.final_dir, prefix="infer_shape_model_")
+            self._infer_shape(model_dir, self.model_filename,
+                              self.params_filename, input_shapes,
+                              infer_shape_model)
+            self.model_dir = infer_shape_model
+            self.model_filename = "infered_shape.pdmodel"
+            self.params_filename = "infered_shape.pdiparams"
         if self.strategy_config is None:
             strategy_config = prepare_strategy(
                 self._exe, self._places, self.model_dir, self.model_filename,
@@ -155,14 +172,62 @@ class AutoCompression:
 
         self._strategy, self._config = self._prepare_strategy(
             self.strategy_config)
-        #print(self._strategy, self._config[0].__dict__)
-        #sys.exit()
 
         # If train_config is None, set default train_config
         if self.train_config is None:
             self.train_config = create_train_config(self.strategy_config,
                                                     self.model_type)
 
+    def _infer_shape(self, model_dir, model_filename, params_filename,
+                     input_shapes, save_path):
+        assert type(input_shapes) in [
+            dict, list, tuple
+        ], f'Type of input_shapes should be in [dict, tuple or list] but got {type(input_shapes)}.'
+        paddle.enable_static()
+        exe = paddle.static.Executor(paddle.CPUPlace())
+        [inference_program, feed_target_names, fetch_targets] = (
+            paddle.static.load_inference_model(
+                model_dir,
+                exe,
+                model_filename=model_filename,
+                params_filename=params_filename))
+
+        if type(input_shapes) in [list, tuple]:
+            assert len(
+                feed_target_names
+            ) == 1, f"The number of model's inputs should be 1 but got {feed_target_names}."
+            input_shapes = {feed_target_names[0]: input_shapes}
+
+        feed_vars = []
+        for var_ in inference_program.list_vars():
+            if var_.name in feed_target_names:
+                feed_vars.append(var_)
+                var_.desc.set_shape(input_shapes[var_.name])
+
+        for block in inference_program.blocks:
+            for op in block.ops:
+                if op.type not in ["feed", "fetch"]:
+                    op.desc.infer_shape(block.desc)
+
+        save_path = os.path.join(save_path, "infered_shape")
+        os.makedirs(save_path)
+        paddle.static.save_inference_model(
+            save_path, feed_vars, fetch_targets, exe, program=inference_program)
+        _logger.info(f"Saved model with inferred shapes to {save_path}")
+
+    @property
+    def deploy_hardware(self):
+        return self._deploy_hardware
+
+    @deploy_hardware.setter
+    def deploy_hardware(self, value):
+        if value is not None:
+            # Fail fast when deploy hardware is set explicitly.
+            assert (
+                value in TableLatencyPredictor.hardware_list
+            ), f"Hardware should be in supported list {TableLatencyPredictor.hardware_list} but got {value}. Or you can set deploy_hardware to None."
+        self._deploy_hardware = value
+
     def _get_eval_dataloader(self, train_dataloader):
         def _gen():
             len_loader = len(list(train_dataloader()))
@@ -394,14 +459,17 @@ class AutoCompression:
             program_info.program = compiled_prog
         return program_info
 
-    def compress(self):
+    def create_tmp_dir(self, base_dir, prefix="tmp"):
         # create a new temp directory in final dir
-        s_datetime = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
-        tmp_base_name = "_".join(["tmp", str(os.getpid()), s_datetime])
-        self.tmp_dir = os.path.join(self.final_dir, tmp_base_name)
-        if not os.path.exists(self.tmp_dir):
-            os.makedirs(self.tmp_dir)
+        s_datetime = strftime("%Y-%m-%d-%H:%M:%S", gmtime())
+        tmp_base_name = "_".join([prefix, str(os.getpid()), s_datetime])
+        tmp_dir = os.path.join(base_dir, tmp_base_name)
+        if not os.path.exists(tmp_dir):
+            os.makedirs(tmp_dir)
+        return tmp_dir
 
+    def compress(self):
+        self.tmp_dir = self.create_tmp_dir(self.final_dir)
         for strategy_idx, (
                 strategy, config) in enumerate(zip(self._strategy, self._config)):
diff --git a/paddleslim/auto_compression/utils/fake_ptq.py b/paddleslim/auto_compression/utils/fake_ptq.py
index 83d36006..91304ed0 100644
--- a/paddleslim/auto_compression/utils/fake_ptq.py
+++ b/paddleslim/auto_compression/utils/fake_ptq.py
@@ -51,7 +51,6 @@ def post_quant_fake(executor,
     for op_type in _quantizable_op_type:
         assert op_type in _support_quantize_op_type, \
             op_type + " is not supported for quantization."
-
     _program, _feed_list, _fetch_list = paddle.fluid.io.load_inference_model(
         model_dir,
         executor,
diff --git a/paddleslim/auto_compression/utils/predict.py b/paddleslim/auto_compression/utils/predict.py
index 2bc316c2..a6e5d219 100644
--- a/paddleslim/auto_compression/utils/predict.py
+++ b/paddleslim/auto_compression/utils/predict.py
@@ -11,8 +11,8 @@ def with_variable_shape(model_dir, model_filename=None, params_filename=None):
     Whether the shape of model's input is variable.
     Args:
         path_prefix(str | None): Directory path to save model + model name without suffix.
-        model_filename(str): specify model_filename if you don't want to use default name. Default : 'None'.
-        params_filename(str): specify params_filename if you don't want to use default name. Default : 'None'.
+        model_filename(str): specify model_filename if you don't want to use the default name. Default: None.
+        params_filename(str): specify params_filename if you don't want to use the default name. Default: None.
     Returns:
         bool: Whether the shape of model's input is variable.
     """
--
GitLab
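
Below is a minimal usage sketch of the new input_shapes argument. Everything concrete in it is assumed for illustration: the model directory, the file names, the input name "X", and the toy dataloader are placeholders, and the strategy and train configs are left to ACT's automatic defaults.

import numpy as np
import paddle

from paddleslim.auto_compression import AutoCompression

paddle.enable_static()

def train_dataloader():
    # Placeholder generator: yields batches shaped like the pinned input.
    for _ in range(8):
        yield {"X": np.random.rand(1, 3, 512, 512).astype("float32")}

ac = AutoCompression(
    model_dir="./inference_model",      # assumed path
    model_filename="model.pdmodel",     # assumed file names
    params_filename="model.pdiparams",
    save_dir="./act_output",
    train_dataloader=train_dataloader,
    # The saved model declares "X" as [-1, 3, -1, -1]; pinning height and
    # width lets _infer_shape propagate concrete shapes through the graph.
    input_shapes={"X": [-1, 3, 512, 512]})
ac.compress()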
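
And a sketch of the helper this patch wires into the constructor; it can also be called directly to check up front whether a model needs input_shapes at all (same assumed paths as above):

from paddleslim.auto_compression.utils.predict import with_variable_shape

# Per its docstring, returns whether the model's input shape is variable;
# the constructor combines this check with `input_shapes is not None` to
# decide whether to run shape inference before compressing.
if with_variable_shape(
        "./inference_model",
        model_filename="model.pdmodel",
        params_filename="model.pdiparams"):
    print("Pass input_shapes to AutoCompression to pin the free dimensions.")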