diff --git a/paddleslim/auto_compression/auto_strategy.py b/paddleslim/auto_compression/auto_strategy.py
index 9bfc850f73cdd745274b03615eb0836e707205a9..22477721877390db10aecd1d27f9f6b2419502d4 100644
--- a/paddleslim/auto_compression/auto_strategy.py
+++ b/paddleslim/auto_compression/auto_strategy.py
@@ -105,7 +105,7 @@ def create_strategy_config(strategy_str, model_type):
             'prune_strategy': 'gmp',  ### default unstruture prune strategy is gmp
             'prune_mode': 'ratio',
-            'pruned_ratio': float(tmp_s[1]),
+            'ratio': float(tmp_s[1]),
             'local_sparsity': True,
             'prune_params_type': 'conv1x1_only'
         }
diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py
index de54ad1ce7b3dc10c57e86a28c2a7780b177adbc..dd67782af84f74a9e2ee030104570137674261bb 100644
--- a/paddleslim/auto_compression/compressor.py
+++ b/paddleslim/auto_compression/compressor.py
@@ -205,13 +205,14 @@ class AutoCompression:
         train_configs = [train_config]
         for idx in range(1, len(self._strategy)):
-            if 'qat' in self._strategy[idx]:
-                ### if compress strategy more than one, the train config in the yaml set for prune
-                ### the train config for quantization is extrapolate from the yaml
+            if 'qat' in self._strategy[idx] or 'ptq' in self._strategy[idx]:
+                ### If there is more than one compression strategy, the TrainConfig in the yaml is only used for pruning.
+                ### The TrainConfig for quantization is extrapolated from it.
                 tmp_train_config = copy.deepcopy(train_config.__dict__)
                 ### the epoch, train_iter, learning rate of quant is 10% of the prune compress
-                tmp_train_config['epochs'] = max(
-                    int(train_config.epochs * 0.1), 1)
+                if self.model_type != 'transformer':
+                    tmp_train_config['epochs'] = max(
+                        int(train_config.epochs * 0.1), 1)
                 if train_config.train_iter is not None:
                     tmp_train_config['train_iter'] = int(
                         train_config.train_iter * 0.1)
@@ -228,8 +229,6 @@ class AutoCompression:
                             map(lambda x: x * 0.1, train_config.learning_rate[
                                 'values']))
                     train_cfg = TrainConfig(**tmp_train_config)
-            elif 'ptq' in self._strategy[idx]:
-                train_cfg = None
             else:
                 tmp_train_config = copy.deepcopy(train_config.__dict__)
                 train_cfg = TrainConfig(**tmp_train_config)
@@ -802,11 +801,12 @@ class AutoCompression:
             for name in test_program_info.feed_target_names
         ]
 
+        model_name = '.'.join(self.model_filename.split(
+            '.')[:-1]) if self.model_filename is not None else 'model'
+        path_prefix = os.path.join(model_dir, model_name)
         paddle.static.save_inference_model(
-            path_prefix=str(model_dir),
+            path_prefix=path_prefix,
            feed_vars=feed_vars,
            fetch_vars=test_program_info.fetch_targets,
            executor=self._exe,
-            program=test_program,
-            model_filename=self.model_filename,
-            params_filename=self.params_filename)
+            program=test_program)
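The save-path change above follows from how `paddle.static.save_inference_model` works in Paddle 2.x: it accepts no `model_filename`/`params_filename` arguments and always writes `<path_prefix>.pdmodel` and `<path_prefix>.pdiparams`, so the stem of the user-supplied filename is folded into `path_prefix` instead. A minimal sketch of that renaming logic, with hypothetical directory and filename values:

```python
import os

# Hypothetical inputs for illustration only.
model_dir = 'output'                    # assumed save directory
model_filename = 'inference.pdmodel'    # assumed user-supplied filename

# Same stem-extraction expression as the patch above.
model_name = '.'.join(model_filename.split(
    '.')[:-1]) if model_filename is not None else 'model'
path_prefix = os.path.join(model_dir, model_name)

assert path_prefix == os.path.join('output', 'inference')
# paddle.static.save_inference_model(path_prefix, ...) then emits
# output/inference.pdmodel and output/inference.pdiparams, preserving
# the original on-disk names without the removed keyword arguments.
```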
diff --git a/paddleslim/auto_compression/utils/fake_ptq.py b/paddleslim/auto_compression/utils/fake_ptq.py
index 74a6cf03ca03772adcc0744783a09cc0830194d7..fbecc224f663c39403f4741aa903a3cbaf5e9188 100644
--- a/paddleslim/auto_compression/utils/fake_ptq.py
+++ b/paddleslim/auto_compression/utils/fake_ptq.py
@@ -1,3 +1,4 @@
+import os
 import paddle
 from paddle.fluid.framework import IrGraph
 from paddle.framework import core
@@ -111,10 +112,11 @@ def post_quant_fake(executor,
     _program = graph.to_program()
 
     feed_vars = [_program.global_block().var(name) for name in _feed_list]
+    model_name = model_filename.split('.')[
+        0] if model_filename is not None else 'model'
+    save_model_path = os.path.join(save_model_path, model_name)
     paddle.static.save_inference_model(
         path_prefix=save_model_path,
-        model_filename=model_filename,
-        params_filename=params_filename,
         feed_vars=feed_vars,
         fetch_vars=_fetch_list,
         executor=executor,
diff --git a/paddleslim/auto_compression/utils/load_model.py b/paddleslim/auto_compression/utils/load_model.py
index b56346b5154eece32dc87871ffbffaa62491e81d..bb61ab5626eca704e3becbf67a2c3711624b283b 100644
--- a/paddleslim/auto_compression/utils/load_model.py
+++ b/paddleslim/auto_compression/utils/load_model.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 import paddle
 
 __all__ = ['load_inference_model']
@@ -29,8 +30,16 @@ def load_inference_model(path_prefix,
                 model_filename=model_filename,
                 params_filename=params_filename))
     else:
-        [inference_program, feed_target_names, fetch_targets] = (
-            paddle.static.load_inference_model(
-                path_prefix=path_prefix, executor=executor))
+        model_name = '.'.join(model_filename.split('.')
+                              [:-1]) if model_filename is not None else 'model'
+        if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')):
+            model_path_prefix = os.path.join(path_prefix, model_name)
+            [inference_program, feed_target_names, fetch_targets] = (
+                paddle.static.load_inference_model(
+                    path_prefix=model_path_prefix, executor=executor))
+        else:
+            [inference_program, feed_target_names, fetch_targets] = (
+                paddle.static.load_inference_model(
+                    path_prefix=path_prefix, executor=executor))
 
     return [inference_program, feed_target_names, fetch_targets]
diff --git a/paddleslim/auto_compression/utils/prune_model.py b/paddleslim/auto_compression/utils/prune_model.py
index 520ccbc296ba381a970f0d3093305e4779c0c023..426a1859c4419fd4bb0d4db3f8f097d5894c223b 100644
--- a/paddleslim/auto_compression/utils/prune_model.py
+++ b/paddleslim/auto_compression/utils/prune_model.py
@@ -86,14 +86,15 @@ def get_sparse_model(executor, places, model_file, param_file, ratio,
     feed_vars = [
         inference_program.global_block().var(name) for name in feed_target_names
     ]
+    model_name = '.'.join(model_name.split('.')
+                          [:-1]) if model_name is not None else 'model'
+    save_path = os.path.join(save_path, model_name)
     static.save_inference_model(
         save_path,
         feed_vars=feed_vars,
         fetch_vars=fetch_targets,
         executor=executor,
-        program=inference_program,
-        model_filename=model_name,
-        params_filename=param_name)
+        program=inference_program)
     print("The pruned model is saved in: ", save_path)
@@ -160,11 +161,12 @@ def get_prune_model(executor, places, model_file, param_file, ratio, save_path):
     feed_vars = [
         main_program.global_block().var(name) for name in feed_target_names
     ]
+    model_name = '.'.join(model_name.split('.')
+                          [:-1]) if model_name is not None else 'model'
+    save_path = os.path.join(save_path, model_name)
     static.save_inference_model(
         save_path,
         feed_vars=feed_vars,
         fetch_vars=fetch_targets,
         executor=executor,
-        program=main_program,
-        model_filename=model_name,
-        params_filename=param_name)
+        program=main_program)
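The loader change in `load_model.py` mirrors the saver changes: models written by the new code live at `<dir>/<stem>.pdmodel`, while pre-existing exports may only be loadable from the bare directory prefix. A standalone restatement of that fallback, where the helper name `resolve_path_prefix` is hypothetical and not part of the patch:

```python
import os


def resolve_path_prefix(path_prefix, model_filename=None):
    """Prefer <path_prefix>/<stem>.pdmodel when it exists (models saved by
    the new code); otherwise fall back to the bare directory prefix so that
    older exports still load."""
    model_name = '.'.join(model_filename.split('.')
                          [:-1]) if model_filename is not None else 'model'
    if os.path.exists(os.path.join(path_prefix, model_name + '.pdmodel')):
        return os.path.join(path_prefix, model_name)
    return path_prefix


# Hypothetical usage: the resolved prefix is what would be handed to
# paddle.static.load_inference_model(path_prefix=..., executor=exe).
print(resolve_path_prefix('output', 'inference.pdmodel'))
```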
diff --git a/paddleslim/quant/post_quant_hpo.py b/paddleslim/quant/post_quant_hpo.py
index 7efebbc740c1c22534dc505bc68589ae94a6cfa7..e92742d09c9d4e5a4d0fc40e63429e79fac7c21a 100755
--- a/paddleslim/quant/post_quant_hpo.py
+++ b/paddleslim/quant/post_quant_hpo.py
@@ -307,7 +307,7 @@ def quantize(cfg):
     quant_scope = paddle.static.Scope()
     with paddle.static.scope_guard(float_scope):
         [float_inference_program, feed_target_names, fetch_targets]= fluid.io.load_inference_model( \
-            dirname=g_quant_config.model_filename, \
+            dirname=g_quant_config.float_infer_model_path, \
             model_filename=g_quant_config.model_filename, params_filename=g_quant_config.params_filename, \
             executor=g_quant_config.executor)
         float_metric = g_quant_config.eval_function(
@@ -320,8 +320,8 @@ def quantize(cfg):
             model_filename=g_quant_config.model_filename,
             params_filename=g_quant_config.params_filename,
             executor=g_quant_config.executor)
         quant_metric = g_quant_config.eval_function(
-            g_quant_config.executor, inference_program, feed_target_names,
-            fetch_targets)
+            g_quant_config.executor, quant_inference_program,
+            feed_target_names, fetch_targets)
 
     emd_loss = float(abs(float_metric - quant_metric)) / float_metric
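The `post_quant_hpo.py` hunks fix two bugs in `quantize`: the float model was loaded with `dirname=g_quant_config.model_filename` instead of the float model's directory, and `quant_metric` was computed on `inference_program` rather than the freshly loaded `quant_inference_program`, so `emd_loss` effectively compared the float model with itself. A hedged sketch of the corrected comparison, wrapped as a helper function whose name `relative_quant_loss` is hypothetical:

```python
def relative_quant_loss(eval_function, executor, float_program, quant_program,
                        feed_names, fetch_targets):
    """Evaluate the float and quantized programs with the same eval_function
    and return the relative accuracy gap that drives the HPO search."""
    float_metric = eval_function(executor, float_program, feed_names,
                                 fetch_targets)
    # The quantized program must be evaluated here; passing the float
    # program for both (the old bug) would make the loss collapse to 0
    # for every trial.
    quant_metric = eval_function(executor, quant_program, feed_names,
                                 fetch_targets)
    return float(abs(float_metric - quant_metric)) / float_metric
```

A zero loss for every configuration would leave the hyperparameter search with no gradient signal to rank candidates, which is why the wrong-program bug silently broke the whole HPO loop rather than raising an error.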