PaddlePaddle / PaddleSlim, commit c95f309f (unverified)

fix latency_table multiprocess (#1151)

Authored by ceci3 on Jun 06, 2022; committed via GitHub on Jun 06, 2022.
Parent: d207622b
Showing 6 changed files with 91 additions and 46 deletions (+91 / -46).
- paddleslim/analysis/_utils.py (+39 / -8)
- paddleslim/analysis/latency_predictor.py (+13 / -12)
- paddleslim/auto_compression/auto_strategy.py (+5 / -1)
- paddleslim/auto_compression/compressor.py (+3 / -2)
- paddleslim/auto_compression/utils/predict.py (+12 / -8)
- paddleslim/auto_compression/utils/prune_model.py (+19 / -15)
paddleslim/analysis/_utils.py

```diff
@@ -19,8 +19,9 @@ import paddle
 import paddleslim
 import subprocess
-import urllib.request as request
-import ssl
+import time
+import requests
+import shutil
 
 __all__ = [
     "save_cls_model", "save_det_model", "nearest_interpolate", "opt_model",
     "load_predictor"
```
```diff
@@ -29,6 +30,36 @@ __all__ = [
 
 PREDICTOR_URL = 'https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/'
 
+
+def _get_download(url, fullname):
+    # using requests.get method
+    fname = os.path.basename(fullname)
+    try:
+        req = requests.get(url, stream=True)
+    except Exception as e:  # requests.exceptions.ConnectionError
+        logger.info("Downloading {} from {} failed with exception {}".format(
+            fname, url, str(e)))
+        return False
+
+    if req.status_code != 200:
+        raise RuntimeError("Downloading from {} failed with code "
+                           "{}!".format(url, req.status_code))
+
+    # For protecting download interupted, download to
+    # tmp_fullname firstly, move tmp_fullname to fullname
+    # after download finished
+    tmp_fullname = fullname + "_tmp"
+    with open(tmp_fullname, 'wb') as f:
+        for chunk in req.iter_content(chunk_size=1024):
+            f.write(chunk)
+    try:
+        shutil.move(tmp_fullname, fullname)
+    except:
+        shutil.rmtree(tmp_fullname, ignore_errors=True)
+    return fullname
+
+
 def opt_model(opt="paddle_lite_opt",
               model_file='',
               param_file='',
```
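The heart of the fix is that `_get_download` never writes to the final path directly. Here is a minimal standalone sketch of the same pattern, simplified from the function above (the `atomic_fetch` name and the byte payload are illustrative, not part of the commit):

```python
import shutil

def atomic_fetch(fullname: str, payload: bytes) -> str:
    # Download into a sibling temp file first ...
    tmp_fullname = fullname + "_tmp"
    with open(tmp_fullname, "wb") as f:
        f.write(payload)  # the real code streams requests chunks here
    # ... then move it into place. When source and destination share a
    # filesystem, shutil.move reduces to an os.rename, so a concurrent
    # reader sees either no file or a complete one, never a partial
    # download under the final name.
    shutil.move(tmp_fullname, fullname)
    return fullname
```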
```diff
@@ -203,12 +234,13 @@ def download_predictor(op_dir, op):
         os.makedirs(op_dir)
 
     op_path = os.path.join(op_dir, op + '_predictor.pkl')
     if not os.path.exists(op_path):
-        # NOTE: To solve the 'SSL: certificate verify failed' error.
-        ssl._create_default_https_context = ssl._create_unverified_context
-        url = PREDICTOR_URL + op_path
-        request.urlretrieve(url, op_path)
-        print('Successfully download {}!'.format(op_path))
+        url = PREDICTOR_URL + op_path
+        while not (os.path.exists(op_path)):
+            if not _get_download(url, op_path):
+                time.sleep(1)
+                continue
+        print('Successfully download {}!'.format(op_path))
     return op_path
```
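Callers now poll instead of assuming a single downloader. A hedged sketch of the loop's semantics, extracted into a standalone helper for illustration (the `fetch` parameter stands in for `_get_download`):

```python
import os
import time

def ensure_downloaded(url: str, path: str, fetch) -> str:
    # Every process spins until the file exists under its final name.
    # If this process's attempt fails (fetch returns False), it backs
    # off for a second; meanwhile a sibling process may complete its
    # own attempt and finish the atomic move, which ends the loop for
    # everyone. No process ever unpickles a half-written file.
    while not os.path.exists(path):
        if not fetch(url, path):
            time.sleep(1)
            continue
    return path
```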
```diff
@@ -222,5 +254,4 @@ def load_predictor(op_type, op_dir, data_type='fp32'):
     op_path = download_predictor(op_dir, op)
     with open(op_path, 'rb') as f:
         model = pickle.load(f)
     return model
```

(The one-line deletion in this hunk is not recoverable from the extracted page; only the surviving context is shown.)
paddleslim/analysis/latency_predictor.py

```diff
@@ -18,14 +18,14 @@ import os
 import pickle
 import shutil
 import subprocess
-from .parse_ops import get_key_from_op
-from .extract_features import get_data_from_tables, get_features_from_paramkey
-from ._utils import opt_model, load_predictor, nearest_interpolate
-import paddle
-import paddleslim
 import warnings
-import urllib.request as request
-import ssl
+import paddle
+from .parse_ops import get_key_from_op
+from .extract_features import get_data_from_tables, get_features_from_paramkey
+from ._utils import opt_model, load_predictor, nearest_interpolate, _get_download
+from ..core import GraphWrapper
 
 __all__ = ["LatencyPredictor", "TableLatencyPredictor"]
 
 TABLE_URL = 'https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/'
```
```diff
@@ -89,12 +89,13 @@ class TableLatencyPredictor(LatencyPredictor):
             self.threads = 4
             self.table_file = f'{self.hardware}_threads_4_power_mode_0.pkl'
             self.predictor_state = True
 
         if not os.path.exists(self.table_file):
-            # NOTE: To solve the 'SSL: certificate verify failed' error.
-            ssl._create_default_https_context = ssl._create_unverified_context
-            url = TABLE_URL + self.table_file
-            request.urlretrieve(url, self.table_file)
-            print('Successfully download {}!'.format(self.table_file))
+            url = TABLE_URL + self.table_file
+            while not (os.path.exists(self.table_file)):
+                if not _get_download(url, self.table_file):
+                    time.sleep(1)
+                    continue
+            print('Successfully download {}!'.format(self.table_file))
 
         assert os.path.exists(self.table_file), f'{self.table_file} does not exist. If you want to use our table files, please set \'table_file\' in {TableLatencyPredictor.hardware_list}'
```
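This is the scenario the commit title points at: several worker processes constructing a `TableLatencyPredictor` used to race `request.urlretrieve` on the same table file. A hedged reproduction sketch; the pool size is illustrative, and the `table_file='SD710'` constructor argument is inferred from the assert message above rather than stated in this diff:

```python
import multiprocessing as mp

from paddleslim.analysis import TableLatencyPredictor

def build_predictor(_):
    # With this commit, each process loops until a fully downloaded
    # SD710_threads_4_power_mode_0.pkl exists, instead of clobbering a
    # sibling process's half-finished download.
    return TableLatencyPredictor(table_file='SD710') is not None

if __name__ == '__main__':
    with mp.Pool(processes=4) as pool:
        print(pool.map(build_predictor, range(4)))
```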
```diff
@@ -177,7 +178,7 @@ class TableLatencyPredictor(LatencyPredictor):
             fluid_program = paddle.fluid.framework.Program.parse_from_string(
                 f.read())
-        graph = paddleslim.core.GraphWrapper(fluid_program)
+        graph = GraphWrapper(fluid_program)
 
         if input_shape != None:
             ori_shape = self._get_input_shape(graph)
```
paddleslim/auto_compression/auto_strategy.py

```diff
@@ -140,7 +140,9 @@ def create_train_config(strategy_str, model_type):
     return train_config
 
 
-def prepare_strategy(model_dir,
+def prepare_strategy(executor,
+                     places,
+                     model_dir,
                      model_filename,
                      params_filename,
                      target_speedup=None,
```
```diff
@@ -152,6 +154,8 @@ def prepare_strategy(model_dir,
     ### use hardware latency tabel if support
     if deploy_hardware is not None:
         compressed_time_dict = predict_compressed_model(
+            executor,
+            places,
             model_dir,
             model_filename,
             params_filename,
```
paddleslim/auto_compression/compressor.py

```diff
@@ -136,8 +136,9 @@ class AutoCompression:
         if self.strategy_config is None:
             strategy_config = prepare_strategy(
-                self.model_dir, self.model_filename, self.params_filename,
-                self.target_speedup, self.deploy_hardware, self.model_type)
+                self._exe, self._places, self.model_dir, self.model_filename,
+                self.params_filename, self.target_speedup,
+                self.deploy_hardware, self.model_type)
             self.strategy_config = strategy_config
         elif isinstance(self.strategy_config, dict):
             self.strategy_config = [self.strategy_config]
```
paddleslim/auto_compression/utils/predict.py

```diff
 import os
+import shutil
 import paddle
-from paddleslim.analysis import TableLatencyPredictor
+from ...analysis import TableLatencyPredictor
 from .prune_model import get_sparse_model, get_prune_model
 from .fake_ptq import post_quant_fake
-import shutil
 
 
-def predict_compressed_model(model_dir,
+def predict_compressed_model(executor,
+                             places,
+                             model_dir,
                              model_filename,
                              params_filename,
                              hardware='SD710'):
```
```diff
@@ -41,10 +43,8 @@ def predict_compressed_model(model_dir,
         model_file=model_file,
         param_file=param_file,
         data_type='fp32')
     latency_dict.update({'origin_fp32': latency})
-    paddle.enable_static()
-    place = paddle.CPUPlace()
-    exe = paddle.static.Executor(place)
     post_quant_fake(
-        exe,
+        executor,
         model_dir=model_dir,
         model_filename=model_filename,
         params_filename=params_filename,
```
```diff
@@ -64,6 +64,8 @@ def predict_compressed_model(model_dir,
     for prune_ratio in [0.3, 0.4, 0.5, 0.6]:
         get_prune_model(
+            executor,
+            places,
             model_file=model_file,
             param_file=param_file,
             ratio=prune_ratio,
```
```diff
@@ -78,7 +80,7 @@ def predict_compressed_model(model_dir,
         latency_dict.update({f'prune_{prune_ratio}_fp32': latency})
         post_quant_fake(
-            exe,
+            executor,
             model_dir=prune_model_path,
             model_filename=model_filename,
             params_filename=params_filename,
```
```diff
@@ -98,6 +100,8 @@ def predict_compressed_model(model_dir,
     for sparse_ratio in [0.70, 0.75, 0.80, 0.85, 0.90, 0.95]:
         get_sparse_model(
+            executor,
+            places,
             model_file=model_file,
             param_file=param_file,
             ratio=sparse_ratio,
```
```diff
@@ -112,7 +116,7 @@ def predict_compressed_model(model_dir,
         latency_dict.update({f'sparse_{sparse_ratio}_fp32': latency})
         post_quant_fake(
-            exe,
+            executor,
             model_dir=sparse_model_path,
             model_filename=model_filename,
             params_filename=params_filename,
```
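The net effect on `predict_compressed_model` is that the caller owns the executor: it is created once (by `AutoCompression`, per the compressor.py hunk above) and threaded through every helper, rather than each helper building its own `paddle.static.Executor` in every process. A hedged usage sketch of the new signature; the model paths are illustrative and the import path is assumed from the file's location:

```python
import paddle
from paddleslim.auto_compression.utils.predict import predict_compressed_model

paddle.enable_static()
places = paddle.CPUPlace()
executor = paddle.static.Executor(places)

# One executor, created by the caller, reused internally by
# post_quant_fake, get_prune_model and get_sparse_model.
latency_dict = predict_compressed_model(
    executor,
    places,
    model_dir='./inference_model',      # illustrative path
    model_filename='model.pdmodel',     # illustrative filename
    params_filename='model.pdiparams',  # illustrative filename
    hardware='SD710')
print(latency_dict)
```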
paddleslim/auto_compression/utils/prune_model.py

```diff
 import os
 import time
+import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.static as static
-from paddleslim.prune import Pruner
-from paddleslim.core import GraphWrapper
-import numpy as np
+from ...prune import Pruner
+from ...core import GraphWrapper
 
 __all__ = ["get_sparse_model", "get_prune_model"]
 
 
-def get_sparse_model(model_file, param_file, ratio, save_path):
+def get_sparse_model(executor, places, model_file, param_file, ratio,
+                     save_path):
     """
     Using the unstructured sparse algorithm to compress the network.
     This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
```
```diff
@@ -34,12 +35,14 @@ def get_sparse_model(model_file, param_file, ratio, save_path):
     main_prog = static.Program()
     startup_prog = static.Program()
-    exe = paddle.static.Executor(paddle.CPUPlace())
-    exe.run(startup_prog)
+    executor.run(startup_prog)
     [inference_program, feed_target_names, fetch_targets] = (
         fluid.io.load_inference_model(
-            folder, exe, model_filename=model_name, params_filename=param_name))
+            folder,
+            executor,
+            model_filename=model_name,
+            params_filename=param_name))
     thresholds = {}
 
     graph = GraphWrapper(inference_program)
```
```diff
@@ -88,14 +91,14 @@ def get_sparse_model(model_file, param_file, ratio, save_path):
         save_path,
         feeded_var_names=feed_target_names,
         target_vars=fetch_targets,
-        executor=exe,
+        executor=executor,
         main_program=inference_program,
         model_filename=model_name,
         params_filename=param_name)
     print("The pruned model is saved in: ", save_path)
 
 
-def get_prune_model(model_file, param_file, ratio, save_path):
+def get_prune_model(executor, places, model_file, param_file, ratio, save_path):
     """
     Using the structured pruning algorithm to compress the network.
     This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
```
```diff
@@ -121,14 +124,15 @@ def get_prune_model(model_file, param_file, ratio, save_path):
     main_prog = static.Program()
     startup_prog = static.Program()
-    place = paddle.CPUPlace()
-    exe = paddle.static.Executor(place)
     scope = static.global_scope()
-    exe.run(startup_prog)
+    executor.run(startup_prog)
     [inference_program, feed_target_names, fetch_targets] = (
         fluid.io.load_inference_model(
-            folder, exe, model_filename=model_name, params_filename=param_name))
+            folder,
+            executor,
+            model_filename=model_name,
+            params_filename=param_name))
 
     prune_params = []
     graph = GraphWrapper(inference_program)
```
```diff
@@ -152,7 +156,7 @@ def get_prune_model(model_file, param_file, ratio, save_path):
         scope,
         params=prune_params,
         ratios=ratios,
-        place=place,
+        place=places,
         lazy=False,
         only_graph=False,
         param_backup=None,
```
```diff
@@ -162,7 +166,7 @@ def get_prune_model(model_file, param_file, ratio, save_path):
         save_path,
         feeded_var_names=feed_target_names,
         target_vars=fetch_targets,
-        executor=exe,
+        executor=executor,
         main_program=main_program,
         model_filename=model_name,
         params_filename=param_name)
```
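Both helpers now take the executor and places as their first two arguments instead of constructing a CPU executor internally, which keeps every worker process on the one executor its parent created. A hedged call sketch under the new signatures (paths are illustrative; the import path is assumed from the file's location):

```python
import paddle
from paddleslim.auto_compression.utils.prune_model import (
    get_prune_model, get_sparse_model)

paddle.enable_static()
places = paddle.CPUPlace()
executor = paddle.static.Executor(places)

# Structured pruning at a 0.3 ratio, for latency evaluation only.
get_prune_model(executor, places,
                model_file='./infer/model.pdmodel',
                param_file='./infer/model.pdiparams',
                ratio=0.3,
                save_path='./prune_model')

# Unstructured sparsity at a 0.75 ratio.
get_sparse_model(executor, places,
                 model_file='./infer/model.pdmodel',
                 param_file='./infer/model.pdiparams',
                 ratio=0.75,
                 save_path='./sparse_model')
```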