Unverified commit c95f309f, authored by ceci3 and committed by GitHub

fix latency_table multiprocess (#1151)

Parent d207622b
......@@ -19,8 +19,9 @@ import paddle
import paddleslim
import subprocess
import time
import urllib.request as request
import ssl
import requests
import shutil
__all__ = [
"save_cls_model", "save_det_model", "nearest_interpolate", "opt_model",
"load_predictor"
......@@ -29,6 +30,36 @@ __all__ = [
PREDICTOR_URL = 'https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/'
def _get_download(url, fullname):
# using requests.get method
fname = os.path.basename(fullname)
try:
req = requests.get(url, stream=True)
except Exception as e: # requests.exceptions.ConnectionError
logger.info("Downloading {} from {} failed with exception {}".format(
fname, url, str(e)))
return False
if req.status_code != 200:
raise RuntimeError("Downloading from {} failed with code "
"{}!".format(url, req.status_code))
# To protect against interrupted downloads, write to tmp_fullname
# first and move it to fullname once the download has finished
tmp_fullname = fullname + "_tmp"
with open(tmp_fullname, 'wb') as f:
for chunk in req.iter_content(chunk_size=1024):
f.write(chunk)
try:
shutil.move(tmp_fullname, fullname)
except Exception:
    # tmp_fullname is a regular file, so rmtree would be a no-op here;
    # remove the partially downloaded temp file instead
    os.remove(tmp_fullname)
return fullname
def opt_model(opt="paddle_lite_opt",
model_file='',
param_file='',
......@@ -203,12 +234,13 @@ def download_predictor(op_dir, op):
os.makedirs(op_dir)
op_path = os.path.join(op_dir, op + '_predictor.pkl')
if not os.path.exists(op_path):
# NOTE: To solve the 'SSL: certificate verify failed' error.
ssl._create_default_https_context = ssl._create_unverified_context
url = PREDICTOR_URL + op_path
request.urlretrieve(url, op_path)
print('Successfully download {}!'.format(op_path))
url = PREDICTOR_URL + op_path
while not (os.path.exists(op_path)):
if not _get_download(url, op_path):
time.sleep(1)
continue
print('Successfully download {}!'.format(op_path))
return op_path
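Read together with _get_download above, this loop is what makes concurrent use safe: each download is written to <fullname>_tmp and only moved to its final name when complete, and the caller keeps polling until the final file exists, so a process either finishes the download itself or picks up the copy another process has already completed. A minimal standalone sketch of the same pattern follows; it is not part of this patch, and the module path, URL and local path are placeholders.

import os
import time

# Module path assumed from the repository layout.
from paddleslim.analysis._utils import _get_download

url = "https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/conv2d_predictor.pkl"  # placeholder
target = "./conv2d_predictor.pkl"  # placeholder

# Poll until the final file exists; _get_download returns False on a
# connection error, so back off for a second and retry.
while not os.path.exists(target):
    if not _get_download(url, target):
        time.sleep(1)
        continue
    print("Successfully downloaded {}".format(target))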
......@@ -222,5 +254,4 @@ def load_predictor(op_type, op_dir, data_type='fp32'):
op_path = download_predictor(op_dir, op)
with open(op_path, 'rb') as f:
model = pickle.load(f)
return model
......@@ -18,14 +18,14 @@ import os
import pickle
import shutil
import subprocess
from .parse_ops import get_key_from_op
from .extract_features import get_data_from_tables, get_features_from_paramkey
from ._utils import opt_model, load_predictor, nearest_interpolate
import paddle
import paddleslim
import warnings
import urllib.request as request
import ssl
import paddle
from .parse_ops import get_key_from_op
from .extract_features import get_data_from_tables, get_features_from_paramkey
from ._utils import opt_model, load_predictor, nearest_interpolate, _get_download
from ..core import GraphWrapper
__all__ = ["LatencyPredictor", "TableLatencyPredictor"]
TABLE_URL = 'https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/'
......@@ -89,12 +89,13 @@ class TableLatencyPredictor(LatencyPredictor):
self.threads = 4
self.table_file = f'{self.hardware}_threads_4_power_mode_0.pkl'
self.predictor_state = True
if not os.path.exists(self.table_file):
# NOTE: To solve the 'SSL: certificate verify failed' error.
ssl._create_default_https_context = ssl._create_unverified_context
url = TABLE_URL + self.table_file
request.urlretrieve(url, self.table_file)
print('Successfully download {}!'.format(self.table_file))
url = TABLE_URL + self.table_file
while not (os.path.exists(self.table_file)):
if not _get_download(url, self.table_file):
time.sleep(1)
continue
print('Successfully download {}!'.format(self.table_file))
assert os.path.exists(
self.table_file
), f'{self.table_file} does not exist. If you want to use our table files, please set \'table_file\' in {TableLatencyPredictor.hardware_list}'
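For context, this table download runs when the predictor is constructed. A brief usage sketch follows; the table_file='SD710' keyword follows the assert message above, and the exact predict(...) arguments are assumptions taken from the call visible later in this diff rather than a verified API.

from paddleslim.analysis import TableLatencyPredictor

# Constructing the predictor fetches SD710_threads_4_power_mode_0.pkl
# through the retry loop above if it is not already cached locally.
predictor = TableLatencyPredictor(table_file='SD710')

# Keyword names mirror the predict(...) call shown later in this diff;
# the model paths are placeholders.
latency = predictor.predict(
    model_file='./inference_model/model.pdmodel',
    param_file='./inference_model/model.pdiparams',
    data_type='fp32')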
......@@ -177,7 +178,7 @@ class TableLatencyPredictor(LatencyPredictor):
fluid_program = paddle.fluid.framework.Program.parse_from_string(
f.read())
graph = paddleslim.core.GraphWrapper(fluid_program)
graph = GraphWrapper(fluid_program)
if input_shape != None:
ori_shape = self._get_input_shape(graph)
......
......@@ -140,7 +140,9 @@ def create_train_config(strategy_str, model_type):
return train_config
def prepare_strategy(model_dir,
def prepare_strategy(executor,
places,
model_dir,
model_filename,
params_filename,
target_speedup=None,
......@@ -152,6 +154,8 @@ def prepare_strategy(model_dir,
### use hardware latency table if supported
if deploy_hardware is not None:
compressed_time_dict = predict_compressed_model(
executor,
places,
model_dir,
model_filename,
params_filename,
......
......@@ -136,8 +136,9 @@ class AutoCompression:
if self.strategy_config is None:
strategy_config = prepare_strategy(
self.model_dir, self.model_filename, self.params_filename,
self.target_speedup, self.deploy_hardware, self.model_type)
self._exe, self._places, self.model_dir, self.model_filename,
self.params_filename, self.target_speedup, self.deploy_hardware,
self.model_type)
self.strategy_config = strategy_config
elif isinstance(self.strategy_config, dict):
self.strategy_config = [self.strategy_config]
......
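The caller-side half of the fix is visible here: AutoCompression now passes its own self._exe and self._places into prepare_strategy, so the helpers further down no longer build a fresh CPU executor per call. A rough sketch of the setup the caller is expected to own (how the real constructor builds these may differ):

import paddle

paddle.enable_static()

# One place/executor pair created up front and threaded through
# prepare_strategy -> predict_compressed_model -> get_sparse_model / get_prune_model.
places = paddle.CPUPlace()
exe = paddle.static.Executor(places)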
import os
import shutil
import paddle
from paddleslim.analysis import TableLatencyPredictor
from ...analysis import TableLatencyPredictor
from .prune_model import get_sparse_model, get_prune_model
from .fake_ptq import post_quant_fake
import shutil
def predict_compressed_model(model_dir,
def predict_compressed_model(executor,
places,
model_dir,
model_filename,
params_filename,
hardware='SD710'):
......@@ -41,10 +43,8 @@ def predict_compressed_model(model_dir,
model_file=model_file, param_file=param_file, data_type='fp32')
latency_dict.update({'origin_fp32': latency})
paddle.enable_static()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
post_quant_fake(
exe,
executor,
model_dir=model_dir,
model_filename=model_filename,
params_filename=params_filename,
......@@ -64,6 +64,8 @@ def predict_compressed_model(model_dir,
for prune_ratio in [0.3, 0.4, 0.5, 0.6]:
get_prune_model(
executor,
places,
model_file=model_file,
param_file=param_file,
ratio=prune_ratio,
......@@ -78,7 +80,7 @@ def predict_compressed_model(model_dir,
latency_dict.update({f'prune_{prune_ratio}_fp32': latency})
post_quant_fake(
exe,
executor,
model_dir=prune_model_path,
model_filename=model_filename,
params_filename=params_filename,
......@@ -98,6 +100,8 @@ def predict_compressed_model(model_dir,
for sparse_ratio in [0.70, 0.75, 0.80, 0.85, 0.90, 0.95]:
get_sparse_model(
executor,
places,
model_file=model_file,
param_file=param_file,
ratio=sparse_ratio,
......@@ -112,7 +116,7 @@ def predict_compressed_model(model_dir,
latency_dict.update({f'sparse_{sparse_ratio}_fp32': latency})
post_quant_fake(
exe,
executor,
model_dir=sparse_model_path,
model_filename=model_filename,
params_filename=params_filename,
......
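With the new signature, predict_compressed_model receives the executor and places from its caller instead of creating them internally. A hedged call sketch follows; the import path is an assumption and the model paths are placeholders.

import paddle

# Module path assumed; adjust the import to the actual file.
from paddleslim.auto_compression.utils.predict import predict_compressed_model

paddle.enable_static()
places = paddle.CPUPlace()
exe = paddle.static.Executor(places)

latency_dict = predict_compressed_model(
    exe,
    places,
    model_dir='./inference_model',      # placeholder
    model_filename='model.pdmodel',     # placeholder
    params_filename='model.pdiparams',  # placeholder
    hardware='SD710')
print(latency_dict['origin_fp32'])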
import os
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.static as static
from paddleslim.prune import Pruner
from paddleslim.core import GraphWrapper
import numpy as np
from ...prune import Pruner
from ...core import GraphWrapper
__all__ = ["get_sparse_model", "get_prune_model"]
def get_sparse_model(model_file, param_file, ratio, save_path):
def get_sparse_model(executor, places, model_file, param_file, ratio,
save_path):
"""
Using the unstructured sparse algorithm to compress the network.
This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
......@@ -34,12 +35,14 @@ def get_sparse_model(model_file, param_file, ratio, save_path):
main_prog = static.Program()
startup_prog = static.Program()
exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
executor.run(startup_prog)
[inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
folder, exe, model_filename=model_name, params_filename=param_name))
folder,
executor,
model_filename=model_name,
params_filename=param_name))
thresholds = {}
graph = GraphWrapper(inference_program)
......@@ -88,14 +91,14 @@ def get_sparse_model(model_file, param_file, ratio, save_path):
save_path,
feeded_var_names=feed_target_names,
target_vars=fetch_targets,
executor=exe,
executor=executor,
main_program=inference_program,
model_filename=model_name,
params_filename=param_name)
print("The pruned model is saved in: ", save_path)
def get_prune_model(model_file, param_file, ratio, save_path):
def get_prune_model(executor, places, model_file, param_file, ratio, save_path):
"""
Using the structured pruning algorithm to compress the network.
This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
......@@ -121,14 +124,15 @@ def get_prune_model(model_file, param_file, ratio, save_path):
main_prog = static.Program()
startup_prog = static.Program()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
scope = static.global_scope()
exe.run(startup_prog)
executor.run(startup_prog)
[inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
folder, exe, model_filename=model_name, params_filename=param_name))
folder,
executor,
model_filename=model_name,
params_filename=param_name))
prune_params = []
graph = GraphWrapper(inference_program)
......@@ -152,7 +156,7 @@ def get_prune_model(model_file, param_file, ratio, save_path):
scope,
params=prune_params,
ratios=ratios,
place=place,
place=places,
lazy=False,
only_graph=False,
param_backup=None,
......@@ -162,7 +166,7 @@ def get_prune_model(model_file, param_file, ratio, save_path):
save_path,
feeded_var_names=feed_target_names,
target_vars=fetch_targets,
executor=exe,
executor=executor,
main_program=main_program,
model_filename=model_name,
params_filename=param_name)
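Similarly, get_sparse_model and get_prune_model now take the executor and places as their first two arguments. A sketch of the updated call pattern follows; the import prefix, model paths and save paths are assumptions, while the keyword names and ratios come from the hunks above.

import paddle

# Package prefix assumed; the module name prune_model appears in this diff.
from paddleslim.auto_compression.utils.prune_model import get_sparse_model, get_prune_model

paddle.enable_static()
places = paddle.CPUPlace()
exe = paddle.static.Executor(places)

# Unstructured sparsity at one of the ratios swept above.
get_sparse_model(
    exe,
    places,
    model_file='./inference_model/model.pdmodel',
    param_file='./inference_model/model.pdiparams',
    ratio=0.75,
    save_path='./sparse_model')

# Structured pruning at one of the ratios swept above.
get_prune_model(
    exe,
    places,
    model_file='./inference_model/model.pdmodel',
    param_file='./inference_model/model.pdiparams',
    ratio=0.3,
    save_path='./prune_model')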