diff --git a/paddleslim/analysis/_utils.py b/paddleslim/analysis/_utils.py
index f0673b1f769561feab9c591492d62e6c552aa086..0a2a06f5accd92303ddbb9fadbab36226dd2ad92 100644
--- a/paddleslim/analysis/_utils.py
+++ b/paddleslim/analysis/_utils.py
@@ -18,7 +18,7 @@ import pickle
 import paddle
 import paddleslim
 import subprocess
-import sklearn
+import time
 __all__ = [
     "save_cls_model", "save_det_model", "save_seg_model", "nearest_interpolate",
     "opt_model", "load_predictor"
@@ -29,10 +29,11 @@ def opt_model(opt="paddle_lite_opt",
               model_file='',
               param_file='',
               optimize_out_type='protobuf',
-              valid_targets='arm'):
+              valid_targets='arm',
+              enable_fp16=False):
     assert os.path.exists(model_file) and os.path.exists(
         param_file), f'{model_file} or {param_file} does not exist.'
-    save_dir = f'./opt_models_tmp/{os.getpid()}'
+    save_dir = f'./opt_models_tmp/{os.getpid()}_{time.time()}'
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
 
@@ -41,8 +42,8 @@ def opt_model(opt="paddle_lite_opt",
         model_out = os.path.join(save_dir, 'pbmodel')
     else:
         model_out = os.path.join(save_dir, 'model')
-
-    cmd = f'{opt} --model_file={model_file} --param_file={param_file} --optimize_out_type={optimize_out_type} --optimize_out={model_out} --valid_targets={valid_targets}'
+    enable_fp16 = str(enable_fp16).lower()
+    cmd = f'{opt} --model_file={model_file} --param_file={param_file} --optimize_out_type={optimize_out_type} --optimize_out={model_out} --valid_targets={valid_targets} --enable_fp16={enable_fp16}'
     print(f'commands:{cmd}')
     m = subprocess.Popen(
         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
diff --git a/paddleslim/analysis/extract_features.py b/paddleslim/analysis/extract_features.py
index aa646a921100b5a484f02e821b144eb075337633..acb30841ed7ba93b71f8538b8b3fcd3cb7a8aeba 100644
--- a/paddleslim/analysis/extract_features.py
+++ b/paddleslim/analysis/extract_features.py
@@ -52,8 +52,13 @@ def get_features_from_paramkey(param_key, op_type, data_type):
     features = None
 
     if 'conv2d' in op_type:
-        flag_quant = 'quant=None' if data_type == 'fp32' else 'quant=True'
-        if flag_quant not in param_key:
+        if data_type == 'fp16':
+            quant_bits = 'bit_length=16'
+        elif data_type == 'int8':
+            quant_bits = 'bit_length=8'
+        else:
+            quant_bits = 'bit_length=None'
+        if quant_bits not in param_key:
             return None
 
         weight = re.search(r'weight=(\(\d*, \d*, \d*, \d*\))',
@@ -178,7 +183,7 @@ def get_features_from_paramkey(param_key, op_type, data_type):
           'leaky_relu' in op_type or 'tanh' in op_type or 'swish' in op_type or
           'softmax' in op_type or 'hard_sigmoid' in op_type or
           'sigmoid' in op_type or 'gelu' in op_type or 'clip' in op_type or
-          'shape' in op_type or 'interp_v2' in op_type):
+          'shape' in op_type or 'interp_v2' in op_type or 'sqrt' in op_type):
 
         inputs = re.search(r'in=(\((-?\d+,* *)+\))',
                            param_key).group().split('=')[-1].strip(
diff --git a/paddleslim/analysis/latency_predictor.py b/paddleslim/analysis/latency_predictor.py
index 579c29eb05c584bf3a5e932829b586b549fb6943..6760c07e5a9b6a0d06dd0d02703f305d93cda654 100644
--- a/paddleslim/analysis/latency_predictor.py
+++ b/paddleslim/analysis/latency_predictor.py
@@ -16,7 +16,7 @@
 
 import os
 import pickle
-import time
+import shutil
 import subprocess
 from .parse_ops import get_key_from_op
 from .extract_features import get_data_from_tables, get_features_from_paramkey
@@ -71,15 +71,16 @@ class TableLatencyPredictor(LatencyPredictor):
         self.hardware = None
         self.threads = None
         self.predictor_state = False
+        self.predictor = {}
         self._initial_table()
 
     def _initial_table(self):
         if self.table_file in ['SD625', 'SD710', 'SD845', 'SD865']:
             self.hardware = self.table_file
-            if self.hardware in ['SD625', 'SD710']:
-                self.predictor_state = True
             self.threads = 4
             self.table_file = f'{self.hardware}_threads_4_power_mode_0.pkl'
+            if self.hardware in ['SD625', 'SD710']:
+                self.predictor_state = True
             if not os.path.exists(self.table_file):
                 subprocess.call(
                     f'wget https://paddlemodels.bj.bcebos.com/PaddleSlim/analysis/{self.table_file}',
@@ -115,6 +116,19 @@ class TableLatencyPredictor(LatencyPredictor):
                     break
         return in_shape
 
+    def _preload_predictor(self, data_type='fp32'):
+        op_types = [
+            'depthwise_conv2d', 'conv2d', 'pool2d', 'matmul', 'elementwise_add',
+            'elementwise_mul', 'concat', 'calib', 'swish'
+        ]
+        op_dir = self.table_file.split('.')[0] + '_batchsize_1'
+        for op_type in op_types:
+            model = load_predictor(op_type, op_dir, data_type)
+            key = op_type
+            if 'conv2d' in op_type:
+                key = f'{op_type}_{data_type}'
+            self.predictor[key] = model
+
     def predict(self,
                 model_file,
                 param_file,
@@ -125,22 +139,27 @@
 
         Args:
             model_file(str), param_file(str): The inference model(*.pdmodel, *.pdiparams).
-            data_type(str): Data type, fp32 or int8. Default : fp32
+            data_type(str): Data type, fp32, fp16 or int8.
             threads(int): threads num
             input_shape(list): Generally, the input shape is confirmed when saving the inference model and the parameter is only effective for input shape that has variable length.
         Returns:
             latency(float): The latency of the model.
         """
-        assert data_type in ['fp32', 'int8'
-                             ], f'data_type must be one of [fp32, int8]'
+        assert data_type in ['fp32', 'int8', 'fp16'
+                             ], f'data_type must be one of [fp32, int8, fp16]'
 
         if self.hardware and self.threads != threads:
            self._change_table(threads)
 
+        if self.predictor_state and f'conv2d_{data_type}' not in self.predictor:
+            self._preload_predictor(data_type)
+
+        enable_fp16 = True if data_type == 'fp16' else False
         pbmodel_file = opt_model(
             model_file=model_file,
             param_file=param_file,
-            optimize_out_type='protobuf', )
+            optimize_out_type='protobuf',
+            enable_fp16=enable_fp16)
 
         paddle.enable_static()
         with open(pbmodel_file, "rb") as f:
@@ -176,7 +195,7 @@ class TableLatencyPredictor(LatencyPredictor):
             warnings.warn("OperatorType\tCalledTimes")
             for key in new_op:
                 warnings.warn(f"{key.ljust(15)}\t{new_op[key]}")
-
+        shutil.rmtree(os.path.dirname(pbmodel_file))
         return latency
 
     def op_predictor(self, op_type, param_key, data_type):
@@ -185,18 +204,20 @@
         Args:
             op_type: The operator's type
             param_key: The operator's parameter information.
-            data_type: Data type, fp32 or int8. Default : int8
+            data_type: Data type, fp32 or int8.
         Returns:
             latency(float): The latency of the operator.
         """
         latency = 0.0
-        op_dir = self.table_file.split('.')[0] + '_batchsize_1'
         if op_type in [
                 'depthwise_conv2d', 'conv2d', 'pool2d', 'matmul', 'elementwise_add',
                 'elementwise_mul', 'concat', 'calib', 'swish'
         ]:
-            predictor = load_predictor(op_type, op_dir, data_type)
+            key = op_type
+            if 'conv2d' in op_type:
+                key = f'{op_type}_{data_type}'
+            predictor = self.predictor[key]
             features = get_features_from_paramkey(param_key, op_type,
                                                   data_type)
             latency = predictor.predict([features])
         else:
diff --git a/paddleslim/analysis/parse_ops.py b/paddleslim/analysis/parse_ops.py
index 5490428135d325fb4ed11eeac5f9a54d6a915ecc..ecb8d5297b995ead752b97163b364d77510fb2d3 100644
--- a/paddleslim/analysis/parse_ops.py
+++ b/paddleslim/analysis/parse_ops.py
@@ -24,25 +24,30 @@ def get_key_from_op(op):
 
     if 'conv2d' in op_type:
         out_shape = op.all_outputs()[0].shape()
         in_shape = op.all_inputs()[-1].shape()
+        in_name = op.all_inputs()[1].name()
         weight_shape = op.all_inputs()[-2].shape()
-        kernel = weight_shape[2]
+        weight_shape = (out_shape[1], weight_shape[1], weight_shape[2], weight_shape[3])
+
         stride = op.attr('strides')[1]
         padding = op.attr('paddings')[1]
         groups = op.attr('groups')
         dilation = op.attr('dilations')[1]
-        int8 = op.attr('enable_int8')
+        quant = op.attr('enable_int8')
         bit_length = op.attr('bit_length')
+        if op.attr(in_name + '_fp16') == 'fp16':
+            quant = True
+            bit_length = 16
 
-        param_key = f'{op_type} in={in_shape} weight={weight_shape} out={out_shape} pad={padding} stride={stride} group={groups} dilation={dilation} quant={int8} bit_length={bit_length}'
+        param_key = f'{op_type} in={in_shape} weight={weight_shape} out={out_shape} pad={padding} stride={stride} group={groups} dilation={dilation} quant={quant} bit_length={bit_length}'
 
     elif op_type == 'matmul' or op_type == 'matmul_v2':
         X = op.all_inputs()[0].shape()
         Y = op.all_inputs()[1].shape()
         out_shape = op.all_outputs()[0].shape()
-        int8 = op.attr('enable_int8')
+        quant = op.attr('enable_int8')
         bit_length = op.attr('bit_length')
-        param_key = f'{op_type} X={X} Y={Y} out={out_shape} quant={int8} bit_length={bit_length}'
+        param_key = f'{op_type} X={X} Y={Y} out={out_shape} quant={quant} bit_length={bit_length}'
 
     elif 'batch_norm' in op_type or 'layer_norm' in op_type:
         out_shape = op.all_outputs()[-1].shape()
@@ -67,14 +72,12 @@ def get_key_from_op(op):
 
     elif op_type in [
             'hard_swish', 'relu', 'leaky_relu', 'tanh', 'swish', 'softmax',
-            'hard_sigmoid', 'sigmoid', 'gelu', 'clip', 'shape'
+            'hard_sigmoid', 'sigmoid', 'gelu', 'clip', 'shape', 'sqrt'
    ] or 'transpose' in op_type or 'interp_v2' in op_type:
         in_shape = op.all_inputs()[-1].shape()
+        out_shape = op.all_outputs()[0].shape()
 
-        param_key = f'{op_type} in={in_shape}'
-
-        in_shape = op.all_inputs()[-1].shape()
-
-        param_key = f'{op_type} in={in_shape}'
+        param_key = f'{op_type} in={in_shape} out={out_shape}'
 
     elif op_type in ['fill_constant', 'range', 'cast'] or 'expand' in op_type: