未验证 提交 e8772486 编写于 作者: W Wilber 提交者: GitHub

update inference trt ut framework (#35418)

上级 e8a88164
......@@ -354,6 +354,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool tensorrt_engine_enabled() const { return use_tensorrt_; }
///
/// \brief Report the precision mode stored for the TensorRT engine.
///
/// \return Precision The value held in tensorrt_precision_mode_.
///
Precision tensorrt_precision_mode() const {
  return tensorrt_precision_mode_;
}
///
/// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
/// \param min_input_shape The min input shape of the subgraph input.
/// \param max_input_shape The max input shape of the subgraph input.
......@@ -366,7 +372,14 @@ struct PD_INFER_DECL AnalysisConfig {
std::map<std::string, std::vector<int>> max_input_shape,
std::map<std::string, std::vector<int>> optim_input_shape,
bool disable_trt_plugin_fp16 = false);
///
/// \brief A boolean state telling whether the trt dynamic_shape is used.
///
/// Dynamic shape is reported as used when min_input_shape_ holds at least one
/// entry (presumably filled in by SetTRTDynamicShapeInfo -- confirm against
/// the implementation file).
///
/// \return bool Whether the trt dynamic_shape is used.
///
bool tensorrt_dynamic_shape_enabled() const {
  // An empty map means no dynamic-shape info is stored, i.e. not enabled.
  return !min_input_shape_.empty();
}
///
/// \brief Prevent ops running in Paddle-TRT
/// NOTE: just experimental, not an official stable API, easy to be broken.
......
......@@ -288,7 +288,7 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) { // NOLINT
return static_cast<py::bytes>(ss.str());
}
void CopyPaddleInferTensor(paddle_infer::Tensor &dst,
void CopyPaddleInferTensor(paddle_infer::Tensor &dst,  // NOLINT
                           const paddle_infer::Tensor &src) {
  // Forward to TensorUtils::CopyTensor, which takes the destination by pointer.
  paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src);
}
......@@ -555,6 +555,7 @@ void BindAnalysisConfig(py::module *m) {
py::arg("min_subgraph_size") = 3,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
py::arg("use_static") = false, py::arg("use_calib_mode") = true)
.def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
.def("set_trt_dynamic_shape_info",
&AnalysisConfig::SetTRTDynamicShapeInfo,
py::arg("min_input_shape") =
......@@ -564,6 +565,8 @@ void BindAnalysisConfig(py::module *m) {
py::arg("optim_input_shape") =
std::map<std::string, std::vector<int>>({}),
py::arg("disable_trt_plugin_fp16") = false)
.def("tensorrt_dynamic_shape_enabled",
&AnalysisConfig::tensorrt_dynamic_shape_enabled)
.def("enable_tensorrt_oss", &AnalysisConfig::EnableTensorRtOSS)
.def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
.def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)
......
......@@ -16,6 +16,7 @@ import numpy as np
import unittest
import abc
import os
import enum
import logging
import paddle
import paddle.fluid as fluid
......@@ -29,10 +30,22 @@ from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_mo
logging.basicConfig(level=logging.INFO, format="%(message)s")
class SkipReasons(enum.Enum):
# Categories attached to a skip rule registered through add_skip_case.
# Paddle does not support this yet although TensorRT does; the feature still needs to be added.
TRT_NOT_IMPLEMENTED = 0
# TensorRT itself does not support this.
TRT_NOT_SUPPORT = 1
# The implementation is wrong.
ALGO_WRONG = 2
# Quantized model, which only runs in INT8 mode.
QUANT_MODEL = 3
class AutoScanTest(unittest.TestCase):
def __init__(self, methodName='runTest'):
# Enable Paddle static mode before delegating to unittest.TestCase.
paddle.enable_static()
super(AutoScanTest, self).__init__(methodName)
# Holds the (teller, reason, note) tuples appended by add_skip_case.
self.skip_cases = []
@abc.abstractmethod
def sample_program_configs(self) -> List[ProgramConfig]:
......@@ -46,6 +59,18 @@ class AutoScanTest(unittest.TestCase):
def sample_predictor_configs(self) -> List[paddle_infer.Config]:
raise NotImplementedError
def add_skip_case(
        self,
        teller: Callable[[ProgramConfig, paddle_infer.Config], bool],
        reason: SkipReasons,
        note: str):
    '''
    Register a rule describing test cases that should be skipped.

    teller: Callable invoked as teller(program_config, predictor_config);
        a true result marks that combination as skipped.
    reason: The SkipReasons category of the rule.
    note: Free-form explanation stored alongside the rule.

    The rule is appended to self.skip_cases as a (teller, reason, note) tuple.
    This is a concrete helper that subclasses call directly, so it is not
    declared abstract.
    '''
    self.skip_cases.append((teller, reason, note))
@abc.abstractmethod
def check_program_validity(self, program_config: ProgramConfig) -> bool:
# Hook for subclasses: tell whether program_config describes a valid program.
# This base version has no logic of its own and only raises NotImplementedError.
raise NotImplementedError
def run_test_config(self, model, params, prog_config, pred_config,
feed_data) -> Dict[str, np.ndarray]:
'''
......@@ -56,7 +81,7 @@ class AutoScanTest(unittest.TestCase):
for name, _ in prog_config.inputs.items():
input_tensor = predictor.get_input_handle(name)
input_tensor.copy_from_cpu(feed_data[name]['shape'])
input_tensor.copy_from_cpu(feed_data[name]['data'])
if feed_data[name]['lod'] is not None:
input_tensor.set_lod(feed_data[name]['lod'])
predictor.run()
......@@ -66,26 +91,6 @@ class AutoScanTest(unittest.TestCase):
result[out_name] = predictor.get_output_handle(o_name).copy_to_cpu()
return result
def assert_op_size(self, trt_engine_num, paddle_op_num):
cur_path = os.path.dirname(__file__)
last_passed_program = os.path.join(
cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
model_bytes = paddle.static.load_from_file(last_passed_program)
pg = paddle.static.deserialize_program(model_bytes)
main_block = pg.desc.block(0)
op_size = main_block.op_size()
op_types = [
main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
]
trt_engine_size = sum(op_types)
paddle_op_size = op_size - trt_engine_size
self.assertTrue(trt_engine_size == trt_engine_num,
'trt_engine_num is {}, but got {}!'.format(
trt_engine_size, trt_engine_num))
self.assertTrue(paddle_op_size == paddle_op_num,
'paddle_op_num is {}, but got {}!'.format(
paddle_op_size, paddle_op_num))
def assert_tensors_near(self,
threshold: float,
tensors: List[Dict[str, np.array]]):
......@@ -98,42 +103,6 @@ class AutoScanTest(unittest.TestCase):
first[key], arr, atol=threshold),
"Output has diff between GPU and TensorRT. ")
def run_test(self,
trt_engine_num: int,
paddle_op_num: int,
threshold=1e-5,
quant=False,
error_msg=None):
for prog_config in self.sample_program_configs():
model, params = create_fake_model(prog_config)
if quant:
model, params = create_quant_model(model, params)
for batch_size in self.batch_size_set:
feed_data = {}
log_str = ' -- Input tensor info: '
for name, tensor_config in prog_config.inputs.items():
tensor_shape = tensor_config.shape.copy()
tensor_shape[0] = batch_size
feed_data[name] = {
'shape': np.random.random(tensor_shape).astype(
tensor_config.dtype),
'lod': tensor_config.lod
}
log_str += str({
name: {
'shape': tensor_shape,
'lod': tensor_config.lod
}
})
logging.info(log_str)
results: List[Dict[str, Tensor]] = []
for pred_config in self.sample_predictor_configs():
results.append(
self.run_test_config(model, params, prog_config,
pred_config, feed_data))
try:
self.assert_tensors_near(
threshold=threshold, tensors=results)
self.assert_op_size(trt_engine_num, paddle_op_num)
except:
logging.info('ERROR OCCURED: ' + error_msg)
@abc.abstractmethod
def run_test(self, quant=False):
# Hook for subclasses, which supply the actual test driver.
# This base version only raises NotImplementedError.
raise NotImplementedError
......@@ -30,24 +30,24 @@ from paddle.fluid.executor import global_scope
class TensorConfig:
    '''
    A config builder for a input or a weight.
    '''

    def __init__(self,
                 lod: Optional[List[List[int]]]=None,
                 data_gen: Optional[Callable[..., np.ndarray]]=None):
        '''
        lod: The LoD information of the tensor, or None.
        data_gen: A callable taking no arguments and returning the tensor
            value as a numpy array. The shape and dtype of the tensor are
            taken from that array.

        Raises TypeError when data_gen is not callable (including the
        default None), since the tensor value cannot be produced without it.
        '''
        if not callable(data_gen):
            raise TypeError(
                'TensorConfig requires a callable data_gen, but got {}.'.format(
                    type(data_gen).__name__))
        self.lod = lod
        self.data_gen = data_gen
        # Generate the data exactly once: data_gen may be random, so shape and
        # dtype must be read from the same array that is stored as the value.
        self.data = data_gen()
        self.dtype = self.data.dtype
        self.shape = self.data.shape

    def __repr__(self):
        return str({'shape': self.shape, 'lod': self.lod, 'dtype': self.dtype})
class OpConfig:
......@@ -63,6 +63,11 @@ class OpConfig:
self.outputs = outputs
self.attrs = attrs
def __repr__(self):
log_str = self.type
log_str += str(self.attrs)
return log_str
class ProgramConfig:
''' A config builder for generating a Program. '''
......@@ -77,6 +82,19 @@ class ProgramConfig:
self.inputs = inputs
self.outputs = outputs
def __repr__(self):
log_str = ''
for i in range(len(self.ops)):
if i != len(self.ops) - 1:
log_str += repr(self.ops[i]) + ' + '
else:
log_str += repr(self.ops[i])
log_str += ' -- '
for t, v in self.inputs.items():
log_str += '[' + t + ': ' + str(v) + ']'
return log_str
def create_fake_model(program_config):
''' Create a Paddle model(in memory) according to the given config. '''
......
......@@ -12,15 +12,69 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from trt_layer_auto_scan_test import TrtLayerAutoScanTest
from program_config import TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
from program_config import TensorConfig, ProgramConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
class TrtConvertConv2dTest(TrtLayerAutoScanTest):
def setUp(self):
self.ops_config = [{
def check_program_validity(self, program_config: ProgramConfig) -> bool:
    '''
    Tell whether the sampled conv2d program is consistent.

    Only the groups restriction is checked: dimension 1 of 'input_data' must
    equal dimension 1 of 'conv2d_weight' multiplied by the 'groups' attribute
    of the first op.
    '''
    # TODO: This is just the example to remove the wrong attrs.
    input_configs = program_config.inputs
    weight_configs = program_config.weights
    op_attrs = [op.attrs for op in program_config.ops]

    # groups restriction.
    input_dim1 = input_configs['input_data'].shape[1]
    weight_dim1 = weight_configs['conv2d_weight'].shape[1]
    groups_ok = input_dim1 == weight_dim1 * op_attrs[0]['groups']

    # others restriction, todo.
    return True if groups_ok else False
def sample_program_configs(self):
def generate_input1(attrs: List[Dict[str, Any]]):
# TODO: This is just the example to illustrate the relation between axis and input.
# for each attr, can generate different datas
if attrs[0]['groups'] == 1:
return np.ones([2, 3, 64, 64]).astype(np.float32)
else:
return np.ones([1, 3, 64, 64]).astype(np.float32)
def generate_weight1(attrs: List[Dict[str, Any]]):
return np.random.random([24, 3, 3, 3]).astype(np.float32)
# for strides in [[1,1], [2,2]]:
# for paddings in [[0,3], [3,1]]:
# for groups in [1]:
# for padding_algotithm in ['EXPLICIT']:
# for dilations in [[1,1]]:
# for data_format in ['NCHW']:
for strides in [[1, 1], [2, 2], [1, 2], [2, 3]]:
for paddings in [[0, 3], [3, 1], [1, 1, 1, 1], [2, 1, 1, 3]]:
for groups in [1, 2]:
for padding_algotithm in ['EXPLICIT', 'SAME', 'VALID']:
for dilations in [[1, 1], [1, 2]]:
for data_format in ['NCHW']:
dics = [{
"data_fromat": data_format,
"dilations": dilations,
"padding_algorithm": padding_algotithm,
"groups": groups,
"paddings": paddings,
"strides": strides,
"data_format": data_format
}, {}]
ops_config = [{
"op_type": "conv2d",
"op_inputs": {
"Input": ["input_data"],
......@@ -29,14 +83,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
"op_outputs": {
"Output": ["conv_output_data"]
},
"op_attrs": {
"data_format": ["NCHW"],
"dilations": [[1, 1]],
"padding_algorithm": ["EXPLICIT"],
"groups": [1],
"paddings": [[0, 3], [3, 1]],
"strides": [[1, 1], [2, 2]],
}
"op_attrs": dics[0]
}, {
"op_type": "relu",
"op_inputs": {
......@@ -45,48 +92,143 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
"op_outputs": {
"Out": ["relu_output_data"]
},
"op_attrs": {}
"op_attrs": dics[1]
}]
self.batch_size_set = [1, 2, 4]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={
"conv2d_weight": TensorConfig(
data_gen=partial(generate_weight1,
dics))
},
inputs={
"input_data": TensorConfig(
data_gen=partial(generate_input1,
dics))
},
outputs=["relu_output_data"])
# if config is invalid, we should skip that cases.
if not self.check_program_validity(
program_config):
continue
yield program_config
def sample_predictor_configs(
self, program_config) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
if len(attrs[0]['paddings']) == 4:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 32, 32],
'': []
}
self.dynamic_shape.max_input_shape = {
"input_data": [4, 3, 64, 64],
'': []
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 64, 64],
'': []
}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 32, 32]
}
self.dynamic_shape.max_input_shape = {
"input_data": [4, 3, 64, 64]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 64, 64]
}
def update_program_input_and_weight_with_attr(self, op_attr_list):
weight = np.random.randn(24, 3, 3, 3).astype("float32")
filter = TensorConfig(shape=[24, 3, 3, 3], data=weight)
if op_attr_list[0]["data_format"] == "NCHW":
input_data = TensorConfig(shape=[-1, 3, 64, 64])
def generate_trt_nodes_num(attrs, dynamic_shape):
# TODO: This is just the example, need to be fixed.
if len(attrs[0]['paddings']) == 4:
return 0, 3
else:
input_data = TensorConfig(shape=[-1, 64, 64, 3])
self.program_weights = {"conv2d_weight": filter}
self.program_inputs = {"input_data": input_data}
self.program_outputs = ["relu_output_data"]
return 1, 2
def test_check_fp32_output(self):
self.trt_param.precision = paddle_infer.PrecisionType.Float32
# the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
attrs = [
program_config.ops[i].attrs
for i in range(len(program_config.ops))
]
def test_check_fp16_output(self):
# for static_shape
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-2
self.trt_param.precision = paddle_infer.PrecisionType.Int8
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-1
def test_dynamic_shape_fp32_check_output(self):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
def test_dynamic_shape_fp16_check_output(self):
yield self.create_inference_config(), generate_trt_nodes_num(attrs,
True), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
def test_trt_int8_check_output(self):
yield self.create_inference_config(), generate_trt_nodes_num(attrs,
True), 1e-2
self.trt_param.precision = paddle_infer.PrecisionType.Int8
self.run_test(
trt_engine_num=1, paddle_op_num=2, quant=True, threshold=1e-1)
yield self.create_inference_config(), generate_trt_nodes_num(attrs,
True), 1e-1
def add_skip_trt_case(self):
    '''
    Register the example skip rules for the conv2d test via add_skip_case.

    Each teller receives (program_config, predictor_config) and looks only at
    the attrs of the first op in the program. Four rules are registered, in
    this order: groups == 2, four-element paddings, dilations [1, 2] or a
    non-EXPLICIT padding algorithm, and unequal or [2, 2] strides.
    '''

    # TODO(wilber): This is just the example to illustrate the skip usage.
    def teller1(program_config, predictor_config):
        return program_config.ops[0].attrs['groups'] == 2

    self.add_skip_case(
        teller1, SkipReasons.ALGO_WRONG,
        "Need to repair the case: ......TODO, just for the example")

    def teller2(program_config, predictor_config):
        return len(program_config.ops[0].attrs['paddings']) == 4

    self.add_skip_case(
        teller2, SkipReasons.TRT_NOT_IMPLEMENTED,
        "NOT Implemented: we need to add support in the future ....TODO, just for the example"
    )

    def teller3(program_config, predictor_config):
        attrs = program_config.ops[0].attrs
        dilations = attrs['dilations']
        # Compare both dilation entries; checking index 0 against 1 and 2 at
        # the same time can never hold.
        return (dilations[0] == 1 and dilations[1] == 2
                ) or attrs['padding_algorithm'] != 'EXPLICIT'

    self.add_skip_case(teller3, SkipReasons.TRT_NOT_SUPPORT,
                       "TODO, just for the example")

    def teller4(program_config, predictor_config):
        strides = program_config.ops[0].attrs['strides']
        return strides[0] != strides[1] or strides[0] == strides[1] == 2

    self.add_skip_case(teller4, SkipReasons.TRT_NOT_SUPPORT,
                       "TODO, just for the example")
def test(self):
# Register the skip rules first, then run the scan with the default quant=False.
self.add_skip_trt_case()
self.run_test()
def test_quant(self):
# Register the skip rules first, then run the scan with quant=True.
self.add_skip_trt_case()
self.run_test(quant=True)
if __name__ == "__main__":
......
......@@ -16,6 +16,7 @@ import numpy as np
import unittest
import itertools
import abc
import enum
import logging
import paddle
import paddle.fluid as fluid
......@@ -23,9 +24,9 @@ import paddle.fluid.core as core
import paddle.inference as paddle_infer
from paddle import compat as cpt
from typing import *
from program_config import TensorConfig, OpConfig, ProgramConfig
from auto_scan_test import AutoScanTest
from typing import Optional, List, Callable, Dict, Any, Set
from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
from auto_scan_test import AutoScanTest, SkipReasons
logging.basicConfig(level=logging.INFO, format="%(message)s")
......@@ -60,7 +61,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
def __init__(self, methodName='runTest'):
super(TrtLayerAutoScanTest, self).__init__(methodName)
self.trt_param = self.TensorRTParam(
workspace_size=0,
workspace_size=1024,
max_batch_size=4,
min_subgraph_size=0,
precision=paddle_infer.PrecisionType.Float32,
......@@ -68,62 +69,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
use_calib_mode=False)
self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)
def update_program_input_and_weight_with_attr(self, op_attr_list):
raise NotImplementedError
@abc.abstractmethod
def sample_program_configs(self):
all_op_attrs_keys = []
all_op_attrs_values = []
for op_config in self.ops_config:
all_op_attrs_keys.append(list(op_config["op_attrs"].keys()))
all_op_attrs_values.extend(list(op_config["op_attrs"].values()))
if len(all_op_attrs_values) == 0:
all_op_attrs_values.append([None])
for attrs_sample in itertools.product(*all_op_attrs_values):
op_attr_list = []
index = 0
ops = []
log_str = 'TEST_CASE: '
for i in range(len(self.ops_config)):
op_config = self.ops_config[i]
op_attr = dict(
zip(
list(op_config["op_attrs"].keys()), attrs_sample[
index:index + len(op_config["op_attrs"])]))
if i != len(self.ops_config) - 1:
log_str += op_config['op_type'] + str(op_attr) + ' + '
else:
log_str += op_config['op_type'] + str(op_attr)
op_attr_list.append(op_attr)
index = index + len(op_config["op_attrs"])
ops.append(
OpConfig(
type=op_config["op_type"],
inputs=op_config["op_inputs"],
outputs=op_config["op_outputs"],
attrs=op_attr))
logging.info(log_str)
self.update_program_input_and_weight_with_attr(op_attr_list)
# if no weight needs to be saved, we create a place_holder to help serialize params.
if not self.program_weights:
self.program_weights = {
"place_holder_weight": TensorConfig(
shape=[1], data=np.array([1]).astype(np.float32))
}
program_config = ProgramConfig(
ops=ops,
weights=self.program_weights,
inputs=self.program_inputs,
outputs=self.program_outputs)
yield program_config
def create_program_config(
self, use_trt=True,
precision_mode=paddle_infer.PrecisionType.Float32):
def create_inference_config(self, use_trt=True) -> paddle_infer.Config:
config = paddle_infer.Config()
config.disable_glog_info()
config.enable_use_gpu(100, 0)
......@@ -133,7 +79,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
max_batch_size=self.trt_param.max_batch_size,
workspace_size=self.trt_param.workspace_size,
min_subgraph_size=self.trt_param.min_subgraph_size,
precision_mode=precision_mode,
precision_mode=self.trt_param.precision,
use_static=self.trt_param.use_static,
use_calib_mode=self.trt_param.use_calib_mode)
if len(self.dynamic_shape.min_input_shape
......@@ -148,32 +94,152 @@ class TrtLayerAutoScanTest(AutoScanTest):
self.dynamic_shape.disable_trt_plugin_fp16)
return config
@abc.abstractmethod
def sample_predictor_configs(self):
def precision_to_str(p):
if p == paddle_infer.PrecisionType.Float32:
return 'float32'
elif p == paddle_infer.PrecisionType.Half:
return 'half'
elif p == paddle_infer.PrecisionType.Int8:
return 'int8'
def assert_tensors_near(self,
                        threshold: float,
                        tensor: Dict[str, np.array],
                        baseline: Dict[str, np.array]):
    '''
    Assert that every array in tensor is close to the same-named baseline array.

    threshold: Absolute tolerance handed to np.allclose as atol.
    tensor: Mapping from output name to the array under test.
    baseline: Mapping from output name to the reference array; it is indexed
        with every key of tensor.
    '''
    for name, actual in tensor.items():
        is_close = np.allclose(baseline[name], actual, atol=threshold)
        self.assertTrue(is_close,
                        "Output has diff between GPU and TensorRT. ")
def assert_op_size(self, trt_engine_num, paddle_op_num):
    '''
    Check the op counts of the serialized program against the expected ones.

    trt_engine_num: Expected number of ops whose type is 'tensorrt_engine'.
    paddle_op_num: Expected number of all remaining ops.

    The program is loaded from 'transpose_flatten_concat_fuse_pass.pdmodel',
    a relative path, and only block 0 of it is inspected. Failure messages
    report the expected value first and the counted value second.
    '''
    last_passed_program = 'transpose_flatten_concat_fuse_pass.pdmodel'
    model_bytes = paddle.static.load_from_file(last_passed_program)
    pg = paddle.static.deserialize_program(model_bytes)
    main_block = pg.desc.block(0)
    op_size = main_block.op_size()
    trt_engine_size = sum(
        main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size))
    paddle_op_size = op_size - trt_engine_size
    self.assertTrue(trt_engine_size == trt_engine_num,
                    'trt_engine_num is {}, but got {}!'.format(
                        trt_engine_num, trt_engine_size))
    self.assertTrue(paddle_op_size == paddle_op_num,
                    'paddle_op_num is {}, but got {}!'.format(
                        paddle_op_num, paddle_op_size))
def skip_log(self, msg: str):
# Emit msg at WARNING level, prefixed with "SKIP: ".
logging.warning("SKIP: " + msg)
def fail_log(self, msg: str):
# Emit msg at ERROR level, prefixed with "FAILE: ".
# NOTE(review): the prefix looks like a typo of "FAILED"; left unchanged in case log consumers match on it.
logging.error("FAILE: " + msg)
def success_log(self, msg: str):
# Emit msg at INFO level, prefixed with "SUCCESS: ".
logging.info("SUCCESS: " + msg)
def validate(self, func: Callable[..., bool]):
# Placeholder: func is accepted but currently ignored, and nothing is returned.
pass
def generate_op_config(self,
                       ops_config: List[Dict[str, Any]]) -> List[OpConfig]:
    '''
    Convert plain op description dicts into OpConfig objects, keeping order.

    ops_config: One dict per op with the keys 'op_type', 'op_inputs',
        'op_outputs' and 'op_attrs'.
    Returns a list with one OpConfig per entry of ops_config.
    '''
    return [
        OpConfig(
            type=description['op_type'],
            inputs=description['op_inputs'],
            outputs=description['op_outputs'],
            attrs=description['op_attrs']) for description in ops_config
    ]
def inference_config_str(self, config: paddle_infer.Config):
dic = {}
enable_trt = config.tensorrt_engine_enabled()
trt_precison = config.tensorrt_precision_mode()
trt_dynamic_shape = config.tensorrt_dynamic_shape_enabled()
if enable_trt:
dic['use_trt'] = True
dic['trt_precision'] = trt_precison
dic['use_dynamic_shape'] = trt_dynamic_shape
else:
raise NotImplementedError('not supported type.')
dic['use_trt'] = False
return str(dic)
trt_log_str = ''
if len(self.dynamic_shape.min_input_shape
) != 0 and self.dynamic_shape.min_input_shape.keys(
) == self.dynamic_shape.max_input_shape.keys(
) and self.dynamic_shape.min_input_shape.keys(
) == self.dynamic_shape.opt_input_shape.keys():
trt_log_str += 'dynamic_shape '
def run_test(self, quant=False):
if quant:
def teller(program_config, predictor_config):
if predictor_config.tensorrt_precision_mode(
) == paddle_infer.PrecisionType.Int8:
return False
return True
self.add_skip_case(teller, SkipReasons.QUANT_MODEL,
"Only test QUANT model")
else:
trt_log_str += 'static_shape '
trt_log_str += precision_to_str(self.trt_param.precision)
logging.info(' --------- gpu inference ---------')
yield self.create_program_config(use_trt=False)
logging.info(' --------- trt ' + trt_log_str +
' inference ---------')
yield self.create_program_config(
use_trt=True, precision_mode=self.trt_param.precision)
def teller(program_config, predictor_config):
if predictor_config.tensorrt_precision_mode(
) == paddle_infer.PrecisionType.Int8:
return True
return False
self.add_skip_case(teller, SkipReasons.QUANT_MODEL,
"Not test QUANT model")
for prog_config in self.sample_program_configs():
model, params = create_fake_model(prog_config)
if quant:
model, params = create_quant_model(model, params)
feed_data = {}
for name, tensor_config in prog_config.inputs.items():
feed_data[name] = {
'data': tensor_config.data,
'lod': tensor_config.lod
}
results: List[Dict[str, Tensor]] = []
# baseline: gpu run
gpu_config = self.create_inference_config(use_trt=False)
results.append(
self.run_test_config(model, params, prog_config, gpu_config,
feed_data))
self.success_log('RUN_GPU_BASELINE ' + str(prog_config) + ' vs ' +
self.inference_config_str(gpu_config))
for pred_config, nodes_num, threshold in self.sample_predictor_configs(
prog_config):
skip_flag = False
for skip_info in self.skip_cases:
if skip_info[0](prog_config, pred_config):
skip_flag = True
if skip_info[1] == SkipReasons.ALGO_WRONG:
self.skip_log("[ALGO_WRONG] " + skip_info[
2] + ' ' + repr(prog_config) + ' vs ' + self.
inference_config_str(pred_config))
elif skip_info[1] == SkipReasons.TRT_NOT_IMPLEMENTED:
self.skip_log("[TRT_NOT_IMPLEMENTED] " + skip_info[
2] + ' ' + repr(prog_config) + ' vs ' + self.
inference_config_str(pred_config))
elif skip_info[1] == SkipReasons.TRT_NOT_SUPPORT:
self.skip_log("[TRT_NOT_SUPPORT] " + skip_info[
2] + ' ' + repr(prog_config) + ' vs ' + self.
inference_config_str(pred_config))
elif skip_info[1] == SkipReasons.QUANT_MODEL:
pass
else:
raise NotImplementedError
if skip_flag:
continue
try:
results.append(
self.run_test_config(model, params, prog_config,
pred_config, feed_data))
self.assert_tensors_near(threshold, results[-1], results[0])
self.assert_op_size(nodes_num[0], nodes_num[1])
except Exception as e:
self.fail_log(
str(prog_config) + ' vs ' + self.inference_config_str(
pred_config) + str(e))
continue
self.success_log('RUN ' + str(prog_config) + ' vs ' +
self.inference_config_str(pred_config))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册