Unverified    Commit e8772486    Authored by: Wilber    Committed by: GitHub

update inference trt ut framework (#35418)

Parent: e8a88164
@@ -354,6 +354,12 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_engine_enabled() const { return use_tensorrt_; }
   ///
+  /// \brief Get the TensorRT engine precision.
+  ///
+  /// \return Precision The TensorRT engine precision.
+  ///
+  Precision tensorrt_precision_mode() const { return tensorrt_precision_mode_; }
+  ///
   /// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
   /// \param min_input_shape The min input shape of the subgraph input.
   /// \param max_input_shape The max input shape of the subgraph input.
@@ -366,7 +372,14 @@ struct PD_INFER_DECL AnalysisConfig {
       std::map<std::string, std::vector<int>> max_input_shape,
       std::map<std::string, std::vector<int>> optim_input_shape,
       bool disable_trt_plugin_fp16 = false);
+  ///
+  /// \brief A boolean state telling whether the trt dynamic_shape is used.
+  ///
+  /// \return bool Whether the trt dynamic_shape is used.
+  ///
+  bool tensorrt_dynamic_shape_enabled() const {
+    return !min_input_shape_.empty();
+  }
   ///
   /// \brief Prevent ops running in Paddle-TRT
   /// NOTE: just experimental, not an official stable API, easy to be broken.
......
@@ -288,7 +288,7 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
   return static_cast<py::bytes>(ss.str());
 }
-void CopyPaddleInferTensor(paddle_infer::Tensor &dst,
+void CopyPaddleInferTensor(paddle_infer::Tensor &dst,  // NOLINT
                            const paddle_infer::Tensor &src) {
   return paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src);
 }
@@ -555,6 +555,7 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("min_subgraph_size") = 3,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("use_static") = false, py::arg("use_calib_mode") = true)
+      .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
       .def("set_trt_dynamic_shape_info",
            &AnalysisConfig::SetTRTDynamicShapeInfo,
            py::arg("min_input_shape") =
@@ -564,6 +565,8 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("optim_input_shape") =
                std::map<std::string, std::vector<int>>({}),
            py::arg("disable_trt_plugin_fp16") = false)
+      .def("tensorrt_dynamic_shape_enabled",
+           &AnalysisConfig::tensorrt_dynamic_shape_enabled)
       .def("enable_tensorrt_oss", &AnalysisConfig::EnableTensorRtOSS)
      .def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
      .def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)
......
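Taken together with the header change above, the two new bindings make the TensorRT precision and dynamic-shape state readable from Python. A minimal sketch of how they can be queried, assuming a GPU build of Paddle; the "input_data" shape keys are illustrative:

import paddle.inference as paddle_infer

config = paddle_infer.Config()
config.enable_use_gpu(100, 0)
config.enable_tensorrt_engine(
    max_batch_size=4,
    min_subgraph_size=0,
    precision_mode=paddle_infer.PrecisionType.Half,
    use_static=False,
    use_calib_mode=False)
# Newly exposed getters:
print(config.tensorrt_precision_mode())         # PrecisionType.Half
print(config.tensorrt_dynamic_shape_enabled())  # False: no shape info set yet
config.set_trt_dynamic_shape_info(
    min_input_shape={"input_data": [1, 3, 32, 32]},
    max_input_shape={"input_data": [4, 3, 64, 64]},
    optim_input_shape={"input_data": [1, 3, 64, 64]})
print(config.tensorrt_dynamic_shape_enabled())  # True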
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import abc
 import os
+import enum
 import logging
 import paddle
 import paddle.fluid as fluid
@@ -29,10 +30,22 @@ from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_mo
 logging.basicConfig(level=logging.INFO, format="%(message)s")

+class SkipReasons(enum.Enum):
+    # Paddle does not support it, but trt does; the feature needs to be added.
+    TRT_NOT_IMPLEMENTED = 0
+    # TRT does not support it.
+    TRT_NOT_SUPPORT = 1
+    # The implementation is wrong.
+    ALGO_WRONG = 2
+    # Quant model, only to be run in INT8 mode.
+    QUANT_MODEL = 3
+
 class AutoScanTest(unittest.TestCase):
     def __init__(self, methodName='runTest'):
         paddle.enable_static()
         super(AutoScanTest, self).__init__(methodName)
+        self.skip_cases = []

     @abc.abstractmethod
     def sample_program_configs(self) -> List[ProgramConfig]:
@@ -46,6 +59,18 @@ class AutoScanTest(unittest.TestCase):
     def sample_predictor_configs(self) -> List[paddle_infer.Config]:
         raise NotImplementedError

+    @abc.abstractmethod
+    def add_skip_case(
+            self,
+            teller: [Callable[[ProgramConfig, paddle_infer.Config], bool]],
+            reason: SkipReasons,
+            note: str):
+        self.skip_cases.append((teller, reason, note))
+
+    @abc.abstractmethod
+    def check_program_validity(self, program_config: ProgramConfig) -> bool:
+        raise NotImplementedError
+
     def run_test_config(self, model, params, prog_config, pred_config,
                         feed_data) -> Dict[str, np.ndarray]:
         '''
@@ -56,7 +81,7 @@ class AutoScanTest(unittest.TestCase):
         for name, _ in prog_config.inputs.items():
             input_tensor = predictor.get_input_handle(name)
-            input_tensor.copy_from_cpu(feed_data[name]['shape'])
+            input_tensor.copy_from_cpu(feed_data[name]['data'])
             if feed_data[name]['lod'] is not None:
                 input_tensor.set_lod(feed_data[name]['lod'])
         predictor.run()
@@ -66,26 +91,6 @@ class AutoScanTest(unittest.TestCase):
             result[out_name] = predictor.get_output_handle(o_name).copy_to_cpu()
         return result

-    def assert_op_size(self, trt_engine_num, paddle_op_num):
-        cur_path = os.path.dirname(__file__)
-        last_passed_program = os.path.join(
-            cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
-        model_bytes = paddle.static.load_from_file(last_passed_program)
-        pg = paddle.static.deserialize_program(model_bytes)
-        main_block = pg.desc.block(0)
-        op_size = main_block.op_size()
-        op_types = [
-            main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
-        ]
-        trt_engine_size = sum(op_types)
-        paddle_op_size = op_size - trt_engine_size
-        self.assertTrue(trt_engine_size == trt_engine_num,
-                        'trt_engine_num is {}, but got {}!'.format(
-                            trt_engine_size, trt_engine_num))
-        self.assertTrue(paddle_op_size == paddle_op_num,
-                        'paddle_op_num is {}, but got {}!'.format(
-                            paddle_op_size, paddle_op_num))
     def assert_tensors_near(self,
                             threshold: float,
                             tensors: List[Dict[str, np.array]]):
@@ -98,42 +103,6 @@ class AutoScanTest(unittest.TestCase):
                     first[key], arr, atol=threshold),
                 "Output has diff between GPU and TensorRT. ")

-    def run_test(self,
-                 trt_engine_num: int,
-                 paddle_op_num: int,
-                 threshold=1e-5,
-                 quant=False,
-                 error_msg=None):
-        for prog_config in self.sample_program_configs():
-            model, params = create_fake_model(prog_config)
-            if quant:
-                model, params = create_quant_model(model, params)
-            for batch_size in self.batch_size_set:
-                feed_data = {}
-                log_str = ' -- Input tensor info: '
-                for name, tensor_config in prog_config.inputs.items():
-                    tensor_shape = tensor_config.shape.copy()
-                    tensor_shape[0] = batch_size
-                    feed_data[name] = {
-                        'shape': np.random.random(tensor_shape).astype(
-                            tensor_config.dtype),
-                        'lod': tensor_config.lod
-                    }
-                    log_str += str({
-                        name: {
-                            'shape': tensor_shape,
-                            'lod': tensor_config.lod
-                        }
-                    })
-                logging.info(log_str)
-                results: List[Dict[str, Tensor]] = []
-                for pred_config in self.sample_predictor_configs():
-                    results.append(
-                        self.run_test_config(model, params, prog_config,
-                                             pred_config, feed_data))
-                try:
-                    self.assert_tensors_near(
-                        threshold=threshold, tensors=results)
-                    self.assert_op_size(trt_engine_num, paddle_op_num)
-                except:
-                    logging.info('ERROR OCCURED: ' + error_msg)
+    @abc.abstractmethod
+    def run_test(self, quant=False):
+        raise NotImplementedError
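The skip registry introduced above stores (teller, reason, note) tuples that run_test consults for each sampled case. A hedged sketch of how a subclass registers one; the test class and the skipped pattern are hypothetical:

from auto_scan_test import SkipReasons
from trt_layer_auto_scan_test import TrtLayerAutoScanTest


class TrtConvertFooTest(TrtLayerAutoScanTest):  # hypothetical subclass
    def add_skip_trt_case(self):
        def teller(program_config, predictor_config):
            # Hypothetical condition: skip cases whose first op carries
            # a 4-element paddings attribute.
            return len(program_config.ops[0].attrs['paddings']) == 4

        self.add_skip_case(teller, SkipReasons.TRT_NOT_SUPPORT,
                           "4-element paddings not supported, for example.")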
@@ -30,24 +30,24 @@ from paddle.fluid.executor import global_scope
 class TensorConfig:
     '''
     A config builder for a input or a weight.
-    InputVar's shape can be [-1, xxx], batch_size
     '''

     def __init__(self,
-                 shape: [List[int]],
-                 dtype: [str]="float32",
-                 data: Optional[np.array]=None,
-                 lod: [List[List[int]]]=None):
+                 lod: Optional[List[List[int]]]=None,
+                 data_gen: Optional[Callable[..., np.array]]=None):
         '''
         shape: The shape of the tensor.
         dtype: The data type of the tensor.
         data: The value of WeightVar. for input, it should be None
         '''
-        self.shape = shape
-        self.dtype = dtype
-        self.data = data
         self.lod = lod
+        self.data_gen = data_gen
+        self.data = data_gen()
+        self.dtype = data_gen().dtype
+        self.shape = data_gen().shape
+
+    def __repr__(self):
+        return str({'shape': self.shape, 'lod': self.lod, 'dtype': self.dtype})

 class OpConfig:
@@ -63,6 +63,11 @@ class OpConfig:
         self.outputs = outputs
         self.attrs = attrs

+    def __repr__(self):
+        log_str = self.type
+        log_str += str(self.attrs)
+        return log_str
+
 class ProgramConfig:
     ''' A config builder for generating a Program. '''
@@ -77,6 +82,19 @@ class ProgramConfig:
         self.inputs = inputs
         self.outputs = outputs

+    def __repr__(self):
+        log_str = ''
+        for i in range(len(self.ops)):
+            if i != len(self.ops) - 1:
+                log_str += repr(self.ops[i]) + ' + '
+            else:
+                log_str += repr(self.ops[i])
+        log_str += ' -- '
+        for t, v in self.inputs.items():
+            log_str += '[' + t + ': ' + str(v) + ']'
+        return log_str
+
 def create_fake_model(program_config):
     ''' Create a Paddle model(in memory) according to the given config. '''
......
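With the constructor change above, shape and dtype are no longer passed in; they are derived from the array that data_gen produces. A small sketch, where generate_input is an illustrative helper:

from functools import partial

import numpy as np

from program_config import TensorConfig


def generate_input(shape):
    return np.random.random(shape).astype(np.float32)


input_cfg = TensorConfig(data_gen=partial(generate_input, [1, 3, 64, 64]))
print(input_cfg)  # e.g. {'shape': (1, 3, 64, 64), 'lod': None, 'dtype': dtype('float32')}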
@@ -12,81 +12,223 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from trt_layer_auto_scan_test import TrtLayerAutoScanTest
-from program_config import TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
 import numpy as np
 import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set

 class TrtConvertConv2dTest(TrtLayerAutoScanTest):
-    def setUp(self):
-        self.ops_config = [{
-            "op_type": "conv2d",
-            "op_inputs": {
-                "Input": ["input_data"],
-                "Filter": ["conv2d_weight"]
-            },
-            "op_outputs": {
-                "Output": ["conv_output_data"]
-            },
-            "op_attrs": {
-                "data_format": ["NCHW"],
-                "dilations": [[1, 1]],
-                "padding_algorithm": ["EXPLICIT"],
-                "groups": [1],
-                "paddings": [[0, 3], [3, 1]],
-                "strides": [[1, 1], [2, 2]],
-            }
-        }, {
-            "op_type": "relu",
-            "op_inputs": {
-                "X": ["conv_output_data"]
-            },
-            "op_outputs": {
-                "Out": ["relu_output_data"]
-            },
-            "op_attrs": {}
-        }]
-        self.batch_size_set = [1, 2, 4]
-
-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        weight = np.random.randn(24, 3, 3, 3).astype("float32")
-        filter = TensorConfig(shape=[24, 3, 3, 3], data=weight)
-        if op_attr_list[0]["data_format"] == "NCHW":
-            input_data = TensorConfig(shape=[-1, 3, 64, 64])
-        else:
-            input_data = TensorConfig(shape=[-1, 64, 64, 3])
-        self.program_weights = {"conv2d_weight": filter}
-        self.program_inputs = {"input_data": input_data}
-        self.program_outputs = ["relu_output_data"]
-
-    def test_check_fp32_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        # the fused tensorrt engine num is 1, and paddle op num is 2 (feed and fetch).
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
-
-    def test_check_fp16_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
-
-    def test_dynamic_shape_fp32_check_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
-
-    def test_dynamic_shape_fp16_check_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
-
-    def test_trt_int8_check_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Int8
-        self.run_test(
-            trt_engine_num=1, paddle_op_num=2, quant=True, threshold=1e-1)
+    def check_program_validity(self, program_config: ProgramConfig) -> bool:
+        # TODO: This is just an example of filtering out invalid attr combinations.
+        inputs = program_config.inputs
+        weights = program_config.weights
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # groups restriction.
+        if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[
+                1] * attrs[0]['groups']:
+            return False
+
+        # other restrictions, todo.
+
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            # TODO: This is just an example to illustrate the relation between
+            # the attrs and the input; different data can be generated per attr.
+            if attrs[0]['groups'] == 1:
+                return np.ones([2, 3, 64, 64]).astype(np.float32)
+            else:
+                return np.ones([1, 3, 64, 64]).astype(np.float32)
+
+        def generate_weight1(attrs: List[Dict[str, Any]]):
+            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+
+        # for strides in [[1,1], [2,2]]:
+        #     for paddings in [[0,3], [3,1]]:
+        #         for groups in [1]:
+        #             for padding_algorithm in ['EXPLICIT']:
+        #                 for dilations in [[1,1]]:
+        #                     for data_format in ['NCHW']:
+        for strides in [[1, 1], [2, 2], [1, 2], [2, 3]]:
+            for paddings in [[0, 3], [3, 1], [1, 1, 1, 1], [2, 1, 1, 3]]:
+                for groups in [1, 2]:
+                    for padding_algorithm in ['EXPLICIT', 'SAME', 'VALID']:
+                        for dilations in [[1, 1], [1, 2]]:
+                            for data_format in ['NCHW']:
+                                dics = [{
+                                    "dilations": dilations,
+                                    "padding_algorithm": padding_algorithm,
+                                    "groups": groups,
+                                    "paddings": paddings,
+                                    "strides": strides,
+                                    "data_format": data_format
+                                }, {}]
+
+                                ops_config = [{
+                                    "op_type": "conv2d",
+                                    "op_inputs": {
+                                        "Input": ["input_data"],
+                                        "Filter": ["conv2d_weight"]
+                                    },
+                                    "op_outputs": {
+                                        "Output": ["conv_output_data"]
+                                    },
+                                    "op_attrs": dics[0]
+                                }, {
+                                    "op_type": "relu",
+                                    "op_inputs": {
+                                        "X": ["conv_output_data"]
+                                    },
+                                    "op_outputs": {
+                                        "Out": ["relu_output_data"]
+                                    },
+                                    "op_attrs": dics[1]
+                                }]
+                                ops = self.generate_op_config(ops_config)
+
+                                program_config = ProgramConfig(
+                                    ops=ops,
+                                    weights={
+                                        "conv2d_weight": TensorConfig(
+                                            data_gen=partial(generate_weight1,
+                                                             dics))
+                                    },
+                                    inputs={
+                                        "input_data": TensorConfig(
+                                            data_gen=partial(generate_input1,
+                                                             dics))
+                                    },
+                                    outputs=["relu_output_data"])
+
+                                # If the config is invalid, skip that case.
+                                if not self.check_program_validity(
+                                        program_config):
+                                    continue
+
+                                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if len(attrs[0]['paddings']) == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32],
+                    '': []
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64],
+                    '': []
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64],
+                    '': []
+                }
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # TODO: This is just the example, needs to be fixed.
+            if len(attrs[0]['paddings']) == 4:
+                return 0, 3
+            else:
+                return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+        self.trt_param.precision = paddle_infer.PrecisionType.Int8
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-1
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-2
+        self.trt_param.precision = paddle_infer.PrecisionType.Int8
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-1
+
+    def add_skip_trt_case(self):
+        # TODO(wilber): This is just an example to illustrate the skip usage.
+        def teller1(program_config, predictor_config):
+            if program_config.ops[0].attrs['groups'] == 2:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller1, SkipReasons.ALGO_WRONG,
+            "Need to repair the case: ......TODO, just for the example")
+
+        def teller2(program_config, predictor_config):
+            if len(program_config.ops[0].attrs['paddings']) == 4:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller2, SkipReasons.TRT_NOT_IMPLEMENTED,
+            "NOT Implemented: we need to add support in the future ....TODO, just for the example"
+        )
+
+        def teller3(program_config, predictor_config):
+            if (
+                    program_config.ops[0].attrs['dilations'][0] == 1 and
+                    program_config.ops[0].attrs['dilations'][1] == 2
+            ) or program_config.ops[0].attrs['padding_algorithm'] != 'EXPLICIT':
+                return True
+            return False
+
+        self.add_skip_case(teller3, SkipReasons.TRT_NOT_SUPPORT,
+                           "TODO, just for the example")
+
+        def teller4(program_config, predictor_config):
+            if program_config.ops[0].attrs['strides'][0] != program_config.ops[
+                    0].attrs['strides'][1] or program_config.ops[0].attrs[
+                        'strides'][0] == program_config.ops[0].attrs['strides'][
+                            1] == 2:
+                return True
+            return False
+
+        self.add_skip_case(teller4, SkipReasons.TRT_NOT_SUPPORT,
+                           "TODO, just for the example")
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+    def test_quant(self):
+        self.add_skip_trt_case()
+        self.run_test(quant=True)

 if __name__ == "__main__":
......
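The class above replaces the old one-test-per-precision methods with a yield protocol: sample_predictor_configs emits a (predictor config, expected (trt_engine_num, paddle_op_num), threshold) triple per run. A method-level sketch of the minimal form; the node counts are illustrative:

def sample_predictor_configs(self, program_config):
    # Yield (config, (trt_engine_num, paddle_op_num), threshold) triples;
    # run_test compares each run against the GPU baseline per triple.
    self.trt_param.precision = paddle_infer.PrecisionType.Float32
    yield self.create_inference_config(), (1, 2), 1e-5
    self.trt_param.precision = paddle_infer.PrecisionType.Half
    yield self.create_inference_config(), (1, 2), 1e-2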
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import itertools
 import abc
+import enum
 import logging
 import paddle
 import paddle.fluid as fluid
@@ -23,9 +24,9 @@ import paddle.fluid.core as core
 import paddle.inference as paddle_infer
 from paddle import compat as cpt
-from typing import *
-from program_config import TensorConfig, OpConfig, ProgramConfig
-from auto_scan_test import AutoScanTest
+from typing import Optional, List, Callable, Dict, Any, Set
+from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
+from auto_scan_test import AutoScanTest, SkipReasons

 logging.basicConfig(level=logging.INFO, format="%(message)s")
@@ -60,7 +61,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
     def __init__(self, methodName='runTest'):
         super(TrtLayerAutoScanTest, self).__init__(methodName)
         self.trt_param = self.TensorRTParam(
-            workspace_size=0,
+            workspace_size=1024,
             max_batch_size=4,
             min_subgraph_size=0,
             precision=paddle_infer.PrecisionType.Float32,
@@ -68,62 +69,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
             use_calib_mode=False)
         self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)

-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def sample_program_configs(self):
-        all_op_attrs_keys = []
-        all_op_attrs_values = []
-        for op_config in self.ops_config:
-            all_op_attrs_keys.append(list(op_config["op_attrs"].keys()))
-            all_op_attrs_values.extend(list(op_config["op_attrs"].values()))
-        if len(all_op_attrs_values) == 0:
-            all_op_attrs_values.append([None])
-        for attrs_sample in itertools.product(*all_op_attrs_values):
-            op_attr_list = []
-            index = 0
-            ops = []
-            log_str = 'TEST_CASE: '
-            for i in range(len(self.ops_config)):
-                op_config = self.ops_config[i]
-                op_attr = dict(
-                    zip(
-                        list(op_config["op_attrs"].keys()), attrs_sample[
-                            index:index + len(op_config["op_attrs"])]))
-                if i != len(self.ops_config) - 1:
-                    log_str += op_config['op_type'] + str(op_attr) + ' + '
-                else:
-                    log_str += op_config['op_type'] + str(op_attr)
-                op_attr_list.append(op_attr)
-                index = index + len(op_config["op_attrs"])
-                ops.append(
-                    OpConfig(
-                        type=op_config["op_type"],
-                        inputs=op_config["op_inputs"],
-                        outputs=op_config["op_outputs"],
-                        attrs=op_attr))
-            logging.info(log_str)
-
-            self.update_program_input_and_weight_with_attr(op_attr_list)
-            # if no weight need to save, we create a place_holder to help serialize params.
-            if not self.program_weights:
-                self.program_weights = {
-                    "place_holder_weight": TensorConfig(
-                        shape=[1], data=np.array([1]).astype(np.float32))
-                }
-            program_config = ProgramConfig(
-                ops=ops,
-                weights=self.program_weights,
-                inputs=self.program_inputs,
-                outputs=self.program_outputs)
-            yield program_config
-
-    def create_program_config(
-            self, use_trt=True,
-            precision_mode=paddle_infer.PrecisionType.Float32):
+    def create_inference_config(self, use_trt=True) -> paddle_infer.Config:
         config = paddle_infer.Config()
         config.disable_glog_info()
         config.enable_use_gpu(100, 0)
@@ -133,7 +79,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
                 max_batch_size=self.trt_param.max_batch_size,
                 workspace_size=self.trt_param.workspace_size,
                 min_subgraph_size=self.trt_param.min_subgraph_size,
-                precision_mode=precision_mode,
+                precision_mode=self.trt_param.precision,
                 use_static=self.trt_param.use_static,
                 use_calib_mode=self.trt_param.use_calib_mode)
             if len(self.dynamic_shape.min_input_shape
@@ -148,32 +94,152 @@ class TrtLayerAutoScanTest(AutoScanTest):
                     self.dynamic_shape.disable_trt_plugin_fp16)
         return config

-    @abc.abstractmethod
-    def sample_predictor_configs(self):
-        def precision_to_str(p):
-            if p == paddle_infer.PrecisionType.Float32:
-                return 'float32'
-            elif p == paddle_infer.PrecisionType.Half:
-                return 'half'
-            elif p == paddle_infer.PrecisionType.Int8:
-                return 'int8'
-            else:
-                raise NotImplementedError('not supported type.')
-
-        trt_log_str = ''
-        if len(self.dynamic_shape.min_input_shape
-               ) != 0 and self.dynamic_shape.min_input_shape.keys(
-               ) == self.dynamic_shape.max_input_shape.keys(
-               ) and self.dynamic_shape.min_input_shape.keys(
-               ) == self.dynamic_shape.opt_input_shape.keys():
-            trt_log_str += 'dynamic_shape '
-        else:
-            trt_log_str += 'static_shape '
-        trt_log_str += precision_to_str(self.trt_param.precision)
-
-        logging.info(' --------- gpu inference ---------')
-        yield self.create_program_config(use_trt=False)
-        logging.info(' --------- trt ' + trt_log_str +
-                     ' inference ---------')
-        yield self.create_program_config(
-            use_trt=True, precision_mode=self.trt_param.precision)
+    def assert_tensors_near(self,
+                            threshold: float,
+                            tensor: Dict[str, np.array],
+                            baseline: Dict[str, np.array]):
+        for key, arr in tensor.items():
+            self.assertTrue(
+                np.allclose(
+                    baseline[key], arr, atol=threshold),
+                "Output has diff between GPU and TensorRT. ")
+
+    def assert_op_size(self, trt_engine_num, paddle_op_num):
+        last_passed_program = 'transpose_flatten_concat_fuse_pass.pdmodel'
+        model_bytes = paddle.static.load_from_file(last_passed_program)
+        pg = paddle.static.deserialize_program(model_bytes)
+        main_block = pg.desc.block(0)
+        op_size = main_block.op_size()
+        op_types = [
+            main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
+        ]
+        trt_engine_size = sum(op_types)
+        paddle_op_size = op_size - trt_engine_size
+        self.assertTrue(trt_engine_size == trt_engine_num,
+                        'trt_engine_num is {}, but got {}!'.format(
+                            trt_engine_size, trt_engine_num))
+        self.assertTrue(paddle_op_size == paddle_op_num,
+                        'paddle_op_num is {}, but got {}!'.format(
+                            paddle_op_size, paddle_op_num))
+
+    def skip_log(self, msg: str):
+        logging.warning("SKIP: " + msg)
+
+    def fail_log(self, msg: str):
+        logging.error("FAIL: " + msg)
+
+    def success_log(self, msg: str):
+        logging.info("SUCCESS: " + msg)
+
+    def validate(self, func: Callable[..., bool]):
+        pass
+
+    def generate_op_config(self,
+                           ops_config: List[Dict[str, Any]]) -> List[OpConfig]:
+        ops = []
+        for i in range(len(ops_config)):
+            op_config = ops_config[i]
+            ops.append(
+                OpConfig(
+                    type=op_config['op_type'],
+                    inputs=op_config['op_inputs'],
+                    outputs=op_config['op_outputs'],
+                    attrs=op_config['op_attrs']))
+        return ops
+
+    def inference_config_str(self, config: paddle_infer.Config):
+        dic = {}
+        enable_trt = config.tensorrt_engine_enabled()
+        trt_precision = config.tensorrt_precision_mode()
+        trt_dynamic_shape = config.tensorrt_dynamic_shape_enabled()
+        if enable_trt:
+            dic['use_trt'] = True
+            dic['trt_precision'] = trt_precision
+            dic['use_dynamic_shape'] = trt_dynamic_shape
+        else:
+            dic['use_trt'] = False
+        return str(dic)
+
+    def run_test(self, quant=False):
+        if quant:
+
+            def teller(program_config, predictor_config):
+                if predictor_config.tensorrt_precision_mode(
+                ) == paddle_infer.PrecisionType.Int8:
+                    return False
+                return True
+
+            self.add_skip_case(teller, SkipReasons.QUANT_MODEL,
+                               "Only test QUANT model")
+        else:
+
+            def teller(program_config, predictor_config):
+                if predictor_config.tensorrt_precision_mode(
+                ) == paddle_infer.PrecisionType.Int8:
+                    return True
+                return False
+
+            self.add_skip_case(teller, SkipReasons.QUANT_MODEL,
+                               "Not test QUANT model")
+
+        for prog_config in self.sample_program_configs():
+            model, params = create_fake_model(prog_config)
+            if quant:
+                model, params = create_quant_model(model, params)
+            feed_data = {}
+            for name, tensor_config in prog_config.inputs.items():
+                feed_data[name] = {
+                    'data': tensor_config.data,
+                    'lod': tensor_config.lod
+                }
+            results: List[Dict[str, Tensor]] = []
+
+            # baseline: gpu run
+            gpu_config = self.create_inference_config(use_trt=False)
+            results.append(
+                self.run_test_config(model, params, prog_config, gpu_config,
+                                     feed_data))
+            self.success_log('RUN_GPU_BASELINE ' + str(prog_config) + ' vs ' +
+                             self.inference_config_str(gpu_config))
+
+            for pred_config, nodes_num, threshold in self.sample_predictor_configs(
+                    prog_config):
+                skip_flag = False
+                for skip_info in self.skip_cases:
+                    if skip_info[0](prog_config, pred_config):
+                        skip_flag = True
+                        if skip_info[1] == SkipReasons.ALGO_WRONG:
+                            self.skip_log("[ALGO_WRONG] " + skip_info[
+                                2] + ' ' + repr(prog_config) + ' vs ' + self.
+                                          inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.TRT_NOT_IMPLEMENTED:
+                            self.skip_log("[TRT_NOT_IMPLEMENTED] " + skip_info[
+                                2] + ' ' + repr(prog_config) + ' vs ' + self.
+                                          inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.TRT_NOT_SUPPORT:
+                            self.skip_log("[TRT_NOT_SUPPORT] " + skip_info[
+                                2] + ' ' + repr(prog_config) + ' vs ' + self.
+                                          inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.QUANT_MODEL:
+                            pass
+                        else:
+                            raise NotImplementedError
+                if skip_flag:
+                    continue
+
+                try:
+                    results.append(
+                        self.run_test_config(model, params, prog_config,
+                                             pred_config, feed_data))
+                    self.assert_tensors_near(threshold, results[-1], results[0])
+                    self.assert_op_size(nodes_num[0], nodes_num[1])
+                except Exception as e:
+                    self.fail_log(
+                        str(prog_config) + ' vs ' + self.inference_config_str(
+                            pred_config) + str(e))
+                    continue
+
+                self.success_log('RUN ' + str(prog_config) + ' vs ' +
+                                 self.inference_config_str(pred_config))
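Putting the pieces together, a minimal layer test under the new framework looks like the sketch below. It is a hypothetical example modeled on the conv2d test above, using a single relu op; details such as whether create_fake_model needs a placeholder weight for an empty weights dict are assumptions here:

from functools import partial

import numpy as np
import paddle.inference as paddle_infer

from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest


class TrtConvertReluTest(TrtLayerAutoScanTest):  # hypothetical example
    def sample_program_configs(self):
        def generate_input():
            return np.random.random([1, 3, 64, 64]).astype(np.float32)

        ops = self.generate_op_config([{
            "op_type": "relu",
            "op_inputs": {"X": ["input_data"]},
            "op_outputs": {"Out": ["relu_output_data"]},
            "op_attrs": {}
        }])
        yield ProgramConfig(
            ops=ops,
            weights={},
            inputs={"input_data": TensorConfig(data_gen=generate_input)},
            outputs=["relu_output_data"])

    def sample_predictor_configs(self, program_config):
        # (config, (trt_engine_num, paddle_op_num), threshold) triples.
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5

    def test(self):
        self.run_test()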