diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 04ebe0efaed2cf330f557214daee35ddcc49dac7..950e2ecca982645969ec9813096261908fbdea61 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -354,6 +354,12 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_engine_enabled() const { return use_tensorrt_; }
   ///
+  /// \brief Get the TensorRT engine precision.
+  ///
+  /// \return Precision The TensorRT engine precision.
+  ///
+  Precision tensorrt_precision_mode() const { return tensorrt_precision_mode_; }
+  ///
   /// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
   /// \param min_input_shape The min input shape of the subgraph input.
   /// \param max_input_shape The max input shape of the subgraph input.
@@ -366,7 +372,14 @@ struct PD_INFER_DECL AnalysisConfig {
       std::map<std::string, std::vector<int>> max_input_shape,
       std::map<std::string, std::vector<int>> optim_input_shape,
       bool disable_trt_plugin_fp16 = false);
-
+  ///
+  /// \brief A boolean state telling whether the trt dynamic_shape is used.
+  ///
+  /// \return bool Whether the trt dynamic_shape is used.
+  ///
+  bool tensorrt_dynamic_shape_enabled() const {
+    return !min_input_shape_.empty();
+  }
   ///
   /// \brief Prevent ops running in Paddle-TRT
   /// NOTE: just experimental, not an official stable API, easy to be broken.
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 6b3c150a0b9c8eb04948ff191cca4bb3441b60e8..e1678a65c0bcd9790f11046c51e81c8291b49ba6 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -288,7 +288,7 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
   return static_cast<py::bytes>(ss.str());
 }
 
-void CopyPaddleInferTensor(paddle_infer::Tensor &dst,
+void CopyPaddleInferTensor(paddle_infer::Tensor &dst,  // NOLINT
                            const paddle_infer::Tensor &src) {
   return paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src);
 }
@@ -555,6 +555,7 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("min_subgraph_size") = 3,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
            py::arg("use_static") = false, py::arg("use_calib_mode") = true)
+      .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
       .def("set_trt_dynamic_shape_info",
           &AnalysisConfig::SetTRTDynamicShapeInfo,
           py::arg("min_input_shape") =
@@ -564,6 +565,8 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("optim_input_shape") =
               std::map<std::string, std::vector<int>>({}),
           py::arg("disable_trt_plugin_fp16") = false)
+      .def("tensorrt_dynamic_shape_enabled",
+           &AnalysisConfig::tensorrt_dynamic_shape_enabled)
       .def("enable_tensorrt_oss", &AnalysisConfig::EnableTensorRtOSS)
       .def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
       .def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
index 59729e5637c4e9f03a6f871627743f38eaae8c61..30f09e9a69ce07613eba656603acda2530e4ecf9 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import abc
 import os
+import enum
 import logging
 import paddle
 import paddle.fluid as fluid
@@ -29,10 +30,22 @@ from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_mo
 logging.basicConfig(level=logging.INFO,
                     format="%(message)s")
 
+
+class SkipReasons(enum.Enum):
+    # TRT supports the op, but the Paddle-TRT converter has not implemented it
+    # yet; we need to add the feature.
+    TRT_NOT_IMPLEMENTED = 0
+    # TRT does not support the op.
+    TRT_NOT_SUPPORT = 1
+    # The implementation is wrong.
+    ALGO_WRONG = 2
+    # Quant model, which only runs in INT8 mode.
+    QUANT_MODEL = 3
+
+
 class AutoScanTest(unittest.TestCase):
     def __init__(self, methodName='runTest'):
         paddle.enable_static()
         super(AutoScanTest, self).__init__(methodName)
+        self.skip_cases = []
 
     @abc.abstractmethod
     def sample_program_configs(self) -> List[ProgramConfig]:
         raise NotImplementedError
@@ -46,6 +59,18 @@ class AutoScanTest(unittest.TestCase):
     def sample_predictor_configs(self) -> List[paddle_infer.Config]:
         raise NotImplementedError
 
+    def add_skip_case(
+            self,
+            teller: Callable[[ProgramConfig, paddle_infer.Config], bool],
+            reason: SkipReasons,
+            note: str):
+        self.skip_cases.append((teller, reason, note))
+
+    @abc.abstractmethod
+    def check_program_validity(self, program_config: ProgramConfig) -> bool:
+        raise NotImplementedError
+
     def run_test_config(self, model, params, prog_config, pred_config,
                         feed_data) -> Dict[str, np.ndarray]:
         '''
@@ -56,7 +81,7 @@ class AutoScanTest(unittest.TestCase):
 
         for name, _ in prog_config.inputs.items():
             input_tensor = predictor.get_input_handle(name)
-            input_tensor.copy_from_cpu(feed_data[name]['shape'])
+            input_tensor.copy_from_cpu(feed_data[name]['data'])
             if feed_data[name]['lod'] is not None:
                 input_tensor.set_lod(feed_data[name]['lod'])
         predictor.run()
@@ -66,26 +91,6 @@ class AutoScanTest(unittest.TestCase):
             result[out_name] = predictor.get_output_handle(o_name).copy_to_cpu()
         return result
 
-    def assert_op_size(self, trt_engine_num, paddle_op_num):
-        cur_path = os.path.dirname(__file__)
-        last_passed_program = os.path.join(
-            cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
-        model_bytes = paddle.static.load_from_file(last_passed_program)
-        pg = paddle.static.deserialize_program(model_bytes)
-        main_block = pg.desc.block(0)
-        op_size = main_block.op_size()
-        op_types = [
-            main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
-        ]
-        trt_engine_size = sum(op_types)
-        paddle_op_size = op_size - trt_engine_size
-        self.assertTrue(trt_engine_size == trt_engine_num,
-                        'trt_engine_num is {}, but got {}!'.format(
-                            trt_engine_size, trt_engine_num))
-        self.assertTrue(paddle_op_size == paddle_op_num,
-                        'paddle_op_num is {}, but got {}!'.format(
-                            paddle_op_size, paddle_op_num))
-
     def assert_tensors_near(self,
                             threshold: float,
                             tensors: List[Dict[str, np.array]]):
@@ -98,42 +103,6 @@ class AutoScanTest(unittest.TestCase):
                     first[key], arr, atol=threshold),
                 "Output has diff between GPU and TensorRT. ")
") - def run_test(self, - trt_engine_num: int, - paddle_op_num: int, - threshold=1e-5, - quant=False, - error_msg=None): - for prog_config in self.sample_program_configs(): - model, params = create_fake_model(prog_config) - if quant: - model, params = create_quant_model(model, params) - for batch_size in self.batch_size_set: - feed_data = {} - log_str = ' -- Input tensor info: ' - for name, tensor_config in prog_config.inputs.items(): - tensor_shape = tensor_config.shape.copy() - tensor_shape[0] = batch_size - feed_data[name] = { - 'shape': np.random.random(tensor_shape).astype( - tensor_config.dtype), - 'lod': tensor_config.lod - } - log_str += str({ - name: { - 'shape': tensor_shape, - 'lod': tensor_config.lod - } - }) - logging.info(log_str) - results: List[Dict[str, Tensor]] = [] - for pred_config in self.sample_predictor_configs(): - results.append( - self.run_test_config(model, params, prog_config, - pred_config, feed_data)) - try: - self.assert_tensors_near( - threshold=threshold, tensors=results) - self.assert_op_size(trt_engine_num, paddle_op_num) - except: - logging.info('ERROR OCCURED: ' + error_msg) + @abc.abstractmethod + def run_test(self, quant=False): + raise NotImplementedError diff --git a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py index 1343e9673667ac7006febc900ee8f7d0917504dc..ea804f33517b89db4da44ff74c082a49028d8b8e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py @@ -30,24 +30,24 @@ from paddle.fluid.executor import global_scope class TensorConfig: ''' A config builder for a input or a weight. - - InputVar's shape can be [-1, xxx], batch_size ''' def __init__(self, - shape: [List[int]], - dtype: [str]="float32", - data: Optional[np.array]=None, - lod: [List[List[int]]]=None): + lod: Optional[List[List[int]]]=None, + data_gen: Optional[Callable[..., np.array]]=None): ''' shape: The shape of the tensor. dtype: The data type of the tensor. data: The value of WeightVar. for input, it should be None ''' - self.shape = shape - self.dtype = dtype - self.data = data self.lod = lod + self.data_gen = data_gen + self.data = data_gen() + self.dtype = data_gen().dtype + self.shape = data_gen().shape + + def __repr__(self): + return str({'shape': self.shape, 'lod': self.lod, 'dtype': self.dtype}) class OpConfig: @@ -63,6 +63,11 @@ class OpConfig: self.outputs = outputs self.attrs = attrs + def __repr__(self): + log_str = self.type + log_str += str(self.attrs) + return log_str + class ProgramConfig: ''' A config builder for generating a Program. ''' @@ -77,6 +82,19 @@ class ProgramConfig: self.inputs = inputs self.outputs = outputs + def __repr__(self): + log_str = '' + for i in range(len(self.ops)): + if i != len(self.ops) - 1: + log_str += repr(self.ops[i]) + ' + ' + else: + log_str += repr(self.ops[i]) + log_str += ' -- ' + for t, v in self.inputs.items(): + log_str += '[' + t + ': ' + str(v) + ']' + + return log_str + def create_fake_model(program_config): ''' Create a Paddle model(in memory) according to the given config. 
     '''
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
index 109eef2038a77e2552ebd9991f27815f7632cae9..8df3d77050a7ce2a2ee2145e396d34e2fd224cad 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -12,81 +12,223 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from trt_layer_auto_scan_test import TrtLayerAutoScanTest
-from program_config import TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
 import numpy as np
 import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
 
 
 class TrtConvertConv2dTest(TrtLayerAutoScanTest):
-    def setUp(self):
-        self.ops_config = [{
-            "op_type": "conv2d",
-            "op_inputs": {
-                "Input": ["input_data"],
-                "Filter": ["conv2d_weight"]
-            },
-            "op_outputs": {
-                "Output": ["conv_output_data"]
-            },
-            "op_attrs": {
-                "data_format": ["NCHW"],
-                "dilations": [[1, 1]],
-                "padding_algorithm": ["EXPLICIT"],
-                "groups": [1],
-                "paddings": [[0, 3], [3, 1]],
-                "strides": [[1, 1], [2, 2]],
-            }
-        }, {
-            "op_type": "relu",
-            "op_inputs": {
-                "X": ["conv_output_data"]
-            },
-            "op_outputs": {
-                "Out": ["relu_output_data"]
-            },
-            "op_attrs": {}
-        }]
-        self.batch_size_set = [1, 2, 4]
-
-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        weight = np.random.randn(24, 3, 3, 3).astype("float32")
-        filter = TensorConfig(shape=[24, 3, 3, 3], data=weight)
-        if op_attr_list[0]["data_format"] == "NCHW":
-            input_data = TensorConfig(shape=[-1, 3, 64, 64])
-        else:
-            input_data = TensorConfig(shape=[-1, 64, 64, 3])
-        self.program_weights = {"conv2d_weight": filter}
-        self.program_inputs = {"input_data": input_data}
-        self.program_outputs = ["relu_output_data"]
-
-    def test_check_fp32_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        # the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
+    def check_program_validity(self, program_config: ProgramConfig) -> bool:
+        # TODO: This is just an example of filtering out configs with invalid attrs.
+        inputs = program_config.inputs
+        weights = program_config.weights
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
 
-    def test_check_fp16_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
+        # groups restriction.
+        if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[
+                1] * attrs[0]['groups']:
+            return False
 
-    def test_dynamic_shape_fp32_check_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
+        # Other restrictions: TODO.
+
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            # TODO: This is just an example to illustrate the relation between the attrs and the input.
+            # Different data can be generated for each attr combination.
+            if attrs[0]['groups'] == 1:
+                return np.ones([2, 3, 64, 64]).astype(np.float32)
+            else:
+                return np.ones([1, 3, 64, 64]).astype(np.float32)
+
+        def generate_weight1(attrs: List[Dict[str, Any]]):
+            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+
+        # for strides in [[1,1], [2,2]]:
+        #     for paddings in [[0,3], [3,1]]:
+        #         for groups in [1]:
+        #             for padding_algorithm in ['EXPLICIT']:
+        #                 for dilations in [[1,1]]:
+        #                     for data_format in ['NCHW']:
+
+        for strides in [[1, 1], [2, 2], [1, 2], [2, 3]]:
+            for paddings in [[0, 3], [3, 1], [1, 1, 1, 1], [2, 1, 1, 3]]:
+                for groups in [1, 2]:
+                    for padding_algorithm in ['EXPLICIT', 'SAME', 'VALID']:
+                        for dilations in [[1, 1], [1, 2]]:
+                            for data_format in ['NCHW']:
+                                dics = [{
+                                    "dilations": dilations,
+                                    "padding_algorithm": padding_algorithm,
+                                    "groups": groups,
+                                    "paddings": paddings,
+                                    "strides": strides,
+                                    "data_format": data_format
+                                }, {}]
+
+                                ops_config = [{
+                                    "op_type": "conv2d",
+                                    "op_inputs": {
+                                        "Input": ["input_data"],
+                                        "Filter": ["conv2d_weight"]
+                                    },
+                                    "op_outputs": {
+                                        "Output": ["conv_output_data"]
+                                    },
+                                    "op_attrs": dics[0]
+                                }, {
+                                    "op_type": "relu",
+                                    "op_inputs": {
+                                        "X": ["conv_output_data"]
+                                    },
+                                    "op_outputs": {
+                                        "Out": ["relu_output_data"]
+                                    },
+                                    "op_attrs": dics[1]
+                                }]
+                                ops = self.generate_op_config(ops_config)
+
+                                program_config = ProgramConfig(
+                                    ops=ops,
+                                    weights={
+                                        "conv2d_weight": TensorConfig(
+                                            data_gen=partial(generate_weight1,
+                                                             dics))
+                                    },
+                                    inputs={
+                                        "input_data": TensorConfig(
+                                            data_gen=partial(generate_input1,
+                                                             dics))
+                                    },
+                                    outputs=["relu_output_data"])
+
+                                # If the config is invalid, skip those cases.
+                                if not self.check_program_validity(
+                                        program_config):
+                                    continue
+
+                                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if len(attrs[0]['paddings']) == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32],
+                    '': []
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64],
+                    '': []
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64],
+                    '': []
+                }
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # TODO: This is just an example; it needs to be fixed.
+            if len(attrs[0]['paddings']) == 4:
+                return 0, 3
+            else:
+                return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
 
-    def test_dynamic_shape_fp16_check_output(self):
+        # for static_shape
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+        self.trt_param.precision = paddle_infer.PrecisionType.Int8
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-1
 
-    def test_trt_int8_check_output(self):
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-2
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
-        self.run_test(
-            trt_engine_num=1, paddle_op_num=2, quant=True, threshold=1e-1)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-1
+
+    def add_skip_trt_case(self):
+        # TODO(wilber): This is just an example to illustrate the skip usage.
+        def teller1(program_config, predictor_config):
+            if program_config.ops[0].attrs['groups'] == 2:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller1, SkipReasons.ALGO_WRONG,
+            "Need to repair the case: ......TODO, just for the example")
+
+        def teller2(program_config, predictor_config):
+            if len(program_config.ops[0].attrs['paddings']) == 4:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller2, SkipReasons.TRT_NOT_IMPLEMENTED,
+            "NOT Implemented: we need to add support in the future ....TODO, just for the example"
+        )
+
+        def teller3(program_config, predictor_config):
+            dilations = program_config.ops[0].attrs['dilations']
+            padding_algorithm = program_config.ops[0].attrs['padding_algorithm']
+            if (dilations[0] == 1 and
+                    dilations[1] == 2) or padding_algorithm != 'EXPLICIT':
+                return True
+            return False
+
+        self.add_skip_case(teller3, SkipReasons.TRT_NOT_SUPPORT,
+                           "TODO, just for the example")
+
+        def teller4(program_config, predictor_config):
+            strides = program_config.ops[0].attrs['strides']
+            if strides[0] != strides[1] or strides[0] == strides[1] == 2:
+                return True
+            return False
+
+        self.add_skip_case(teller4, SkipReasons.TRT_NOT_SUPPORT,
+                           "TODO, just for the example")
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+    def test_quant(self):
+        self.add_skip_trt_case()
+        self.run_test(quant=True)
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py b/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
index 715006771878795674d9391b926be40d2ed27bc1..66ad7351a1b1b4a19456dcaa776e73277ace8266 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import itertools
 import abc
+import enum
 import logging
 import paddle
 import paddle.fluid as fluid
@@ -23,9 +24,9 @@ import paddle.fluid.core as core
 import paddle.inference as paddle_infer
 from paddle import compat as cpt
 
-from typing import *
-from program_config import TensorConfig, OpConfig, ProgramConfig
-from auto_scan_test import AutoScanTest
+from typing import Optional, List, Callable, Dict, Any, Set
+from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
+from auto_scan_test import AutoScanTest, SkipReasons
 
 logging.basicConfig(level=logging.INFO, format="%(message)s")
 
@@ -60,7 +61,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
     def __init__(self, methodName='runTest'):
         super(TrtLayerAutoScanTest, self).__init__(methodName)
         self.trt_param = self.TensorRTParam(
-            workspace_size=0,
+            workspace_size=1024,
             max_batch_size=4,
             min_subgraph_size=0,
             precision=paddle_infer.PrecisionType.Float32,
@@ -68,62 +69,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
             use_calib_mode=False)
         self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)
 
-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def sample_program_configs(self):
-        all_op_attrs_keys = []
-        all_op_attrs_values = []
-        for op_config in self.ops_config:
-            all_op_attrs_keys.append(list(op_config["op_attrs"].keys()))
-            all_op_attrs_values.extend(list(op_config["op_attrs"].values()))
-        if len(all_op_attrs_values) == 0:
-            all_op_attrs_values.append([None])
-        for attrs_sample in itertools.product(*all_op_attrs_values):
-            op_attr_list = []
-            index = 0
-            ops = []
-            log_str = 'TEST_CASE: '
-            for i in range(len(self.ops_config)):
-                op_config = self.ops_config[i]
-                op_attr = dict(
-                    zip(
-                        list(op_config["op_attrs"].keys()), attrs_sample[
-                            index:index + len(op_config["op_attrs"])]))
-
-                if i != len(self.ops_config) - 1:
-                    log_str += op_config['op_type'] + str(op_attr) + ' + '
-                else:
-                    log_str += op_config['op_type'] + str(op_attr)
-
-                op_attr_list.append(op_attr)
-                index = index + len(op_config["op_attrs"])
-                ops.append(
-                    OpConfig(
-                        type=op_config["op_type"],
-                        inputs=op_config["op_inputs"],
-                        outputs=op_config["op_outputs"],
-                        attrs=op_attr))
-
-            logging.info(log_str)
-            self.update_program_input_and_weight_with_attr(op_attr_list)
-            # if no weight need to save, we create a place_holder to help seriazlie params.
-            if not self.program_weights:
-                self.program_weights = {
-                    "place_holder_weight": TensorConfig(
-                        shape=[1], data=np.array([1]).astype(np.float32))
-                }
-            program_config = ProgramConfig(
-                ops=ops,
-                weights=self.program_weights,
-                inputs=self.program_inputs,
-                outputs=self.program_outputs)
-            yield program_config
-
-    def create_program_config(
-            self, use_trt=True,
-            precision_mode=paddle_infer.PrecisionType.Float32):
+    def create_inference_config(self, use_trt=True) -> paddle_infer.Config:
         config = paddle_infer.Config()
         config.disable_glog_info()
         config.enable_use_gpu(100, 0)
@@ -133,7 +79,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
                 max_batch_size=self.trt_param.max_batch_size,
                 workspace_size=self.trt_param.workspace_size,
                 min_subgraph_size=self.trt_param.min_subgraph_size,
-                precision_mode=precision_mode,
+                precision_mode=self.trt_param.precision,
                 use_static=self.trt_param.use_static,
                 use_calib_mode=self.trt_param.use_calib_mode)
             if len(self.dynamic_shape.min_input_shape
@@ -148,32 +94,152 @@ class TrtLayerAutoScanTest(AutoScanTest):
                 self.dynamic_shape.disable_trt_plugin_fp16)
         return config
 
-    @abc.abstractmethod
-    def sample_predictor_configs(self):
-        def precision_to_str(p):
-            if p == paddle_infer.PrecisionType.Float32:
-                return 'float32'
-            elif p == paddle_infer.PrecisionType.Half:
-                return 'half'
-            elif p == paddle_infer.PrecisionType.Int8:
-                return 'int8'
-            else:
-                raise NotImplementedError('not supported type.')
-
-        trt_log_str = ''
-        if len(self.dynamic_shape.min_input_shape
-               ) != 0 and self.dynamic_shape.min_input_shape.keys(
-               ) == self.dynamic_shape.max_input_shape.keys(
-               ) and self.dynamic_shape.min_input_shape.keys(
-               ) == self.dynamic_shape.opt_input_shape.keys():
-            trt_log_str += 'dynamic_shape '
+    def assert_tensors_near(self,
+                            threshold: float,
+                            tensor: Dict[str, np.array],
+                            baseline: Dict[str, np.array]):
+        for key, arr in tensor.items():
+            self.assertTrue(
+                np.allclose(
+                    baseline[key], arr, atol=threshold),
+                "Output has diff between GPU and TensorRT. ")
") + + def assert_op_size(self, trt_engine_num, paddle_op_num): + last_passed_program = 'transpose_flatten_concat_fuse_pass.pdmodel' + model_bytes = paddle.static.load_from_file(last_passed_program) + pg = paddle.static.deserialize_program(model_bytes) + main_block = pg.desc.block(0) + op_size = main_block.op_size() + op_types = [ + main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size) + ] + trt_engine_size = sum(op_types) + paddle_op_size = op_size - trt_engine_size + self.assertTrue(trt_engine_size == trt_engine_num, + 'trt_engine_num is {}, but got {}!'.format( + trt_engine_size, trt_engine_num)) + self.assertTrue(paddle_op_size == paddle_op_num, + 'paddle_op_num is {}, but got {}!'.format( + paddle_op_size, paddle_op_num)) + + def skip_log(self, msg: str): + logging.warning("SKIP: " + msg) + + def fail_log(self, msg: str): + logging.error("FAILE: " + msg) + + def success_log(self, msg: str): + logging.info("SUCCESS: " + msg) + + def validate(self, func: Callable[..., bool]): + pass + + def generate_op_config(self, + ops_config: List[Dict[str, Any]]) -> List[OpConfig]: + ops = [] + for i in range(len(ops_config)): + op_config = ops_config[i] + ops.append( + OpConfig( + type=op_config['op_type'], + inputs=op_config['op_inputs'], + outputs=op_config['op_outputs'], + attrs=op_config['op_attrs'])) + return ops + + def inference_config_str(self, config: paddle_infer.Config): + dic = {} + enable_trt = config.tensorrt_engine_enabled() + trt_precison = config.tensorrt_precision_mode() + trt_dynamic_shape = config.tensorrt_dynamic_shape_enabled() + if enable_trt: + dic['use_trt'] = True + dic['trt_precision'] = trt_precison + dic['use_dynamic_shape'] = trt_dynamic_shape + else: + dic['use_trt'] = False + return str(dic) + + def run_test(self, quant=False): + if quant: + + def teller(program_config, predictor_config): + if predictor_config.tensorrt_precision_mode( + ) == paddle_infer.PrecisionType.Int8: + return False + return True + + self.add_skip_case(teller, SkipReasons.QUANT_MODEL, + "Only test QUANT model") else: - trt_log_str += 'static_shape ' - trt_log_str += precision_to_str(self.trt_param.precision) - - logging.info(' --------- gpu inference ---------') - yield self.create_program_config(use_trt=False) - logging.info(' --------- trt ' + trt_log_str + - ' inference ---------') - yield self.create_program_config( - use_trt=True, precision_mode=self.trt_param.precision) + + def teller(program_config, predictor_config): + if predictor_config.tensorrt_precision_mode( + ) == paddle_infer.PrecisionType.Int8: + return True + return False + + self.add_skip_case(teller, SkipReasons.QUANT_MODEL, + "Not test QUANT model") + + for prog_config in self.sample_program_configs(): + model, params = create_fake_model(prog_config) + if quant: + model, params = create_quant_model(model, params) + + feed_data = {} + for name, tensor_config in prog_config.inputs.items(): + feed_data[name] = { + 'data': tensor_config.data, + 'lod': tensor_config.lod + } + + results: List[Dict[str, Tensor]] = [] + + # baseline: gpu run + gpu_config = self.create_inference_config(use_trt=False) + results.append( + self.run_test_config(model, params, prog_config, gpu_config, + feed_data)) + self.success_log('RUN_GPU_BASELINE ' + str(prog_config) + ' vs ' + + self.inference_config_str(gpu_config)) + + for pred_config, nodes_num, threshold in self.sample_predictor_configs( + prog_config): + skip_flag = False + for skip_info in self.skip_cases: + if skip_info[0](prog_config, pred_config): + skip_flag = True + if 
+                        if skip_info[1] == SkipReasons.ALGO_WRONG:
+                            self.skip_log(
+                                "[ALGO_WRONG] " + skip_info[2] + ' ' +
+                                repr(prog_config) + ' vs ' +
+                                self.inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.TRT_NOT_IMPLEMENTED:
+                            self.skip_log(
+                                "[TRT_NOT_IMPLEMENTED] " + skip_info[2] + ' ' +
+                                repr(prog_config) + ' vs ' +
+                                self.inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.TRT_NOT_SUPPORT:
+                            self.skip_log(
+                                "[TRT_NOT_SUPPORT] " + skip_info[2] + ' ' +
+                                repr(prog_config) + ' vs ' +
+                                self.inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.QUANT_MODEL:
+                            pass
+                        else:
+                            raise NotImplementedError
+                if skip_flag:
+                    continue
+
+                try:
+                    results.append(
+                        self.run_test_config(model, params, prog_config,
+                                             pred_config, feed_data))
+                    self.assert_tensors_near(threshold, results[-1], results[0])
+                    self.assert_op_size(nodes_num[0], nodes_num[1])
+                except Exception as e:
+                    self.fail_log(
+                        str(prog_config) + ' vs ' + self.inference_config_str(
+                            pred_config) + ' ' + str(e))
+                    continue
+
+                self.success_log('RUN ' + str(prog_config) + ' vs ' +
+                                 self.inference_config_str(pred_config))
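
A note on the two new AnalysisConfig getters: they are exposed to Python mainly so that inference_config_str can log what each predictor actually runs with. As a sanity check, something like the following should hold once this patch is applied; this is a sketch, not part of the patch, and assumes a CUDA + TensorRT build of Paddle:

```python
import paddle.inference as paddle_infer

config = paddle_infer.Config()
config.enable_use_gpu(100, 0)
config.enable_tensorrt_engine(
    precision_mode=paddle_infer.PrecisionType.Half)

# Before dynamic shape info is set, the new getter reports False.
assert not config.tensorrt_dynamic_shape_enabled()

config.set_trt_dynamic_shape_info(
    {"input_data": [1, 3, 32, 32]},  # min_input_shape
    {"input_data": [4, 3, 64, 64]},  # max_input_shape
    {"input_data": [1, 3, 64, 64]})  # optim_input_shape

assert config.tensorrt_precision_mode() == paddle_infer.PrecisionType.Half
assert config.tensorrt_dynamic_shape_enabled()
```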
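The new TensorConfig contract in program_config.py describes a tensor by a data_gen callable rather than an explicit shape/dtype/data triple; shape and dtype are derived from the generated array. A minimal sketch of the idea (illustrative only; assumes it is run from the same directory as program_config.py):

```python
from functools import partial

import numpy as np
from program_config import TensorConfig


def gen(shape):
    # Any callable returning an ndarray works; partial() binds the shape.
    return np.random.random(shape).astype(np.float32)


t = TensorConfig(data_gen=partial(gen, [2, 3]))
print(t)  # e.g. {'shape': (2, 3), 'lod': None, 'dtype': dtype('float32')}
```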
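For the end-to-end picture, this is roughly what a converter test looks like against the reworked harness: sample_program_configs yields ProgramConfigs built from data_gen partials, sample_predictor_configs yields (config, (trt_engine_num, paddle_op_num), threshold) triples, and add_skip_case registers known-bad combinations. The sketch below is hypothetical (a relu test that does not exist in this patch) and assumes create_fake_model accepts an empty weights dict:

```python
import unittest
from functools import partial

import numpy as np
import paddle.inference as paddle_infer
from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest


class TrtConvertReluTest(TrtLayerAutoScanTest):
    def check_program_validity(self, program_config: ProgramConfig) -> bool:
        return True  # relu has no attrs that could be invalid.

    def sample_program_configs(self):
        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)

        for shape in [[1, 3, 64, 64], [4, 3, 64, 64]]:
            ops = self.generate_op_config([{
                "op_type": "relu",
                "op_inputs": {"X": ["input_data"]},
                "op_outputs": {"Out": ["relu_output_data"]},
                "op_attrs": {}
            }])
            yield ProgramConfig(
                ops=ops,
                weights={},  # assumed to be allowed for weight-free ops
                inputs={
                    "input_data": TensorConfig(
                        data_gen=partial(generate_input, shape))
                },
                outputs=["relu_output_data"])

    def sample_predictor_configs(self, program_config):
        # One TRT engine expected, plus feed and fetch ops on the Paddle side.
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), (1, 2), 1e-2

    def test(self):
        self.run_test()


if __name__ == "__main__":
    unittest.main()
```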