update inference trt ut framework (#35418)

e8772486 · Wilber · GitHub · e8a88164 · e8772486 · e8772486
6 changed file
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -354,6 +354,12 @@ struct PD_INFER_DECL AnalysisConfig {
  ///
  bool tensorrt_engine_enabled() const { return use_tensorrt_; }
  ///
+  /// \brief Get the precision mode configured for the TensorRT engine.
+  ///
+  /// \return Precision The stored TensorRT precision mode.
+  ///
+  Precision tensorrt_precision_mode() const { return tensorrt_precision_mode_; }
+  ///
  /// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
  /// \param min_input_shape The min input shape of the subgraph input.
  /// \param max_input_shape The max input shape of the subgraph input.
@@ -366,7 +372,14 @@ struct PD_INFER_DECL AnalysisConfig {
      std::map<std::string, std::vector<int>> max_input_shape,
      std::map<std::string, std::vector<int>> optim_input_shape,
      bool disable_trt_plugin_fp16 = false);
-
+  ///
+  /// \brief A boolean state telling whether the trt dynamic_shape is used.
+  ///
+  /// \return bool True when min_input_shape_ is non-empty, false otherwise.
+  ///
+  bool tensorrt_dynamic_shape_enabled() const {
+    return !min_input_shape_.empty();
+  }
  ///
  /// \brief Prevent ops running in Paddle-TRT
  /// NOTE: just experimental, not an official stable API, easy to be broken.

--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -288,7 +288,7 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
  return static_cast<py::bytes>(ss.str());
 }

-void CopyPaddleInferTensor(paddle_infer::Tensor &dst,
+void CopyPaddleInferTensor(paddle_infer::Tensor &dst,  // NOLINT
                           const paddle_infer::Tensor &src) {
  return paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src);
 }
@@ -555,6 +555,7 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("min_subgraph_size") = 3,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
           py::arg("use_static") = false, py::arg("use_calib_mode") = true)
+      .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
      .def("set_trt_dynamic_shape_info",
           &AnalysisConfig::SetTRTDynamicShapeInfo,
           py::arg("min_input_shape") =
@@ -564,6 +565,8 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("optim_input_shape") =
               std::map<std::string, std::vector<int>>({}),
           py::arg("disable_trt_plugin_fp16") = false)
+      .def("tensorrt_dynamic_shape_enabled",
+           &AnalysisConfig::tensorrt_dynamic_shape_enabled)
      .def("enable_tensorrt_oss", &AnalysisConfig::EnableTensorRtOSS)
      .def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
      .def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)

--- a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import abc
 import os
+import enum
 import logging
 import paddle
 import paddle.fluid as fluid
@@ -29,10 +30,22 @@ from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_mo
 logging.basicConfig(level=logging.INFO, format="%(message)s")


+class SkipReasons(enum.Enum):
+    # TensorRT supports the case but Paddle does not yet; to be added.
+    TRT_NOT_IMPLEMENTED = 0
+    # TensorRT does not support the case.
+    TRT_NOT_SUPPORT = 1
+    # The implementation is wrong.
+    ALGO_WRONG = 2
+    # Quant model; it is only meant to be run in INT8 mode.
+    QUANT_MODEL = 3
+
+
 class AutoScanTest(unittest.TestCase):
    def __init__(self, methodName='runTest'):
        paddle.enable_static()
        super(AutoScanTest, self).__init__(methodName)
+        self.skip_cases = []  # (teller, reason, note) tuples, see add_skip_case

    @abc.abstractmethod
    def sample_program_configs(self) -> List[ProgramConfig]:
@@ -46,6 +59,18 @@ class AutoScanTest(unittest.TestCase):
    def sample_predictor_configs(self) -> List[paddle_infer.Config]:
        raise NotImplementedError

+    def add_skip_case(
+            self,
+            teller: Callable[[ProgramConfig, paddle_infer.Config], bool],
+            reason: SkipReasons,
+            note: str):
+        '''Record a skip rule as a (teller, reason, note) tuple.'''
+        self.skip_cases.append((teller, reason, note))
+
+    @abc.abstractmethod
+    def check_program_validity(self, program_config: ProgramConfig) -> bool:
+        raise NotImplementedError  # subclasses must override
+
    def run_test_config(self, model, params, prog_config, pred_config,
                        feed_data) -> Dict[str, np.ndarray]:
        '''
@@ -56,7 +81,7 @@ class AutoScanTest(unittest.TestCase):

        for name, _ in prog_config.inputs.items():
            input_tensor = predictor.get_input_handle(name)
-            input_tensor.copy_from_cpu(feed_data[name]['shape'])
+            input_tensor.copy_from_cpu(feed_data[name]['data'])
            if feed_data[name]['lod'] is not None:
                input_tensor.set_lod(feed_data[name]['lod'])
        predictor.run()
@@ -66,26 +91,6 @@ class AutoScanTest(unittest.TestCase):
            result[out_name] = predictor.get_output_handle(o_name).copy_to_cpu()
        return result

-    def assert_op_size(self, trt_engine_num, paddle_op_num):
-        cur_path = os.path.dirname(__file__)
-        last_passed_program = os.path.join(
-            cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
-        model_bytes = paddle.static.load_from_file(last_passed_program)
-        pg = paddle.static.deserialize_program(model_bytes)
-        main_block = pg.desc.block(0)
-        op_size = main_block.op_size()
-        op_types = [
-            main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
-        ]
-        trt_engine_size = sum(op_types)
-        paddle_op_size = op_size - trt_engine_size
-        self.assertTrue(trt_engine_size == trt_engine_num,
-                        'trt_engine_num is {}, but got {}!'.format(
-                            trt_engine_size, trt_engine_num))
-        self.assertTrue(paddle_op_size == paddle_op_num,
-                        'paddle_op_num is {}, but got {}!'.format(
-                            paddle_op_size, paddle_op_num))
-
    def assert_tensors_near(self,
                            threshold: float,
                            tensors: List[Dict[str, np.array]]):
@@ -98,42 +103,6 @@ class AutoScanTest(unittest.TestCase):
                        first[key], arr, atol=threshold),
                    "Output has diff between GPU and TensorRT. ")

-    def run_test(self,
-                 trt_engine_num: int,
-                 paddle_op_num: int,
-                 threshold=1e-5,
-                 quant=False,
-                 error_msg=None):
-        for prog_config in self.sample_program_configs():
-            model, params = create_fake_model(prog_config)
-            if quant:
-                model, params = create_quant_model(model, params)
-            for batch_size in self.batch_size_set:
-                feed_data = {}
-                log_str = '  -- Input tensor info: '
-                for name, tensor_config in prog_config.inputs.items():
-                    tensor_shape = tensor_config.shape.copy()
-                    tensor_shape[0] = batch_size
-                    feed_data[name] = {
-                        'shape': np.random.random(tensor_shape).astype(
-                            tensor_config.dtype),
-                        'lod': tensor_config.lod
-                    }
-                    log_str += str({
-                        name: {
-                            'shape': tensor_shape,
-                            'lod': tensor_config.lod
-                        }
-                    })
-                logging.info(log_str)
-                results: List[Dict[str, Tensor]] = []
-                for pred_config in self.sample_predictor_configs():
-                    results.append(
-                        self.run_test_config(model, params, prog_config,
-                                             pred_config, feed_data))
-                try:
-                    self.assert_tensors_near(
-                        threshold=threshold, tensors=results)
-                    self.assert_op_size(trt_engine_num, paddle_op_num)
-                except:
-                    logging.info('ERROR OCCURED: ' + error_msg)
+    @abc.abstractmethod
+    def run_test(self, quant=False):
+        raise NotImplementedError  # subclasses must override
--- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py
@@ -30,24 +30,24 @@ from paddle.fluid.executor import global_scope
 class TensorConfig:
    '''
    A config builder for a input or a weight.
-  
-    InputVar's shape can be [-1, xxx], batch_size
    '''

    def __init__(self,
-                 shape: [List[int]],
-                 dtype: [str]="float32",
-                 data: Optional[np.array]=None,
-                 lod: [List[List[int]]]=None):
+                 lod: Optional[List[List[int]]]=None,
+                 data_gen: Optional[Callable[..., np.array]]=None):
        '''
        shape: The shape of the tensor.
        dtype: The data type of the tensor.
        data: The value of WeightVar. for input, it should be None 
        '''
-        self.shape = shape
-        self.dtype = dtype
-        self.data = data
        self.lod = lod
+        self.data_gen = data_gen
+        self.data = data_gen()  # generated once; dtype/shape come from it
+        self.dtype = self.data.dtype
+        self.shape = self.data.shape
+
+    def __repr__(self):
+        return str({'shape': self.shape, 'lod': self.lod, 'dtype': self.dtype})


 class OpConfig:
@@ -63,6 +63,11 @@ class OpConfig:
        self.outputs = outputs
        self.attrs = attrs

+    def __repr__(self):
+        # Op type immediately followed by the str() of its attrs, e.g.
+        # "relu{}"; there is no separator between the two parts.
+        return self.type + str(self.attrs)
+

 class ProgramConfig:
    '''  A config builder for generating a Program.  '''
@@ -77,6 +82,19 @@ class ProgramConfig:
        self.inputs = inputs
        self.outputs = outputs

+    def __repr__(self):
+        # Layout: "<op> + <op> -- [<input>: <tensor>][<input>: <tensor>]".
+        # Ops are joined by ' + '; inputs are listed back to back.
+        op_reprs = [repr(op) for op in self.ops]
+        input_reprs = [
+            '[' + name + ': ' + str(tensor) + ']'
+            for name, tensor in self.inputs.items()
+        ]
+        ops_part = ' + '.join(op_reprs)
+        inputs_part = ''.join(input_reprs)
+
+        return ops_part + ' -- ' + inputs_part
+

 def create_fake_model(program_config):
    '''  Create a Paddle model(in memory) according to the given config.  '''

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -12,15 +12,69 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from trt_layer_auto_scan_test import TrtLayerAutoScanTest
-from program_config import TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
 import numpy as np
 import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set


 class TrtConvertConv2dTest(TrtLayerAutoScanTest):
-    def setUp(self):
-        self.ops_config = [{
+    def check_program_validity(self, program_config: ProgramConfig) -> bool:
+        # TODO: This is just the example to remove the wrong attrs.
+        input_cfgs = program_config.inputs
+        weight_cfgs = program_config.weights
+        op_attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # groups restriction: input dim 1 must equal weight dim 1 * groups.
+        in_dim = input_cfgs['input_data'].shape[1]
+        per_group_dim = weight_cfgs['conv2d_weight'].shape[1]
+        if in_dim != per_group_dim * op_attrs[0]['groups']:
+            return False
+
+        # others restriction, todo.
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            # TODO: This is just the example to illustrate the releation between axis and input.
+            # for each attr, can generate different datas
+            if attrs[0]['groups'] == 1:
+                return np.ones([2, 3, 64, 64]).astype(np.float32)
+            else:
+                return np.ones([1, 3, 64, 64]).astype(np.float32)
+
+        def generate_weight1(attrs: List[Dict[str, Any]]):
+            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+
+        # for strides in [[1,1], [2,2]]:
+        #     for paddings in [[0,3], [3,1]]:
+        #         for groups in [1]:
+        #             for padding_algotithm in ['EXPLICIT']:
+        #                 for dilations in [[1,1]]:
+        #                     for data_format in ['NCHW']:
+
+        for strides in [[1, 1], [2, 2], [1, 2], [2, 3]]:
+            for paddings in [[0, 3], [3, 1], [1, 1, 1, 1], [2, 1, 1, 3]]:
+                for groups in [1, 2]:
+                    for padding_algotithm in ['EXPLICIT', 'SAME', 'VALID']:
+                        for dilations in [[1, 1], [1, 2]]:
+                            for data_format in ['NCHW']:
+                                dics = [{
+                                    "data_fromat": data_format,
+                                    "dilations": dilations,
+                                    "padding_algorithm": padding_algotithm,
+                                    "groups": groups,
+                                    "paddings": paddings,
+                                    "strides": strides,
+                                    "data_format": data_format
+                                }, {}]
+
+                                ops_config = [{
                                    "op_type": "conv2d",
                                    "op_inputs": {
                                        "Input": ["input_data"],
@@ -29,14 +83,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
                                    "op_outputs": {
                                        "Output": ["conv_output_data"]
                                    },
-            "op_attrs": {
-                "data_format": ["NCHW"],
-                "dilations": [[1, 1]],
-                "padding_algorithm": ["EXPLICIT"],
-                "groups": [1],
-                "paddings": [[0, 3], [3, 1]],
-                "strides": [[1, 1], [2, 2]],
-            }
+                                    "op_attrs": dics[0]
                                }, {
                                    "op_type": "relu",
                                    "op_inputs": {
@@ -45,48 +92,143 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
                                    "op_outputs": {
                                        "Out": ["relu_output_data"]
                                    },
-            "op_attrs": {}
+                                    "op_attrs": dics[1]
                                }]
-        self.batch_size_set = [1, 2, 4]
+                                ops = self.generate_op_config(ops_config)
+
+                                program_config = ProgramConfig(
+                                    ops=ops,
+                                    weights={
+                                        "conv2d_weight": TensorConfig(
+                                            data_gen=partial(generate_weight1,
+                                                             dics))
+                                    },
+                                    inputs={
+                                        "input_data": TensorConfig(
+                                            data_gen=partial(generate_input1,
+                                                             dics))
+                                    },
+                                    outputs=["relu_output_data"])
+
+                                # if config is invalid, we should skip that cases.
+                                if not self.check_program_validity(
+                                        program_config):
+                                    continue
+
+                                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if len(attrs[0]['paddings']) == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32],
+                    '': []
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64],
+                    '': []
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64],
+                    '': []
+                }
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 64, 64]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }

-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        weight = np.random.randn(24, 3, 3, 3).astype("float32")
-        filter = TensorConfig(shape=[24, 3, 3, 3], data=weight)
-        if op_attr_list[0]["data_format"] == "NCHW":
-            input_data = TensorConfig(shape=[-1, 3, 64, 64])
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # TODO: This is just the example, need to be fixed.
+            if len(attrs[0]['paddings']) == 4:
+                return 0, 3
            else:
-            input_data = TensorConfig(shape=[-1, 64, 64, 3])
-        self.program_weights = {"conv2d_weight": filter}
-        self.program_inputs = {"input_data": input_data}
-        self.program_outputs = ["relu_output_data"]
+                return 1, 2

-    def test_check_fp32_output(self):
-        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        # the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
+        attrs = [op.attrs for op in program_config.ops]
+        # Drop dynamic-shape info left behind by a previous program config so
+        # that the static-shape configs below really are static.
+        self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)

-    def test_check_fp16_output(self):
+        # for static_shape
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-2
+        self.trt_param.precision = paddle_infer.PrecisionType.Int8
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-1

-    def test_dynamic_shape_fp32_check_output(self):
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
-
-    def test_dynamic_shape_fp16_check_output(self):
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
-        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
-        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
-
-    def test_trt_int8_check_output(self):
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-2
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
-        self.run_test(
-            trt_engine_num=1, paddle_op_num=2, quant=True, threshold=1e-1)
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-1
+
+    def add_skip_trt_case(self):
+        # TODO(wilber): This is just the example to illustrate the skip usage.
+        def teller1(program_config, predictor_config):
+            if program_config.ops[0].attrs['groups'] == 2:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller1, SkipReasons.ALGO_WRONG,
+            "Need to repair the case: ......TODO, just for the example")
+
+        def teller2(program_config, predictor_config):
+            if len(program_config.ops[0].attrs['paddings']) == 4:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller2, SkipReasons.TRT_NOT_IMPLEMENTED,
+            "NOT Implemented: we need to add support in the future ....TODO, just for the example"
+        )
+
+        def teller3(program_config, predictor_config):
+            if (
+                    program_config.ops[0].attrs['dilations'][0] == 1 and
+                    program_config.ops[0].attrs['dilations'][1] == 2
+            ) or program_config.ops[0].attrs['padding_algorithm'] != 'EXPLICIT':
+                return True
+            return False
+
+        self.add_skip_case(teller3, SkipReasons.TRT_NOT_SUPPORT,
+                           "TODO, just for the example")
+
+        def teller4(program_config, predictor_config):
+            if program_config.ops[0].attrs['strides'][0] != program_config.ops[
+                    0].attrs['strides'][1] or program_config.ops[0].attrs[
+                        'strides'][0] == program_config.ops[0].attrs['strides'][
+                            1] == 2:
+                return True
+            return False
+
+        self.add_skip_case(teller4, SkipReasons.TRT_NOT_SUPPORT,
+                           "TODO, just for the example")
+
+    def test(self):
+        self.add_skip_trt_case()  # register the skip tellers first
+        self.run_test()
+
+    def test_quant(self):
+        self.add_skip_trt_case()  # register the skip tellers first
+        self.run_test(quant=True)


 if __name__ == "__main__":

--- a/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import itertools
 import abc
+import enum
 import logging
 import paddle
 import paddle.fluid as fluid
@@ -23,9 +24,9 @@ import paddle.fluid.core as core
 import paddle.inference as paddle_infer

 from paddle import compat as cpt
-from typing import *
-from program_config import TensorConfig, OpConfig, ProgramConfig
-from auto_scan_test import AutoScanTest
+from typing import Optional, List, Callable, Dict, Any, Set
+from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
+from auto_scan_test import AutoScanTest, SkipReasons

 logging.basicConfig(level=logging.INFO, format="%(message)s")

@@ -60,7 +61,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
    def __init__(self, methodName='runTest'):
        super(TrtLayerAutoScanTest, self).__init__(methodName)
        self.trt_param = self.TensorRTParam(
-            workspace_size=0,
+            workspace_size=1024,
            max_batch_size=4,
            min_subgraph_size=0,
            precision=paddle_infer.PrecisionType.Float32,
@@ -68,62 +69,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
            use_calib_mode=False)
        self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)

-    def update_program_input_and_weight_with_attr(self, op_attr_list):
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def sample_program_configs(self):
-        all_op_attrs_keys = []
-        all_op_attrs_values = []
-        for op_config in self.ops_config:
-            all_op_attrs_keys.append(list(op_config["op_attrs"].keys()))
-            all_op_attrs_values.extend(list(op_config["op_attrs"].values()))
-        if len(all_op_attrs_values) == 0:
-            all_op_attrs_values.append([None])
-        for attrs_sample in itertools.product(*all_op_attrs_values):
-            op_attr_list = []
-            index = 0
-            ops = []
-            log_str = 'TEST_CASE: '
-            for i in range(len(self.ops_config)):
-                op_config = self.ops_config[i]
-                op_attr = dict(
-                    zip(
-                        list(op_config["op_attrs"].keys()), attrs_sample[
-                            index:index + len(op_config["op_attrs"])]))
-
-                if i != len(self.ops_config) - 1:
-                    log_str += op_config['op_type'] + str(op_attr) + ' + '
-                else:
-                    log_str += op_config['op_type'] + str(op_attr)
-
-                op_attr_list.append(op_attr)
-                index = index + len(op_config["op_attrs"])
-                ops.append(
-                    OpConfig(
-                        type=op_config["op_type"],
-                        inputs=op_config["op_inputs"],
-                        outputs=op_config["op_outputs"],
-                        attrs=op_attr))
-
-            logging.info(log_str)
-            self.update_program_input_and_weight_with_attr(op_attr_list)
-            # if no weight need to save, we create a place_holder to help seriazlie params.
-            if not self.program_weights:
-                self.program_weights = {
-                    "place_holder_weight": TensorConfig(
-                        shape=[1], data=np.array([1]).astype(np.float32))
-                }
-            program_config = ProgramConfig(
-                ops=ops,
-                weights=self.program_weights,
-                inputs=self.program_inputs,
-                outputs=self.program_outputs)
-            yield program_config
-
-    def create_program_config(
-            self, use_trt=True,
-            precision_mode=paddle_infer.PrecisionType.Float32):
+    def create_inference_config(self, use_trt=True) -> paddle_infer.Config:
        config = paddle_infer.Config()
        config.disable_glog_info()
        config.enable_use_gpu(100, 0)
@@ -133,7 +79,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
                max_batch_size=self.trt_param.max_batch_size,
                workspace_size=self.trt_param.workspace_size,
                min_subgraph_size=self.trt_param.min_subgraph_size,
-                precision_mode=precision_mode,
+                precision_mode=self.trt_param.precision,
                use_static=self.trt_param.use_static,
                use_calib_mode=self.trt_param.use_calib_mode)
            if len(self.dynamic_shape.min_input_shape
@@ -148,32 +94,152 @@ class TrtLayerAutoScanTest(AutoScanTest):
                    self.dynamic_shape.disable_trt_plugin_fp16)
        return config

-    @abc.abstractmethod
-    def sample_predictor_configs(self):
-        def precision_to_str(p):
-            if p == paddle_infer.PrecisionType.Float32:
-                return 'float32'
-            elif p == paddle_infer.PrecisionType.Half:
-                return 'half'
-            elif p == paddle_infer.PrecisionType.Int8:
-                return 'int8'
+    def assert_tensors_near(self,
+                            threshold: float,
+                            tensor: Dict[str, np.array],
+                            baseline: Dict[str, np.array]):
+        # Each output in `tensor` must match `baseline[key]` within atol.
+        for key in tensor:
+            is_close = np.allclose(baseline[key], tensor[key], atol=threshold)
+            self.assertTrue(is_close,
+                            "Output has diff between GPU and TensorRT. ")
+
+    def assert_op_size(self, trt_engine_num, paddle_op_num):
+        # Count 'tensorrt_engine' ops vs. all other ops in the loaded program.
+        last_passed_program = 'transpose_flatten_concat_fuse_pass.pdmodel'
+        model_bytes = paddle.static.load_from_file(last_passed_program)
+        pg = paddle.static.deserialize_program(model_bytes)
+        main_block = pg.desc.block(0)
+        op_size = main_block.op_size()
+        trt_engine_size = sum(
+            main_block.op(i).type() == 'tensorrt_engine'
+            for i in range(op_size))
+        paddle_op_size = op_size - trt_engine_size
+        self.assertTrue(trt_engine_size == trt_engine_num,
+                        'trt_engine_num is {}, but got {}!'.format(
+                            trt_engine_num, trt_engine_size))
+        self.assertTrue(paddle_op_size == paddle_op_num,
+                        'paddle_op_num is {}, but got {}!'.format(
+                            paddle_op_num, paddle_op_size))
+
+    def skip_log(self, msg: str):
+        logging.warning("SKIP: " + msg)  # skipped case, WARNING level
+
+    def fail_log(self, msg: str):
+        logging.error("FAILED: " + msg)  # failed case, ERROR level
+
+    def success_log(self, msg: str):
+        logging.info("SUCCESS: " + msg)  # passed case, INFO level
+
+    def validate(self, func: Callable[..., bool]):
+        pass  # placeholder: `func` is currently ignored
+
+    def generate_op_config(self,
+                           ops_config: List[Dict[str, Any]]) -> List[OpConfig]:
+        # Each entry supplies 'op_type', 'op_inputs', 'op_outputs' and
+        # 'op_attrs'; they map one-to-one onto OpConfig's keyword arguments.
+        return [
+            OpConfig(
+                type=cfg['op_type'],
+                inputs=cfg['op_inputs'],
+                outputs=cfg['op_outputs'],
+                attrs=cfg['op_attrs'])
+            for cfg in ops_config
+        ]
+
+    def inference_config_str(self, config: paddle_infer.Config):
+        dic = {}  # summary fields; returned as str(dic)
+        enable_trt = config.tensorrt_engine_enabled()
+        trt_precison = config.tensorrt_precision_mode()
+        trt_dynamic_shape = config.tensorrt_dynamic_shape_enabled()
+        if enable_trt:
+            dic['use_trt'] = True
+            dic['trt_precision'] = trt_precison
+            dic['use_dynamic_shape'] = trt_dynamic_shape
        else:
-                raise NotImplementedError('not supported type.')
+            dic['use_trt'] = False  # precision/dynamic-shape are omitted here
+        return str(dic)

-        trt_log_str = ''
-        if len(self.dynamic_shape.min_input_shape
-               ) != 0 and self.dynamic_shape.min_input_shape.keys(
-               ) == self.dynamic_shape.max_input_shape.keys(
-               ) and self.dynamic_shape.min_input_shape.keys(
-               ) == self.dynamic_shape.opt_input_shape.keys():
-            trt_log_str += 'dynamic_shape '
+    def run_test(self, quant=False):
+        if quant:  # quant run: only Int8 predictor configs are exercised
+
+            def teller(program_config, predictor_config):  # skip non-Int8
+                if predictor_config.tensorrt_precision_mode(
+                ) == paddle_infer.PrecisionType.Int8:
+                    return False
+                return True
+
+            self.add_skip_case(teller, SkipReasons.QUANT_MODEL,
+                               "Only test QUANT model")
        else:
-            trt_log_str += 'static_shape '
-        trt_log_str += precision_to_str(self.trt_param.precision)
-
-        logging.info('    --------- gpu inference ---------')
-        yield self.create_program_config(use_trt=False)
-        logging.info('    --------- trt ' + trt_log_str +
-                     ' inference ---------')
-        yield self.create_program_config(
-            use_trt=True, precision_mode=self.trt_param.precision)
+
+            def teller(program_config, predictor_config):  # skip Int8
+                if predictor_config.tensorrt_precision_mode(
+                ) == paddle_infer.PrecisionType.Int8:
+                    return True
+                return False
+
+            self.add_skip_case(teller, SkipReasons.QUANT_MODEL,
+                               "Not test QUANT model")
+
+        for prog_config in self.sample_program_configs():
+            model, params = create_fake_model(prog_config)
+            if quant:
+                model, params = create_quant_model(model, params)
+
+            feed_data = {}
+            for name, tensor_config in prog_config.inputs.items():
+                feed_data[name] = {
+                    'data': tensor_config.data,
+                    'lod': tensor_config.lod
+                }
+
+            results: List[Dict[str, Tensor]] = []
+
+            # baseline: GPU run with use_trt=False, kept as results[0]
+            gpu_config = self.create_inference_config(use_trt=False)
+            results.append(
+                self.run_test_config(model, params, prog_config, gpu_config,
+                                     feed_data))
+            self.success_log('RUN_GPU_BASELINE ' + str(prog_config) + ' vs ' +
+                             self.inference_config_str(gpu_config))
+
+            for pred_config, nodes_num, threshold in self.sample_predictor_configs(
+                    prog_config):
+                skip_flag = False  # set when any registered teller matches
+                for skip_info in self.skip_cases:
+                    if skip_info[0](prog_config, pred_config):
+                        skip_flag = True
+                        if skip_info[1] == SkipReasons.ALGO_WRONG:
+                            self.skip_log("[ALGO_WRONG] " + skip_info[
+                                2] + ' ' + repr(prog_config) + ' vs ' + self.
+                                          inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.TRT_NOT_IMPLEMENTED:
+                            self.skip_log("[TRT_NOT_IMPLEMENTED] " + skip_info[
+                                2] + ' ' + repr(prog_config) + ' vs ' + self.
+                                          inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.TRT_NOT_SUPPORT:
+                            self.skip_log("[TRT_NOT_SUPPORT] " + skip_info[
+                                2] + ' ' + repr(prog_config) + ' vs ' + self.
+                                          inference_config_str(pred_config))
+                        elif skip_info[1] == SkipReasons.QUANT_MODEL:
+                            pass  # QUANT_MODEL skips are not logged
+                        else:
+                            raise NotImplementedError
+                if skip_flag:
+                    continue
+
+                try:
+                    results.append(
+                        self.run_test_config(model, params, prog_config,
+                                             pred_config, feed_data))
+                    self.assert_tensors_near(threshold, results[-1], results[0])
+                    self.assert_op_size(nodes_num[0], nodes_num[1])
+                except Exception as e:  # failures are logged, not re-raised
+                    self.fail_log(
+                        str(prog_config) + ' vs ' + self.inference_config_str(
+                            pred_config) + str(e))
+                    continue
+
+                self.success_log('RUN ' + str(prog_config) + ' vs ' +
+                                 self.inference_config_str(pred_config))