TRT unit tests: add serialize and deserialize checks (#35645)

bf983c2f · Wilber · GitHub · efeec79b · bf983c2f · bf983c2f
5 changed files
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -582,6 +582,15 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
                << desc.Output("Out").size() << ".";
        return false;
      }
+      auto* block = desc.Block();
+      auto* x_var_desc = block->FindVar(desc.Input("X")[0]);
+      auto* y_var_desc = block->FindVar(desc.Input("Y")[0]);
+      const auto x_shape = x_var_desc->GetShape();
+      const auto y_shape = y_var_desc->GetShape();
+      if (x_shape.size() == 1 && y_shape.size() == 1) {
+        VLOG(3) << "Now trt may not support two 1d tensor elementwise op.";
+        return false;
+      }
    }

    if (op_type == "stack") {

--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -1155,6 +1155,8 @@ function parallel_test_base_gpu() {
 EOF

 set -x
+        # Run only 30% of the trt_convert unit-test cases.
+        export TEST_NUM_PERCENT_CASES=0.3
        precison_cases=""
        bash $PADDLE_ROOT/tools/check_added_ut.sh
        if [ ${PRECISION_TEST:-OFF} == "ON" ]; then

--- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py
@@ -102,6 +102,8 @@ class ProgramConfig:
        log_str += ' -- '
        for t, v in self.inputs.items():
            log_str += '[' + t + ': ' + str(v) + ']'
+        for t, v in self.weights.items():
+            log_str += '[' + t + ': ' + str(v) + ']'

        return log_str


--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -163,10 +163,10 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
            attrs, False), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-2
+            attrs, False), (1e-5, 1e-5)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-1
+            attrs, False), (1e-5, 1e-5)

        # for dynamic_shape
        generate_dynamic_shape(attrs)
@@ -174,11 +174,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
                                                                     True), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
-                                                                     True), 1e-2
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), (1e-5, 1e-5)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
-        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
-                                                                     True), 1e-1
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), (1e-5, 1e-5)

    def add_skip_trt_case(self):
        # TODO(wilber): This is just the example to illustrate the skip usage.

--- a/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
@@ -17,7 +17,10 @@ import unittest
 import itertools
 import abc
 import enum
+import sys
+import os
 import logging
+import time
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -65,14 +68,18 @@ class TrtLayerAutoScanTest(AutoScanTest):
            max_batch_size=4,
            min_subgraph_size=0,
            precision=paddle_infer.PrecisionType.Float32,
-            use_static=False,
+            use_static=True,
            use_calib_mode=False)
        self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)
+        self.num_percent_cases = float(
+            os.getenv(
+                'TEST_NUM_PERCENT_CASES', default='1.0'))

    def create_inference_config(self, use_trt=True) -> paddle_infer.Config:
        config = paddle_infer.Config()
        config.disable_glog_info()
        config.enable_use_gpu(100, 0)
+        config.set_optim_cache_dir('trt_convert_cache_dir')
        if use_trt:
            config.switch_ir_debug()
            config.enable_tensorrt_engine(
@@ -95,7 +102,8 @@ class TrtLayerAutoScanTest(AutoScanTest):
        return config

    def assert_tensors_near(self,
-                            threshold: float,
+                            atol: float,
+                            rtol: float,
                            tensor: Dict[str, np.array],
                            baseline: Dict[str, np.array]):
        for key, arr in tensor.items():
@@ -104,7 +112,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
                "The output shape of GPU and TensorRT are not equal.")
            self.assertTrue(
                np.allclose(
-                    baseline[key], arr, atol=threshold),
+                    baseline[key], arr, atol=atol, rtol=rtol),
                "Output has diff between GPU and TensorRT. ")

    def assert_op_size(self, trt_engine_num, paddle_op_num):
@@ -165,8 +173,21 @@ class TrtLayerAutoScanTest(AutoScanTest):

    def run_test(self, quant=False):
        status = True
-
+        np.random.seed(int(1000 * time.time()) % 2**32)
+        run_flags = []
        for prog_config in self.sample_program_configs():
+            # Keep each case with probability num_percent_cases (set to 0.3 in CI).
+            if np.random.rand() < self.num_percent_cases:
+                run_flags.append(True)
+            else:
+                run_flags.append(False)
+        np.random.seed(1024)
+
+        for prog_config, run_flags in zip(self.sample_program_configs(),
+                                          run_flags):
+            if not run_flags:
+                continue
+
            # if program is invalid, we should skip that cases.
            if not self.is_program_valid(prog_config):
                continue
@@ -194,6 +215,17 @@ class TrtLayerAutoScanTest(AutoScanTest):

            for pred_config, nodes_num, threshold in self.sample_predictor_configs(
                    prog_config):
+
+                if isinstance(threshold, float):
+                    atol = threshold
+                    rtol = 1e-8
+                elif isinstance(threshold, list) or isinstance(threshold,
+                                                               tuple):
+                    atol = threshold[0]
+                    rtol = threshold[1]
+                else:
+                    raise NotImplementedError
+
                if quant and pred_config.tensorrt_precision_mode(
                ) != paddle_infer.PrecisionType.Int8:
                    continue
@@ -218,12 +250,18 @@ class TrtLayerAutoScanTest(AutoScanTest):
                        break

                try:
+                    pred_config_deserialize = paddle_infer.Config(pred_config)
                    results.append(
                        self.run_test_config(model, params, prog_config,
                                             pred_config, feed_data))
-                    self.assert_tensors_near(threshold, results[-1], results[0])
+                    self.assert_tensors_near(atol, rtol, results[-1],
+                                             results[0])
                    if not skip_flag:
                        self.assert_op_size(nodes_num[0], nodes_num[1])
+                    # deserialize test
+                    if nodes_num[0] > 0:
+                        self.run_test_config(model, params, prog_config,
+                                             pred_config_deserialize, feed_data)
                except Exception as e:
                    self.fail_log(
                        str(prog_config) + ' vs ' + self.inference_config_str(