Cherrypick NV fixes to release/2.4 (#48263)

* Reduce squeeze2_matmul_fuse_pass, flatten tests time (#47098)
* Add missing fp32 config and reduce the testing combination
* Reduce trt matmul pass test max examples
* Loosen TRT fp16 tests tolerance (#47100)
* Loosen TRT half test tolerance to 1e-3 (#47101)
* Loosen TRT half test tolerance to 1e-3 (#47106)
* Update distributed_strategy.proto (#46531)
* Close popen pipe after use (#47053)
* Add launch_bounds (#47285)
* Fix TRT UT failures (#47488)
* Format cherry-picked commits
* CudnnNormConvolution is no longer supported on NVIDIA Hopper GPUs (#48203)
* Skip tests that use fused_ops on H100
* Add error message to FusedOps on H100

Co-authored-by: N Shijie <505749828@qq.com>
Co-authored-by: N Leo Chen <39020268+leo0519@users.noreply.github.com>
Co-authored-by: N Tian Zheng <tizheng@nvidia.com>

Cherrypick NV fixes to release/2.4 (#48263)
* Reduce squeeze2_matmul_fuse_pass, flatten tests time (#47098)
* Add missing fp32 config and reduce the testing combination
* Reduce trt matmul pass test max examples
* Loosen TRT fp16 tests tolerance (#47100)
* Loosen TRT half test tolerance to 1e-3 (#47101)
* Loosen TRT half test tolerance to 1e-3 (#47106)
* Update distributed_strategy.proto (#46531)
* Close popen pipe after use (#47053)
* Add launch_bounds (#47285)
* Fix TRT UT failures (#47488)
* Format cherry-picked commits
* CudnnNormConvolution is no longer supported on NVIDIA Hopper GPUs (#48203)
* Skip tests that use fused_ops on H100
* Add error message to FusedOps on H100

Co-authored-by: N Shijie <505749828@qq.com>
Co-authored-by: N Leo Chen <39020268+leo0519@users.noreply.github.com>
Co-authored-by: N Tian Zheng <tizheng@nvidia.com>
7a0b8625 · zlsh80826 · GitHub · a2f61fef · 7a0b8625 · 7a0b8625
53 changed files
--- a/paddle/fluid/framework/distributed_strategy.proto
+++ b/paddle/fluid/framework/distributed_strategy.proto
@@ -123,6 +123,7 @@ message BuildStrategy {
  optional bool allow_cuda_graph_capture = 14 [ default = false ];
  optional int32 reduce_strategy = 15 [ default = 0 ];
  optional bool fuse_gemm_epilogue = 16 [ default = false ];
+  optional string debug_graphviz_path = 17;
 }
 message ExecutionStrategy {

--- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
@@ -45,6 +45,14 @@ struct NormConvolutionArgs {
           int stride,
           int dilation,
           int group) {
+    PADDLE_ENFORCE_LT(
+        ctx.GetComputeCapability(),
+        90,
+        phi::errors::PreconditionNotMet(
+            "Expect compute compatiblity to be less than 90, but got %d. "
+            "CUDNN FusedOps is no longer available on H100 and later "
+            "devices.",
+            ctx.GetComputeCapability()));
    PADDLE_ENFORCE_EQ(
        input_shape.size(),
        4U,

--- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
@@ -442,7 +442,7 @@ TEST(CudnnNormConvFp16, K1S1) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -472,7 +472,7 @@ TEST(CudnnNormConvFp16, K3S1) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -502,7 +502,7 @@ TEST(CudnnNormConvFp16, K1S1O4) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -532,7 +532,7 @@ TEST(CudnnNormConvFp16, K1S2O4) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() <= 70) {
+  if (ctx->GetComputeCapability() <= 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3), paddle::platform::EnforceNotMet);

--- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h
+++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h
@@ -256,17 +256,19 @@ template <typename T,
          int BlockSizeX,
          int BlockSizeY,
          int VecSize,
-          typename Functor>
+          typename Functor,
-__global__ void FusedDropoutActBiasGrad(Functor act_grad,
+          int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
-                                        const T *dout,
+__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
-                                        const MaskType *mask,
+    Functor act_grad,
-                                        const T *src,
+    const T *dout,
-                                        const T *bias,
+    const MaskType *mask,
-                                        const T factor,
+    const T *src,
-                                        const int64_t rows,
+    const T *bias,
-                                        const int64_t cols,
+    const T factor,
-                                        T *dx,
+    const int64_t rows,
-                                        T *dbias) {
+    const int64_t cols,
+    T *dx,
+    T *dbias) {
  int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x;
  using LoadT = phi::AlignedVector<T, VecSize>;

--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
@@ -35,9 +35,9 @@ try:
    if os.name == 'nt':
        third_lib_path = current_path + os.sep + '..' + os.sep + 'libs'
        # Will load shared library from 'path' on windows
-        os.environ[
+        os.environ['path'] = (
-            'path'] = current_path + ';' + third_lib_path + ';' + os.environ[
+            current_path + ';' + third_lib_path + ';' + os.environ['path']
-                'path']
+        )
        sys.path.insert(0, third_lib_path)
        # Note: from python3.8, PATH will not take effect
        # https://github.com/python/cpython/pull/12302
@@ -47,20 +47,24 @@ try:
 except ImportError as e:
    from .. import compat as cpt
    if os.name == 'nt':
        executable_path = os.path.abspath(os.path.dirname(sys.executable))
        raise ImportError(
            """NOTE: You may need to run \"set PATH=%s;%%PATH%%\"
        if you encounters \"DLL load failed\" errors. If you have python
        installed in other directory, replace \"%s\" with your own
-        directory. The original error is: \n %s""" %
+        directory. The original error is: \n %s"""
-            (executable_path, executable_path, cpt.get_exception_message(e)))
+            % (executable_path, executable_path, cpt.get_exception_message(e))
+        )
    else:
        raise ImportError(
            """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
        if you encounters \"libmkldnn.so not found\" errors. If you have python
        installed in other directory, replace \"/usr/local/lib\" with your own
-        directory. The original error is: \n""" + cpt.get_exception_message(e))
+        directory. The original error is: \n"""
+            + cpt.get_exception_message(e)
+        )
 except Exception as e:
    raise e
@@ -70,36 +74,45 @@ def avx_supported():
    Whether current system(Linux, MacOS, Windows) is supported with AVX.
    """
    from .. import compat as cpt
    sysstr = platform.system().lower()
    has_avx = False
    if sysstr == 'linux':
        try:
-            has_avx = os.popen('cat /proc/cpuinfo | grep -i avx').read() != ''
+            pipe = os.popen('cat /proc/cpuinfo | grep -i avx')
+            has_avx = pipe.read() != ''
+            pipe.close()
        except Exception as e:
-            sys.stderr.write('Can not get the AVX flag from /proc/cpuinfo.\n'
+            sys.stderr.write(
-                             'The original error is: %s\n' %
+                'Can not get the AVX flag from /proc/cpuinfo.\n'
-                             cpt.get_exception_message(e))
+                'The original error is: %s\n' % cpt.get_exception_message(e)
+            )
        return has_avx
    elif sysstr == 'darwin':
        try:
-            has_avx = os.popen(
+            pipe = os.popen('sysctl machdep.cpu.features | grep -i avx')
-                'sysctl machdep.cpu.features | grep -i avx').read() != ''
+            has_avx = pipe.read() != ''
+            pipe.close()
        except Exception as e:
            sys.stderr.write(
                'Can not get the AVX flag from machdep.cpu.features.\n'
-                'The original error is: %s\n' % cpt.get_exception_message(e))
+                'The original error is: %s\n' % cpt.get_exception_message(e)
+            )
        if not has_avx:
            import subprocess
            pipe = subprocess.Popen(
                'sysctl machdep.cpu.leaf7_features | grep -i avx',
                shell=True,
                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE)
+                stderr=subprocess.PIPE,
+            )
            _ = pipe.communicate()
            has_avx = True if pipe.returncode == 0 else False
        return has_avx
    elif sysstr == 'windows':
        import ctypes
        ONE_PAGE = ctypes.c_size_t(0x1000)
        def asm_func(code_str, restype=ctypes.c_uint32, argtypes=()):
@@ -109,24 +122,31 @@ def avx_supported():
            pfnVirtualAlloc.restype = ctypes.c_void_p
            MEM_COMMIT = ctypes.c_ulong(0x1000)
            PAGE_READWRITE = ctypes.c_ulong(0x4)
-            address = pfnVirtualAlloc(None, ONE_PAGE, MEM_COMMIT,
+            address = pfnVirtualAlloc(
-                                      PAGE_READWRITE)
+                None, ONE_PAGE, MEM_COMMIT, PAGE_READWRITE
+            )
            if not address:
                raise Exception("Failed to VirtualAlloc")
            # Copy the code into the memory segment
-            memmove = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p,
+            memmove = ctypes.CFUNCTYPE(
-                                       ctypes.c_void_p,
+                ctypes.c_void_p,
-                                       ctypes.c_size_t)(ctypes._memmove_addr)
+                ctypes.c_void_p,
+                ctypes.c_void_p,
+                ctypes.c_size_t,
+            )(ctypes._memmove_addr)
            if memmove(address, code_str, len(code_str)) < 0:
                raise Exception("Failed to memmove")
            # Enable execute permissions
            PAGE_EXECUTE = ctypes.c_ulong(0x10)
            pfnVirtualProtect = ctypes.windll.kernel32.VirtualProtect
-            res = pfnVirtualProtect(ctypes.c_void_p(address),
+            res = pfnVirtualProtect(
-                                    ONE_PAGE, PAGE_EXECUTE,
+                ctypes.c_void_p(address),
-                                    ctypes.byref(ctypes.c_ulong(0)))
+                ONE_PAGE,
+                PAGE_EXECUTE,
+                ctypes.byref(ctypes.c_ulong(0)),
+            )
            if not res:
                raise Exception("Failed VirtualProtect")
@@ -135,7 +155,8 @@ def avx_supported():
            pfnGetCurrentProcess.restype = ctypes.c_void_p
            prochandle = ctypes.c_void_p(pfnGetCurrentProcess())
            res = ctypes.windll.kernel32.FlushInstructionCache(
-                prochandle, ctypes.c_void_p(address), ONE_PAGE)
+                prochandle, ctypes.c_void_p(address), ONE_PAGE
+            )
            if not res:
                raise Exception("Failed FlushInstructionCache")
@@ -153,12 +174,14 @@ def avx_supported():
            # Convert the code_str into a function that returns uint
            func, address = asm_func(code_str)
            retval = func()
-            ctypes.windll.kernel32.VirtualFree(ctypes.c_void_p(address),
+            ctypes.windll.kernel32.VirtualFree(
-                                               ctypes.c_size_t(0), ONE_PAGE)
+                ctypes.c_void_p(address), ctypes.c_size_t(0), ONE_PAGE
+            )
        except Exception as e:
-            sys.stderr.write('Failed getting the AVX flag on Windows.\n'
+            sys.stderr.write(
-                             'The original error is: %s\n' %
+                'Failed getting the AVX flag on Windows.\n'
-                             cpt.get_exception_message(e))
+                'The original error is: %s\n' % cpt.get_exception_message(e)
+            )
        return (retval & (1 << avx_bit)) > 0
    else:
        sys.stderr.write('Do not get AVX flag on %s\n' % sysstr)
@@ -167,10 +190,10 @@ def avx_supported():
 def run_shell_command(cmd):
    import subprocess
-    out, err = subprocess.Popen(cmd,
-                                stdout=subprocess.PIPE,
+    out, err = subprocess.Popen(
-                                stderr=subprocess.PIPE,
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
-                                shell=True).communicate()
+    ).communicate()
    if err:
        return None
    else:
@@ -179,8 +202,9 @@ def run_shell_command(cmd):
 def get_dso_path(core_so, dso_name):
    if core_so and dso_name:
-        return run_shell_command("ldd %s|grep %s|awk '{print $3}'" %
+        return run_shell_command(
-                                 (core_so, dso_name))
+            "ldd %s|grep %s|awk '{print $3}'" % (core_so, dso_name)
+        )
    else:
        return None
@@ -189,6 +213,7 @@ def load_dso(dso_absolute_path):
    if dso_absolute_path:
        try:
            from ctypes import cdll
            cdll.LoadLibrary(dso_absolute_path)
        except:
            warnings.warn("Load {} failed".format(dso_absolute_path))
@@ -247,12 +272,14 @@ if platform.system().lower() == 'linux':
 try:
    from . import libpaddle
    if avx_supported() and not libpaddle.is_compiled_with_avx():
        sys.stderr.write(
            "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
            "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
            "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
-            "to get better performance.\n")
+            "to get better performance.\n"
+        )
    # assign tensor alias
    libpaddle.LoDTensor = libpaddle.Tensor
@@ -283,6 +310,7 @@ try:
    from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent
    from .libpaddle import _set_current_stream
    from .libpaddle import _get_phi_kernel_name
    if sys.platform != 'win32':
        from .libpaddle import _set_process_pids
        from .libpaddle import _erase_process_pids
@@ -295,12 +323,18 @@ try:
 except Exception as e:
    if has_paddle_dy_lib:
        sys.stderr.write(
-            'Error: Can not import paddle core while this file exists: ' +
+            'Error: Can not import paddle core while this file exists: '
-            current_path + os.sep + 'libpaddle.' + dy_lib_suffix + '\n')
+            + current_path
+            + os.sep
+            + 'libpaddle.'
+            + dy_lib_suffix
+            + '\n'
+        )
    if not avx_supported() and libpaddle.is_compiled_with_avx():
        sys.stderr.write(
            "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
-            "you should reinstall paddlepaddle with no-avx core.\n")
+            "you should reinstall paddlepaddle with no-avx core.\n"
+        )
    raise e
@@ -317,22 +351,26 @@ def set_paddle_custom_device_lib_path(lib_path):
 # set paddle lib path
 def set_paddle_lib_path():
-    site_dirs = site.getsitepackages() if hasattr(
+    site_dirs = (
-        site,
+        site.getsitepackages()
-        'getsitepackages') else [x for x in sys.path if 'site-packages' in x]
+        if hasattr(site, 'getsitepackages')
+        else [x for x in sys.path if 'site-packages' in x]
+    )
    for site_dir in site_dirs:
        lib_dir = os.path.sep.join([site_dir, 'paddle', 'libs'])
        if os.path.exists(lib_dir):
            _set_paddle_lib_path(lib_dir)
            set_paddle_custom_device_lib_path(
-                os.path.sep.join([lib_dir, '..', '..', 'paddle-plugins']))
+                os.path.sep.join([lib_dir, '..', '..', 'paddle-plugins'])
+            )
            return
    if hasattr(site, 'USER_SITE'):
        lib_dir = os.path.sep.join([site.USER_SITE, 'paddle', 'libs'])
        if os.path.exists(lib_dir):
            _set_paddle_lib_path(lib_dir)
            set_paddle_custom_device_lib_path(
-                os.path.sep.join([lib_dir, '..', '..', 'paddle-plugins']))
+                os.path.sep.join([lib_dir, '..', '..', 'paddle-plugins'])
+            )
 set_paddle_lib_path()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py
@@ -22,12 +22,10 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertActivationTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
            if dims == 1:
                return np.random.random([32]).astype(np.float32)
@@ -41,11 +39,19 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
        for dims in [1, 2, 3, 4]:
            for batch in [1, 4]:
                for op_type in [
-                        "relu", "sigmoid", "tanh", "relu6", "elu", "selu",
+                    "relu",
-                        "softsign", "stanh", "thresholded_relu", "softplus"
+                    "sigmoid",
+                    "tanh",
+                    "relu6",
+                    "elu",
+                    "selu",
+                    "softsign",
+                    "stanh",
+                    "thresholded_relu",
+                    "softplus",
                ]:
                    # few samples to reduce time
-                    #for beta in [-0.2, 0.5, 0.67, 3]:
+                    # for beta in [-0.2, 0.5, 0.67, 3]:
                    #    for alpha in [-0.2, 0.5, 0.67, 3]:
                    for beta in [0.67]:
                        for alpha in [0.67]:
@@ -62,33 +68,34 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
                            if op_type == "softplus":
                                dics = [{"beta": beta}]
-                            ops_config = [{
+                            ops_config = [
-                                "op_type": op_type,
+                                {
-                                "op_inputs": {
+                                    "op_type": op_type,
-                                    "X": ["input_data"]
+                                    "op_inputs": {"X": ["input_data"]},
-                                },
+                                    "op_outputs": {"Out": ["output_data"]},
-                                "op_outputs": {
+                                    "op_attrs": dics[0],
-                                    "Out": ["output_data"]
+                                }
-                                },
+                            ]
-                                "op_attrs": dics[0]
-                            }]
                            ops = self.generate_op_config(ops_config)
                            program_config = ProgramConfig(
                                ops=ops,
                                weights={},
                                inputs={
-                                    "input_data":
+                                    "input_data": TensorConfig(
-                                    TensorConfig(data_gen=partial(
+                                        data_gen=partial(
-                                        generate_input1, dims, batch, dics))
+                                            generate_input1, dims, batch, dics
+                                        )
+                                    )
                                },
-                                outputs=["output_data"])
+                                outputs=["output_data"],
+                            )
                            yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.dims == 1:
                self.dynamic_shape.min_input_shape = {"input_data": [1]}
@@ -131,19 +138,23 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py
@@ -22,60 +22,66 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1(batch, attrs: List[Dict[str, Any]]):
            return np.random.random([batch, 3, 64, 64]).astype(np.float32)
        for batch in [1, 2, 4]:
            for anchor_sizes in [[64.0, 128.0, 256.0, 512.0]]:
                for aspect_ratios in [[0.5, 1, 2], [0.4, 1.2, 3]]:
-                    for variances in [[1.0, 1.0, 1.0, 1.0],
+                    for variances in [
-                                      [0.5, 1.0, 0.5, 1.0]]:
+                        [1.0, 1.0, 1.0, 1.0],
+                        [0.5, 1.0, 0.5, 1.0],
+                    ]:
                        for stride in [[16.0, 16.0], [16.0, 32.0]]:
                            for offset in [0.5, 0.8]:
-                                dics = [{
+                                dics = [
-                                    "anchor_sizes": anchor_sizes,
+                                    {
-                                    "aspect_ratios": aspect_ratios,
+                                        "anchor_sizes": anchor_sizes,
-                                    "variances": variances,
+                                        "aspect_ratios": aspect_ratios,
-                                    "stride": stride,
+                                        "variances": variances,
-                                    "offset": offset
+                                        "stride": stride,
-                                }]
+                                        "offset": offset,
+                                    }
-                                ops_config = [{
+                                ]
-                                    "op_type": "anchor_generator",
-                                    "op_inputs": {
+                                ops_config = [
-                                        "Input": ["input_data"]
+                                    {
-                                    },
+                                        "op_type": "anchor_generator",
-                                    "op_outputs": {
+                                        "op_inputs": {"Input": ["input_data"]},
-                                        "Anchors": ["output_anchors"],
+                                        "op_outputs": {
-                                        "Variances": ["output_variances"]
+                                            "Anchors": ["output_anchors"],
-                                    },
+                                            "Variances": ["output_variances"],
-                                    "op_attrs": dics[0]
+                                        },
-                                }]
+                                        "op_attrs": dics[0],
+                                    }
+                                ]
                                ops = self.generate_op_config(ops_config)
                                program_config = ProgramConfig(
                                    ops=ops,
                                    weights={},
                                    inputs={
-                                        "input_data":
+                                        "input_data": TensorConfig(
-                                        TensorConfig(data_gen=partial(
+                                            data_gen=partial(
-                                            generate_input1, batch, dics))
+                                                generate_input1, batch, dics
+                                            )
+                                        )
                                    },
                                    outputs=[
-                                        "output_anchors", "output_variances"
+                                        "output_anchors",
-                                    ])
+                                        "output_variances",
+                                    ],
+                                )
                                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
            self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
@@ -100,19 +106,23 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py
@@ -22,7 +22,6 @@ from typing import List
 class TrtConvertArgMaxTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        input_shape = program_config.inputs["arg_max_input"].shape
        axis = program_config.ops[0].attrs["axis"]
@@ -33,7 +32,6 @@ class TrtConvertArgMaxTest(TrtLayerAutoScanTest):
        return True
    def sample_program_configs(self):
        def generate_input(rank, batch):
            dims = [batch]
            for i in range(rank - 1):
@@ -48,36 +46,37 @@ class TrtConvertArgMaxTest(TrtLayerAutoScanTest):
                        self.rank = rank
                        flatten = False
                        dtype = 2
-                        ops_config = [{
+                        ops_config = [
-                            "op_type": "arg_max",
+                            {
-                            "op_inputs": {
+                                "op_type": "arg_max",
-                                "X": ["arg_max_input"]
+                                "op_inputs": {"X": ["arg_max_input"]},
-                            },
+                                "op_outputs": {"Out": ["arg_max_out"]},
-                            "op_outputs": {
+                                "op_attrs": {
-                                "Out": ["arg_max_out"]
+                                    "axis": axis,
-                            },
+                                    "keepdims": keepdims,
-                            "op_attrs": {
+                                    "flatten": flatten,
-                                "axis": axis,
+                                    "dtype": dtype,
-                                "keepdims": keepdims,
+                                },
-                                "flatten": flatten,
-                                "dtype": dtype
                            }
-                        }]
+                        ]
                        ops = self.generate_op_config(ops_config)
                        program_config = ProgramConfig(
                            ops=ops,
                            weights={},
                            inputs={
-                                "arg_max_input":
+                                "arg_max_input": TensorConfig(
-                                TensorConfig(data_gen=partial(
+                                    data_gen=partial(
-                                    generate_input, rank, batch))
+                                        generate_input, rank, batch
+                                    )
+                                )
                            },
-                            outputs=["arg_max_out"])
+                            outputs=["arg_max_out"],
+                        )
                        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.rank == 3:
                self.dynamic_shape.min_input_shape = {
@@ -117,19 +116,23 @@ class TrtConvertArgMaxTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py
@@ -12,20 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
 from program_config import TensorConfig, ProgramConfig
 import numpy as np
 import paddle.inference as paddle_infer
 from functools import partial
-from typing import Optional, List, Callable, Dict, Any, Set
+from typing import List
 import unittest
 import os
 class TrtConvertBmmTest_dynamic(TrtLayerAutoScanTest):
    def sample_program_configs(self):
        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)
@@ -33,48 +31,47 @@ class TrtConvertBmmTest_dynamic(TrtLayerAutoScanTest):
            input1_shape = [batch, 350, 75]
            input2_shape = [batch, 75, 25]
            dics = [{}]
-            ops_config = [{
+            ops_config = [
-                "op_type": "bmm",
+                {
-                "op_inputs": {
+                    "op_type": "bmm",
-                    "X": ["input1_data"],
+                    "op_inputs": {"X": ["input1_data"], "Y": ["input2_data"]},
-                    "Y": ["input2_data"]
+                    "op_outputs": {"Out": ["output_data"]},
-                },
+                    "op_attrs": dics[0],
-                "op_outputs": {
+                }
-                    "Out": ["output_data"]
+            ]
-                },
-                "op_attrs": dics[0]
-            }]
            ops = self.generate_op_config(ops_config)
            program_config = ProgramConfig(
                ops=ops,
                weights={},
                inputs={
-                    "input1_data":
+                    "input1_data": TensorConfig(
-                    TensorConfig(
+                        data_gen=partial(generate_input, input1_shape)
-                        data_gen=partial(generate_input, input1_shape)),
+                    ),
-                    "input2_data":
+                    "input2_data": TensorConfig(
-                    TensorConfig(data_gen=partial(generate_input, input2_shape))
+                        data_gen=partial(generate_input, input2_shape)
+                    ),
                },
-                outputs=["output_data"])
+                outputs=["output_data"],
+            )
            yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
                "input1_data": [10, 350, 75],
-                "input2_data": [10, 75, 25]
+                "input2_data": [10, 75, 25],
            }
            self.dynamic_shape.max_input_shape = {
                "input1_data": [100, 350, 75],
-                "input2_data": [100, 75, 25]
+                "input2_data": [100, 75, 25],
            }
            self.dynamic_shape.opt_input_shape = {
                "input1_data": [15, 350, 75],
-                "input2_data": [15, 75, 25]
+                "input2_data": [15, 75, 25],
            }
        def clear_dynamic_shape():
@@ -95,25 +92,29 @@ class TrtConvertBmmTest_dynamic(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # The output has little diff between gpu and trt in CI-Windows-Inference
        tol_fp32 = 1e-4
        tol_half = 1e-4
-        if (os.name == 'nt'):
+        if os.name == 'nt':
            tol_fp32 = 1e-2
            tol_half = 1e-2
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), tol_fp32
+            attrs, True
+        ), tol_fp32
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), tol_half
+            attrs, True
+        ), tol_half
    def add_skip_trt_case(self):
        pass

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py
@@ -22,12 +22,10 @@ import unittest
 class TrtConvertClipTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
            if dims == 1:
                return np.ones([32]).astype(np.float32)
@@ -46,52 +44,52 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
        for dims in [1, 2, 3, 4]:
            for batch in [1, 4]:
-                for op_inputs in [{
+                for op_inputs in [
-                        "X": ["input_data"]
+                    {"X": ["input_data"]},
-                }, {
+                    {"X": ["input_data"], "Min": ["Min_"], "Max": ["Max_"]},
-                        "X": ["input_data"],
+                ]:
-                        "Min": ["Min_"],
-                        "Max": ["Max_"]
-                }]:
                    self.input_num = len(op_inputs)
                    self.dims = dims
-                    dics = [{
+                    dics = [
-                        "min": np.random.uniform(1, 10),
+                        {
-                        "max": np.random.uniform(10, 20)
+                            "min": np.random.uniform(1, 10),
-                    }, {
+                            "max": np.random.uniform(10, 20),
-                        "op_inputs": op_inputs
-                    }]
-                    ops_config = [{
-                        "op_type": "clip",
-                        "op_inputs": op_inputs,
-                        "op_outputs": {
-                            "Out": ["output_data"]
                        },
-                        "op_attrs": dics[0]
+                        {"op_inputs": op_inputs},
-                    }]
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": "clip",
+                            "op_inputs": op_inputs,
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[0],
+                        }
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={
-                            "Min_":
+                            "Min_": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_weight1, dics)
-                                data_gen=partial(generate_weight1, dics)),
+                            ),
-                            "Max_":
+                            "Max_": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_weight2, dics)
-                                data_gen=partial(generate_weight2, dics))
+                            ),
                        },
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(data_gen=partial(
+                                data_gen=partial(
-                                generate_input1, dims, batch, dics))
+                                    generate_input1, dims, batch, dics
+                                )
+                            )
                        },
-                        outputs=["output_data"])
+                        outputs=["output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(self, program_config):
        def generate_dynamic_shape(attrs):
            if self.dims == 1:
                self.dynamic_shape.min_input_shape = {"input_data": [1]}
@@ -135,19 +133,23 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_concat.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_concat.py
@@ -22,7 +22,6 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertConcatTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        inputs = program_config.inputs
        weights = program_config.weights
@@ -31,14 +30,13 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest):
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
-        #The input dimension should be less than or equal to the set axis.
+        # The input dimension should be less than or equal to the set axis.
        if len(inputs['concat_input1'].shape) <= attrs[0]['axis']:
            return False
        return True
    def sample_program_configs(self):
        def generate_input1(attrs: List[Dict[str, Any]], batch):
            if self.dims == 4:
                return np.ones([batch, 3, 24, 24]).astype(np.float32)
@@ -79,58 +77,83 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest):
                        self.num_input = num_input
                        self.dims = dims
                        dics = [{"axis": axis}, {}]
-                        dics_intput = [{
+                        dics_intput = [
-                            "X":
+                            {
-                            ["concat_input1", "concat_input2", "concat_input3"],
+                                "X": [
-                            "AxisTensor": ["AxisTensor"],
+                                    "concat_input1",
-                        }, {
+                                    "concat_input2",
-                            "X":
+                                    "concat_input3",
-                            ["concat_input1", "concat_input2", "concat_input3"]
+                                ],
-                        }]
+                                "AxisTensor": ["AxisTensor"],
-                        dics_inputs = [{
+                            },
-                            "concat_input1":
+                            {
-                            TensorConfig(
+                                "X": [
-                                data_gen=partial(generate_input1, dics, batch)),
+                                    "concat_input1",
-                            "concat_input2":
+                                    "concat_input2",
-                            TensorConfig(
+                                    "concat_input3",
-                                data_gen=partial(generate_input2, dics, batch)),
+                                ]
-                            "concat_input3":
+                            },
-                            TensorConfig(
+                        ]
-                                data_gen=partial(generate_input3, dics, batch)),
+                        dics_inputs = [
-                            "AxisTensor":
+                            {
-                            TensorConfig(
+                                "concat_input1": TensorConfig(
-                                data_gen=partial(generate_weight1, dics))
+                                    data_gen=partial(
-                        }, {
+                                        generate_input1, dics, batch
-                            "concat_input1":
+                                    )
-                            TensorConfig(
+                                ),
-                                data_gen=partial(generate_input1, dics, batch)),
+                                "concat_input2": TensorConfig(
-                            "concat_input2":
+                                    data_gen=partial(
-                            TensorConfig(
+                                        generate_input2, dics, batch
-                                data_gen=partial(generate_input2, dics, batch)),
+                                    )
-                            "concat_input3":
+                                ),
-                            TensorConfig(
+                                "concat_input3": TensorConfig(
-                                data_gen=partial(generate_input3, dics, batch))
+                                    data_gen=partial(
-                        }]
+                                        generate_input3, dics, batch
-                        ops_config = [{
+                                    )
-                            "op_type": "concat",
+                                ),
-                            "op_inputs": dics_intput[num_input],
+                                "AxisTensor": TensorConfig(
-                            "op_outputs": {
+                                    data_gen=partial(generate_weight1, dics)
-                                "Out": ["concat_output"]
+                                ),
+                            },
+                            {
+                                "concat_input1": TensorConfig(
+                                    data_gen=partial(
+                                        generate_input1, dics, batch
+                                    )
+                                ),
+                                "concat_input2": TensorConfig(
+                                    data_gen=partial(
+                                        generate_input2, dics, batch
+                                    )
+                                ),
+                                "concat_input3": TensorConfig(
+                                    data_gen=partial(
+                                        generate_input3, dics, batch
+                                    )
+                                ),
                            },
-                            "op_attrs": dics[0]
+                        ]
-                        }]
+                        ops_config = [
+                            {
+                                "op_type": "concat",
+                                "op_inputs": dics_intput[num_input],
+                                "op_outputs": {"Out": ["concat_output"]},
+                                "op_attrs": dics[0],
+                            }
+                        ]
                        ops = self.generate_op_config(ops_config)
                        program_config = ProgramConfig(
                            ops=ops,
                            weights={},
                            inputs=dics_inputs[num_input],
-                            outputs=["concat_output"])
+                            outputs=["concat_output"],
+                        )
                        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.num_input == 0:
                if self.dims == 4:
@@ -138,76 +161,76 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest):
                        "concat_input1": [1, 3, 24, 24],
                        "concat_input2": [1, 3, 24, 24],
                        "concat_input3": [1, 3, 24, 24],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [4, 3, 48, 48],
                        "concat_input2": [4, 3, 48, 48],
                        "concat_input3": [4, 3, 48, 48],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [1, 3, 24, 24],
                        "concat_input2": [1, 3, 24, 24],
                        "concat_input3": [1, 3, 24, 24],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                elif self.dims == 3:
                    self.dynamic_shape.min_input_shape = {
                        "concat_input1": [1, 3, 24],
                        "concat_input2": [1, 3, 24],
                        "concat_input3": [1, 3, 24],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [4, 12, 48],
                        "concat_input2": [4, 12, 48],
                        "concat_input3": [4, 12, 48],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [1, 3, 24],
                        "concat_input2": [1, 3, 24],
                        "concat_input3": [1, 3, 24],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                elif self.dims == 2:
                    self.dynamic_shape.min_input_shape = {
                        "concat_input1": [1, 24],
                        "concat_input2": [1, 24],
                        "concat_input3": [1, 24],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [4, 48],
                        "concat_input2": [4, 48],
                        "concat_input3": [4, 48],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [1, 24],
                        "concat_input2": [1, 24],
                        "concat_input3": [1, 24],
-                        "AxisTensor": [1]
+                        "AxisTensor": [1],
                    }
                elif self.dims == 1:
                    self.dynamic_shape.min_input_shape = {
                        "concat_input1": [24],
                        "concat_input2": [24],
                        "concat_input3": [24],
-                        "AxisTensor": [0]
+                        "AxisTensor": [0],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [48],
                        "concat_input2": [48],
                        "concat_input3": [48],
-                        "AxisTensor": [0]
+                        "AxisTensor": [0],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [24],
                        "concat_input2": [24],
                        "concat_input3": [24],
-                        "AxisTensor": [0]
+                        "AxisTensor": [0],
                    }
            elif self.num_input == 1:
                if self.dims == 4:
@@ -219,60 +242,60 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest):
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [4, 3, 48, 48],
                        "concat_input2": [4, 3, 48, 48],
-                        "concat_input3": [4, 3, 48, 48]
+                        "concat_input3": [4, 3, 48, 48],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [1, 3, 24, 24],
                        "concat_input2": [1, 3, 24, 24],
-                        "concat_input3": [1, 3, 24, 24]
+                        "concat_input3": [1, 3, 24, 24],
                    }
                elif self.dims == 3:
                    self.dynamic_shape.min_input_shape = {
                        "concat_input1": [1, 3, 24],
                        "concat_input2": [1, 3, 24],
-                        "concat_input3": [1, 3, 24]
+                        "concat_input3": [1, 3, 24],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [4, 12, 48],
                        "concat_input2": [4, 12, 48],
-                        "concat_input3": [4, 12, 48]
+                        "concat_input3": [4, 12, 48],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [1, 3, 24],
                        "concat_input2": [1, 3, 24],
-                        "concat_input3": [1, 3, 24]
+                        "concat_input3": [1, 3, 24],
                    }
                elif self.dims == 2:
                    self.dynamic_shape.min_input_shape = {
                        "concat_input1": [1, 24],
                        "concat_input2": [1, 24],
-                        "concat_input3": [1, 24]
+                        "concat_input3": [1, 24],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [4, 48],
                        "concat_input2": [4, 48],
-                        "concat_input3": [4, 48]
+                        "concat_input3": [4, 48],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [1, 24],
                        "concat_input2": [1, 24],
-                        "concat_input3": [1, 24]
+                        "concat_input3": [1, 24],
                    }
                elif self.dims == 1:
                    self.dynamic_shape.min_input_shape = {
                        "concat_input1": [24],
                        "concat_input2": [24],
-                        "concat_input3": [24]
+                        "concat_input3": [24],
                    }
                    self.dynamic_shape.max_input_shape = {
                        "concat_input1": [48],
                        "concat_input2": [48],
-                        "concat_input3": [48]
+                        "concat_input3": [48],
                    }
                    self.dynamic_shape.opt_input_shape = {
                        "concat_input1": [24],
                        "concat_input2": [24],
-                        "concat_input3": [24]
+                        "concat_input3": [24],
                    }
        def clear_dynamic_shape():
@@ -296,29 +319,33 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def add_skip_trt_case(self):
        def teller1(program_config, predictor_config):
            if len(program_config.inputs) == 4:
                return True
            return False
-        self.add_skip_case(teller1, SkipReasons.TRT_NOT_SUPPORT,
+        self.add_skip_case(
-                           "INPUT AxisTensor NOT SUPPORT")
+            teller1, SkipReasons.TRT_NOT_SUPPORT, "INPUT AxisTensor NOT SUPPORT"
+        )
    def test(self):
        self.add_skip_trt_case()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py
@@ -22,7 +22,6 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        inputs = program_config.inputs
        weights = program_config.weights
@@ -30,8 +29,10 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
-        if inputs['input_data'].shape[
+        if (
-                1] != weights['conv2d_weight'].shape[1] * attrs[0]['groups']:
+            inputs['input_data'].shape[1]
+            != weights['conv2d_weight'].shape[1] * attrs[0]['groups']
+        ):
            return False
        if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[0]:
@@ -54,12 +55,13 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
        def generate_weight1(num_channels, attrs: List[Dict[str, Any]]):
            if attrs[0]['groups'] == 1:
-                return np.random.random([num_channels, num_channels, 3,
+                return np.random.random(
-                                         3]).astype(np.float32)
+                    [num_channels, num_channels, 3, 3]
+                ).astype(np.float32)
            else:
                return np.random.random(
-                    [num_channels, int(num_channels / 2), 3,
+                    [num_channels, int(num_channels / 2), 3, 3]
-                     3]).astype(np.float32)
+                ).astype(np.float32)
        for num_channels in [2, 4, 6]:
            for batch in [1, 4]:
@@ -67,99 +69,113 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
                    for paddings in [[0, 3], [1, 2, 3, 4]]:
                        for groups in [2]:
                            for padding_algorithm in [
-                                    'EXPLICIT', 'SAME', 'VALID'
+                                'EXPLICIT',
+                                'SAME',
+                                'VALID',
                            ]:
                                for dilations in [[2, 2], [1, 2]]:
                                    for data_format in ['NCHW']:
                                        self.num_channels = num_channels
-                                        dics = [{
+                                        dics = [
-                                            "data_fromat": data_format,
+                                            {
-                                            "dilations": dilations,
+                                                "data_fromat": data_format,
-                                            "padding_algorithm":
+                                                "dilations": dilations,
-                                            padding_algorithm,
+                                                "padding_algorithm": padding_algorithm,
-                                            "groups": groups,
+                                                "groups": groups,
-                                            "paddings": paddings,
+                                                "paddings": paddings,
-                                            "strides": strides,
+                                                "strides": strides,
-                                            "data_format": data_format,
+                                                "data_format": data_format,
-                                            "output_size": [],
+                                                "output_size": [],
-                                            "output_padding": []
+                                                "output_padding": [],
-                                        }]
+                                            }
+                                        ]
-                                        ops_config = [{
-                                            "op_type": "conv2d_transpose",
+                                        ops_config = [
-                                            "op_inputs": {
+                                            {
-                                                "Input": ["input_data"],
+                                                "op_type": "conv2d_transpose",
-                                                "Filter": ["conv2d_weight"]
+                                                "op_inputs": {
-                                            },
+                                                    "Input": ["input_data"],
-                                            "op_outputs": {
+                                                    "Filter": ["conv2d_weight"],
-                                                "Output": ["output_data"]
+                                                },
-                                            },
+                                                "op_outputs": {
-                                            "op_attrs": dics[0]
+                                                    "Output": ["output_data"]
-                                        }]
+                                                },
+                                                "op_attrs": dics[0],
+                                            }
+                                        ]
                                        ops = self.generate_op_config(
-                                            ops_config)
+                                            ops_config
+                                        )
                                        program_config = ProgramConfig(
                                            ops=ops,
                                            weights={
-                                                "conv2d_weight":
+                                                "conv2d_weight": TensorConfig(
-                                                TensorConfig(data_gen=partial(
+                                                    data_gen=partial(
-                                                    generate_weight1,
+                                                        generate_weight1,
-                                                    num_channels, dics))
+                                                        num_channels,
+                                                        dics,
+                                                    )
+                                                )
                                            },
                                            inputs={
-                                                "input_data":
+                                                "input_data": TensorConfig(
-                                                TensorConfig(data_gen=partial(
+                                                    data_gen=partial(
-                                                    generate_input1, batch,
+                                                        generate_input1,
-                                                    num_channels, dics))
+                                                        batch,
+                                                        num_channels,
+                                                        dics,
+                                                    )
+                                                )
                                            },
-                                            outputs=["output_data"])
+                                            outputs=["output_data"],
+                                        )
                                        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.num_channels == 2:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [1, 2, 32, 32],
-                    "output_data": [1, 24, 32, 32]
+                    "output_data": [1, 24, 32, 32],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [4, 2, 64, 64],
-                    "output_data": [4, 24, 64, 64]
+                    "output_data": [4, 24, 64, 64],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [1, 2, 64, 64],
-                    "output_data": [1, 24, 64, 64]
+                    "output_data": [1, 24, 64, 64],
                }
            elif self.num_channels == 4:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [1, 4, 32, 32],
-                    "output_data": [1, 24, 32, 32]
+                    "output_data": [1, 24, 32, 32],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [4, 4, 64, 64],
-                    "output_data": [4, 24, 64, 64]
+                    "output_data": [4, 24, 64, 64],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [1, 4, 64, 64],
-                    "output_data": [1, 24, 64, 64]
+                    "output_data": [1, 24, 64, 64],
                }
            else:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [1, 6, 32, 32],
-                    "output_data": [1, 24, 32, 32]
+                    "output_data": [1, 24, 32, 32],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [4, 6, 64, 64],
-                    "output_data": [4, 24, 64, 64]
+                    "output_data": [4, 24, 64, 64],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [1, 6, 64, 64],
-                    "output_data": [1, 24, 64, 64]
+                    "output_data": [1, 24, 64, 64],
                }
        def clear_dynamic_shape():
@@ -178,10 +194,12 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-5, 1e-3)
+            attrs, False
+        ), (1e-3, 1e-3)
        # self.trt_param.precision = paddle_infer.PrecisionType.Int8
        # yield self.create_inference_config(), generate_trt_nodes_num(
        #     attrs, False), (1e-5, 1e-5)
@@ -190,24 +208,26 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-5, 1e-3)
+            attrs, True
+        ), (1e-3, 1e-3)
        # self.trt_param.precision = paddle_infer.PrecisionType.Int8
        # yield self.create_inference_config(), generate_trt_nodes_num(
        #     attrs, True), (1e-5, 1e-5)
    def add_skip_trt_case(self):
        def teller1(program_config, predictor_config):
            if self.trt_param.precision == paddle_infer.PrecisionType.Int8:
                return True
            return False
        self.add_skip_case(
-            teller1, SkipReasons.TRT_NOT_IMPLEMENTED,
+            teller1,
-            "When precisionType is int8 without relu op, output is different between Trt and Paddle."
+            SkipReasons.TRT_NOT_IMPLEMENTED,
+            "When precisionType is int8 without relu op, output is different between Trt and Paddle.",
        )
    def test(self):
@@ -221,7 +241,6 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
 # Special case
 class TrtConvertConv2dTransposeTest2(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        ver = paddle_infer.get_trt_compile_version()
        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7000:
@@ -241,49 +260,52 @@ class TrtConvertConv2dTransposeTest2(TrtLayerAutoScanTest):
        batch = 1
        self.num_channels = num_channels
-        dics = [{
+        dics = [
-            "data_fromat": 'NCHW',
+            {
-            "dilations": [1, 1],
+                "data_fromat": 'NCHW',
-            "padding_algorithm": 'EXPLICIT',
+                "dilations": [1, 1],
-            "groups": 1,
+                "padding_algorithm": 'EXPLICIT',
-            "paddings": [1, 1],
+                "groups": 1,
-            "strides": [2, 2],
+                "paddings": [1, 1],
-            "output_padding": [1, 1],
+                "strides": [2, 2],
-            "output_size": [],
+                "output_padding": [1, 1],
-        }]
+                "output_size": [],
+            }
-        ops_config = [{
+        ]
-            "op_type": "conv2d_transpose",
-            "op_inputs": {
+        ops_config = [
-                "Input": ["input_data"],
+            {
-                "Filter": ["conv2d_weight"]
+                "op_type": "conv2d_transpose",
-            },
+                "op_inputs": {
-            "op_outputs": {
+                    "Input": ["input_data"],
-                "Output": ["output_data"]
+                    "Filter": ["conv2d_weight"],
-            },
+                },
-            "op_attrs": dics[0]
+                "op_outputs": {"Output": ["output_data"]},
-        }]
+                "op_attrs": dics[0],
+            }
+        ]
        ops = self.generate_op_config(ops_config)
        program_config = ProgramConfig(
            ops=ops,
            weights={
-                "conv2d_weight":
+                "conv2d_weight": TensorConfig(
-                TensorConfig(
+                    data_gen=partial(generate_weight1, num_channels, dics)
-                    data_gen=partial(generate_weight1, num_channels, dics))
+                )
            },
            inputs={
-                "input_data":
+                "input_data": TensorConfig(
-                TensorConfig(data_gen=partial(generate_input1, batch,
+                    data_gen=partial(generate_input1, batch, num_channels, dics)
-                                              num_channels, dics))
+                )
            },
-            outputs=["output_data"])
+            outputs=["output_data"],
+        )
        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 128, 20, 30],
@@ -311,19 +333,23 @@ class TrtConvertConv2dTransposeTest2(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-4
+            attrs, False
+        ), 1e-4
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e0, 1e-3)
+            attrs, False
+        ), (1e0, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-4
+            attrs, True
+        ), 1e-4
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e0, 1e-3)
+            attrs, True
+        ), (1e0, 1e-3)
    def add_skip_trt_case(self):
        pass

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_dropout.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_dropout.py
@@ -22,12 +22,10 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertDropoutTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
            if dims == 1:
                return np.ones([64]).astype(np.float32)
@@ -42,47 +40,57 @@ class TrtConvertDropoutTest(TrtLayerAutoScanTest):
            for batch in [1, 2, 4]:
                for fix_seed in [False, True]:
                    for dropout_implementation in [
-                            "downgrade_in_infer", "upscale_in_train"
+                        "downgrade_in_infer",
+                        "upscale_in_train",
                    ]:
                        for dropout_prob in [np.random.random()]:
                            for seed in [0, 64, 128, 512]:
                                self.dims = dims
-                                dics = [{
+                                dics = [
-                                    "fix_seed": fix_seed,
+                                    {
-                                    "dropout_implementation":
+                                        "fix_seed": fix_seed,
-                                    dropout_implementation,
+                                        "dropout_implementation": dropout_implementation,
-                                    "dropout_prob": dropout_prob,
+                                        "dropout_prob": dropout_prob,
-                                    "seed": seed,
+                                        "seed": seed,
-                                    "is_test": True
+                                        "is_test": True,
-                                }]
+                                    }
+                                ]
-                                ops_config = [{
-                                    "op_type": "dropout",
+                                ops_config = [
-                                    "op_inputs": {
+                                    {
-                                        "X": ["input_data"],
+                                        "op_type": "dropout",
-                                    },
+                                        "op_inputs": {
-                                    "op_outputs": {
+                                            "X": ["input_data"],
-                                        "Out": ["dropout_output_data"]
+                                        },
-                                    },
+                                        "op_outputs": {
-                                    "op_attrs": dics[0]
+                                            "Out": ["dropout_output_data"]
-                                }]
+                                        },
+                                        "op_attrs": dics[0],
+                                    }
+                                ]
                                ops = self.generate_op_config(ops_config)
                                program_config = ProgramConfig(
                                    ops=ops,
                                    weights={},
                                    inputs={
-                                        "input_data":
+                                        "input_data": TensorConfig(
-                                        TensorConfig(data_gen=partial(
+                                            data_gen=partial(
-                                            generate_input1, dims, batch, dics))
+                                                generate_input1,
+                                                dims,
+                                                batch,
+                                                dics,
+                                            )
+                                        )
                                    },
-                                    outputs=["dropout_output_data"])
+                                    outputs=["dropout_output_data"],
+                                )
                                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.dims == 1:
                self.dynamic_shape.min_input_shape = {"input_data": [1]}
@@ -128,19 +136,23 @@ class TrtConvertDropoutTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def add_skip_trt_case(self):
        pass

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
@@ -22,7 +22,6 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
@@ -35,7 +34,6 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
        return True
    def sample_program_configs(self):
        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)
@@ -44,86 +42,84 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
                for axis in [-1 if len(shape) == 1 else 1]:
                    self.dims = len(shape)
                    dics = [{"axis": axis}, {"in_dtype": 0, "out_dtype": 5}]
-                    ops_config = [{
+                    ops_config = [
-                        "op_type": "equal",
+                        {
-                        "op_inputs": {
+                            "op_type": "equal",
-                            "X": ["input_data1"],
+                            "op_inputs": {
-                            "Y": ["input_data2"]
+                                "X": ["input_data1"],
-                        },
+                                "Y": ["input_data2"],
-                        "op_outputs": {
+                            },
-                            "Out": ["compare_output_data"]
+                            "op_outputs": {"Out": ["compare_output_data"]},
+                            "op_attrs": dics[0],
                        },
-                        "op_attrs": dics[0]
+                        {
-                    }, {
+                            "op_type": "cast",
-                        "op_type": "cast",
+                            "op_inputs": {"X": ["compare_output_data"]},
-                        "op_inputs": {
+                            "op_outputs": {"Out": ["output_data"]},
-                            "X": ["compare_output_data"]
+                            "op_attrs": dics[1],
                        },
-                        "op_outputs": {
+                    ]
-                            "Out": ["output_data"]
-                        },
-                        "op_attrs": dics[1]
-                    }]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data1":
+                            "input_data1": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input, shape)
-                                data_gen=partial(generate_input, shape)),
+                            ),
-                            "input_data2":
+                            "input_data2": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input, shape)
-                                data_gen=partial(generate_input, shape))
+                            ),
                        },
-                        outputs=["output_data"])
+                        outputs=["output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            # The input.dims[1] must be equal to the weight's length.
            if self.dims == 2:
                self.dynamic_shape.min_input_shape = {
                    "input_data1": [1, 1],
-                    "input_data2": [1, 1]
+                    "input_data2": [1, 1],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data1": [4, 1],
-                    "input_data2": [4, 1]
+                    "input_data2": [4, 1],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data1": [2, 1],
-                    "input_data2": [2, 1]
+                    "input_data2": [2, 1],
                }
            elif self.dims == 3:
                self.dynamic_shape.min_input_shape = {
                    "input_data1": [1, 1, 4],
-                    "input_data2": [1, 1, 4]
+                    "input_data2": [1, 1, 4],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data1": [4, 1, 256],
-                    "input_data2": [1, 1, 256]
+                    "input_data2": [1, 1, 256],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data1": [2, 1, 16],
-                    "input_data2": [2, 1, 16]
+                    "input_data2": [2, 1, 16],
                }
            elif self.dims == 4:
                self.dynamic_shape.min_input_shape = {
                    "input_data1": [1, 1, 4, 4],
-                    "input_data2": [1, 1, 4, 4]
+                    "input_data2": [1, 1, 4, 4],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data1": [4, 1, 128, 256],
-                    "input_data2": [4, 1, 128, 256]
+                    "input_data2": [4, 1, 128, 256],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data1": [2, 1, 32, 16],
-                    "input_data2": [2, 1, 32, 16]
+                    "input_data2": [2, 1, 32, 16],
                }
        def clear_dynamic_shape():
@@ -144,19 +140,23 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fc.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fc.py
@@ -23,10 +23,9 @@ import os
 class TrtConvertFcTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # The output has diff between gpu and trt in CI windows
-        if (os.name == 'nt'):
+        if os.name == 'nt':
            return False
        return True
@@ -34,12 +33,14 @@ class TrtConvertFcTest(TrtLayerAutoScanTest):
        self.trt_param.workspace_size = 1073741824
        def generate_input1(batch, attrs: List[Dict[str, Any]]):
-            return np.random.random([batch, 3, 64, (int)(attrs[0]["m"] / 2),
+            return np.random.random(
-                                     2]).astype(np.float32)
+                [batch, 3, 64, (int)(attrs[0]["m"] / 2), 2]
+            ).astype(np.float32)
        def generate_w(batch, attrs: List[Dict[str, Any]]):
-            return np.random.random([attrs[0]["m"],
+            return np.random.random([attrs[0]["m"], attrs[0]["n"]]).astype(
-                                     attrs[0]["n"]]).astype(np.float32)
+                np.float32
+            )
        def generate_bias(batch, attrs: List[Dict[str, Any]]):
            return np.random.random([attrs[0]["n"]]).astype(np.float32)
@@ -53,7 +54,7 @@ class TrtConvertFcTest(TrtLayerAutoScanTest):
                        "m": m,
                        "n": n,
                    },
-                    {}
+                    {},
                ]
                ops_config = [
@@ -62,12 +63,10 @@ class TrtConvertFcTest(TrtLayerAutoScanTest):
                        "op_inputs": {
                            "Input": ["input_data"],
                            "W": ["w_data"],
-                            "Bias": ["bias_data"]
+                            "Bias": ["bias_data"],
-                        },
-                        "op_outputs": {
-                            "Out": ["output_data"]
                        },
-                        "op_attrs": dics[0]
+                        "op_outputs": {"Out": ["output_data"]},
+                        "op_attrs": dics[0],
                    },
                ]
@@ -76,24 +75,26 @@ class TrtConvertFcTest(TrtLayerAutoScanTest):
                program_config = ProgramConfig(
                    ops=ops,
                    weights={
-                        "w_data":
+                        "w_data": TensorConfig(
-                        TensorConfig(data_gen=partial(generate_w, batch, dics)),
+                            data_gen=partial(generate_w, batch, dics)
-                        "bias_data":
+                        ),
-                        TensorConfig(
+                        "bias_data": TensorConfig(
-                            data_gen=partial(generate_bias, batch, dics))
+                            data_gen=partial(generate_bias, batch, dics)
+                        ),
                    },
                    inputs={
-                        "input_data":
+                        "input_data": TensorConfig(
-                        TensorConfig(
+                            data_gen=partial(generate_input1, batch, dics)
-                            data_gen=partial(generate_input1, batch, dics)),
+                        ),
                    },
-                    outputs=["output_data"])
+                    outputs=["output_data"],
+                )
                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 3, 32, 16, 2],
@@ -121,19 +122,23 @@ class TrtConvertFcTest(TrtLayerAutoScanTest):
        # clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-5, 1e-5)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-5, 1e-5)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()
@@ -143,10 +148,9 @@ class TrtConvertFcTest(TrtLayerAutoScanTest):
 class TrtConvertFcTest2(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # The output has diff between gpu and trt in CI windows
-        if (os.name == 'nt'):
+        if os.name == 'nt':
            return False
        return True
@@ -157,8 +161,9 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest):
            return np.random.random([batch, 3, 64, 14]).astype(np.float32)
        def generate_w(batch, attrs: List[Dict[str, Any]]):
-            return np.random.random([attrs[0]["m"],
+            return np.random.random([attrs[0]["m"], attrs[0]["n"]]).astype(
-                                     attrs[0]["n"]]).astype(np.float32)
+                np.float32
+            )
        def generate_bias(batch, attrs: List[Dict[str, Any]]):
            return np.random.random([attrs[0]["n"]]).astype(np.float32)
@@ -172,7 +177,7 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest):
                        "m": m,
                        "n": n,
                    },
-                    {}
+                    {},
                ]
                ops_config = [
@@ -181,12 +186,10 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest):
                        "op_inputs": {
                            "Input": ["input_data"],
                            "W": ["w_data"],
-                            "Bias": ["bias_data"]
+                            "Bias": ["bias_data"],
                        },
-                        "op_outputs": {
+                        "op_outputs": {"Out": ["output_data"]},
-                            "Out": ["output_data"]
+                        "op_attrs": dics[0],
-                        },
-                        "op_attrs": dics[0]
                    },
                ]
@@ -195,24 +198,26 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest):
                program_config = ProgramConfig(
                    ops=ops,
                    weights={
-                        "w_data":
+                        "w_data": TensorConfig(
-                        TensorConfig(data_gen=partial(generate_w, batch, dics)),
+                            data_gen=partial(generate_w, batch, dics)
-                        "bias_data":
+                        ),
-                        TensorConfig(
+                        "bias_data": TensorConfig(
-                            data_gen=partial(generate_bias, batch, dics))
+                            data_gen=partial(generate_bias, batch, dics)
+                        ),
                    },
                    inputs={
-                        "input_data":
+                        "input_data": TensorConfig(
-                        TensorConfig(
+                            data_gen=partial(generate_input1, batch, dics)
-                            data_gen=partial(generate_input1, batch, dics)),
+                        ),
                    },
-                    outputs=["output_data"])
+                    outputs=["output_data"],
+                )
                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape():
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 3, 32, 14],
@@ -234,14 +239,14 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest):
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), (1e-5, 1e-5)
+        yield self.create_inference_config(), (1, 2), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), (1e-5, 1e-5)
+        yield self.create_inference_config(), (1, 2), (1e-3, 1e-3)
    def test(self):
        self.run_test()
@@ -277,7 +282,7 @@ class TrtConvertFcTest3(TrtLayerAutoScanTest):
                        "m": m,
                        "n": n,
                    },
-                    {}
+                    {},
                ]
                ops_config = [
@@ -286,12 +291,10 @@ class TrtConvertFcTest3(TrtLayerAutoScanTest):
                        "op_inputs": {
                            "Input": ["input_data"],
                            "W": ["w_data"],
-                            "Bias": ["bias_data"]
+                            "Bias": ["bias_data"],
                        },
-                        "op_outputs": {
+                        "op_outputs": {"Out": ["output_data"]},
-                            "Out": ["output_data"]
+                        "op_attrs": dics[0],
-                        },
-                        "op_attrs": dics[0]
                    },
                ]
@@ -300,24 +303,26 @@ class TrtConvertFcTest3(TrtLayerAutoScanTest):
                program_config = ProgramConfig(
                    ops=ops,
                    weights={
-                        "w_data":
+                        "w_data": TensorConfig(
-                        TensorConfig(data_gen=partial(generate_w, batch, dics)),
+                            data_gen=partial(generate_w, batch, dics)
-                        "bias_data":
+                        ),
-                        TensorConfig(
+                        "bias_data": TensorConfig(
-                            data_gen=partial(generate_bias, batch, dics))
+                            data_gen=partial(generate_bias, batch, dics)
+                        ),
                    },
                    inputs={
-                        "input_data":
+                        "input_data": TensorConfig(
-                        TensorConfig(
+                            data_gen=partial(generate_input1, batch, dics)
-                            data_gen=partial(generate_input1, batch, dics)),
+                        ),
                    },
-                    outputs=["output_data"])
+                    outputs=["output_data"],
+                )
                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape():
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 14, 1, 2],
@@ -339,16 +344,16 @@ class TrtConvertFcTest3(TrtLayerAutoScanTest):
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), (1e-5, 1e-5)
+        yield self.create_inference_config(), (1, 2), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), (1e-5, 1e-5)
+        yield self.create_inference_config(), (1, 2), (1e-3, 1e-3)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
-        yield self.create_inference_config(), (1, 2), (1e-5, 1e-5)
+        yield self.create_inference_config(), (1, 2), (1e-3, 1e-3)
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fill_constant.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fill_constant.py
@@ -22,12 +22,10 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertSplitTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_value_data(attrs: List[Dict[str, Any]]):
            return np.array([1]).astype(np.int32)
@@ -47,21 +45,28 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
                            str_value = str_value
                        else:
                            str_value = ""
-                        dics = [{
+                        dics = [
-                            "str_value": str_value,
+                            {
-                            "value": value,
+                                "str_value": str_value,
-                            "shape": shape,
+                                "value": value,
-                            "dtype": dtype
+                                "shape": shape,
-                        }, {
+                                "dtype": dtype,
-                            "axis": -1
+                            },
-                        }]
+                            {"axis": -1},
-                        dics_intput = [{
+                        ]
-                            "ValueTensor": ["value_data"]
+                        dics_intput = [
-                        }, {
+                            {"ValueTensor": ["value_data"]},
-                            "ShapeTensor": ["shape_data"],
+                            {
-                        }, {
+                                "ShapeTensor": ["shape_data"],
-                            "ShapeTensorList": ["shapeT1_data", "shapeT2_data"],
+                            },
-                        }, {}]
+                            {
+                                "ShapeTensorList": [
+                                    "shapeT1_data",
+                                    "shapeT2_data",
+                                ],
+                            },
+                            {},
+                        ]
                        ops_config = [
                            {
                                "op_type": "fill_constant",
@@ -69,7 +74,7 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
                                "op_outputs": {
                                    "Out": ["out_data"],
                                },
-                                "op_attrs": dics[0]
+                                "op_attrs": dics[0],
                            },
                        ]
@@ -81,26 +86,31 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
                            ops=ops,
                            weights={},
                            inputs={
-                                "value_data":
+                                "value_data": TensorConfig(
-                                TensorConfig(data_gen=partial(
+                                    data_gen=partial(generate_value_data, dics)
-                                    generate_value_data, dics)),
+                                ),
-                                "shape_data":
+                                "shape_data": TensorConfig(
-                                TensorConfig(data_gen=partial(
+                                    data_gen=partial(generate_shape_data, dics)
-                                    generate_shape_data, dics)),
+                                ),
-                                "shapeT1_data":
+                                "shapeT1_data": TensorConfig(
-                                TensorConfig(data_gen=partial(
+                                    data_gen=partial(
-                                    generate_shapelist_data, dics)),
+                                        generate_shapelist_data, dics
-                                "shapeT2_data":
+                                    )
-                                TensorConfig(data_gen=partial(
+                                ),
-                                    generate_shapelist_data, dics)),
+                                "shapeT2_data": TensorConfig(
+                                    data_gen=partial(
+                                        generate_shapelist_data, dics
+                                    )
+                                ),
                            },
-                            outputs=["out_data"])
+                            outputs=["out_data"],
+                        )
                        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.input_shape = [1, 1]
            max_shape = list(self.input_shape)
@@ -118,7 +128,7 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
            self.dynamic_shape.opt_input_shape = {}
        def generate_trt_nodes_num(attrs, dynamic_shape):
-            if (self.num_input < 3):
+            if self.num_input < 3:
                return 0, 6
            return 1, 5
@@ -131,10 +141,12 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def add_skip_trt_case(self):
        pass

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten.py
@@ -22,16 +22,14 @@ from typing import Optional, List, Callable, Dict, Any, Set
 class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input(batch):
            return np.random.random([batch, 32]).astype(np.float32)
-        for batch in [1, 2, 4]:
+        for batch in [1, 4]:
            for axis in [0, 1]:
                for type in ["flatten", "flatten2"]:
                    if type == "flatten":
@@ -39,34 +37,35 @@ class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest):
                    else:
                        op_outputs = {
                            "Out": ["output_data"],
-                            "XShape": ["xshape_data"]
+                            "XShape": ["xshape_data"],
                        }
                    dics = [{"axis": axis}]
-                    ops_config = [{
+                    ops_config = [
-                        "op_type": "flatten",
+                        {
-                        "op_inputs": {
+                            "op_type": "flatten",
-                            "X": ["input_data"]
+                            "op_inputs": {"X": ["input_data"]},
-                        },
+                            "op_outputs": op_outputs,
-                        "op_outputs": op_outputs,
+                            "op_attrs": dics[0],
-                        "op_attrs": dics[0]
+                        }
-                    }]
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input, batch)
-                                data_gen=partial(generate_input, batch))
+                            )
                        },
-                        outputs=["output_data"])
+                        outputs=["output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 8]}
            self.dynamic_shape.max_input_shape = {"input_data": [4, 64]}
@@ -100,35 +99,37 @@ class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest):
        # for static_shape
        clear_dynamic_shape()
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()
 class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input(batch):
            return np.random.random([batch, 32, 64]).astype(np.float32)
-        for batch in [1, 2, 4]:
+        for batch in [1, 4]:
            for axis in [0, 1, 2]:
                for type in ["flatten", "flatten2"]:
                    if type == "flatten":
@@ -136,38 +137,39 @@ class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest):
                    else:
                        op_outputs = {
                            "Out": ["output_data"],
-                            "XShape": ["xshape_data"]
+                            "XShape": ["xshape_data"],
                        }
                    dics = [{"axis": axis}]
-                    ops_config = [{
+                    ops_config = [
-                        "op_type": "flatten",
+                        {
-                        "op_inputs": {
+                            "op_type": "flatten",
-                            "X": ["input_data"]
+                            "op_inputs": {"X": ["input_data"]},
-                        },
+                            "op_outputs": op_outputs,
-                        "op_outputs": op_outputs,
+                            "op_attrs": dics[0],
-                        "op_attrs": dics[0]
+                        }
-                    }]
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input, batch)
-                                data_gen=partial(generate_input, batch))
+                            )
                        },
-                        outputs=["output_data"])
+                        outputs=["output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 8, 8]}
-            self.dynamic_shape.max_input_shape = {"input_data": [4, 64, 768]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [2, 32, 256]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [2, 32, 64]}
        def clear_dynamic_shape():
            self.dynamic_shape.max_input_shape = {}
@@ -198,35 +200,37 @@ class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()
 class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input(batch):
            return np.random.random([batch, 8, 8, 8]).astype(np.float32)
-        for batch in [1, 2, 4]:
+        for batch in [1, 4]:
            for axis in [0, 1, 2, 3]:
                for type in ["flatten", "flatten2"]:
                    if type == "flatten":
@@ -234,37 +238,38 @@ class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest):
                    else:
                        op_outputs = {
                            "Out": ["output_data"],
-                            "XShape": ["xshape_data"]
+                            "XShape": ["xshape_data"],
                        }
                    dics = [{"axis": axis}]
-                    ops_config = [{
+                    ops_config = [
-                        "op_type": "flatten",
+                        {
-                        "op_inputs": {
+                            "op_type": "flatten",
-                            "X": ["input_data"]
+                            "op_inputs": {"X": ["input_data"]},
-                        },
+                            "op_outputs": op_outputs,
-                        "op_outputs": op_outputs,
+                            "op_attrs": dics[0],
-                        "op_attrs": dics[0]
+                        }
-                    }]
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input, batch)
-                                data_gen=partial(generate_input, batch))
+                            )
                        },
-                        outputs=["output_data"])
+                        outputs=["output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 4, 4, 4]}
-            self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 64, 64]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32, 32]}
            self.dynamic_shape.opt_input_shape = {"input_data": [2, 16, 16, 8]}
        def clear_dynamic_shape():
@@ -294,36 +299,39 @@ class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest):
        # for static_shape
        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()
 class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input(batch):
            return np.random.random([batch, 8, 8, 8]).astype(np.float32)
-        for batch in [1, 2, 4]:
+        for batch in [1, 4]:
            for axis in [0, 1, 2, 3, 4]:
                for type in ["flatten", "flatten2"]:
                    if type == "flatten":
@@ -331,37 +339,38 @@ class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest):
                    else:
                        op_outputs = {
                            "Out": ["output_data"],
-                            "XShape": ["xshape_data"]
+                            "XShape": ["xshape_data"],
                        }
                    dics = [{"axis": axis}]
-                    ops_config = [{
+                    ops_config = [
-                        "op_type": "flatten",
+                        {
-                        "op_inputs": {
+                            "op_type": "flatten",
-                            "X": ["input_data"]
+                            "op_inputs": {"X": ["input_data"]},
-                        },
+                            "op_outputs": op_outputs,
-                        "op_outputs": op_outputs,
+                            "op_attrs": dics[0],
-                        "op_attrs": dics[0]
+                        }
-                    }]
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input, batch)
-                                data_gen=partial(generate_input, batch))
+                            )
                        },
-                        outputs=["output_data"])
+                        outputs=["output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 4, 4, 4]}
-            self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 64, 64]}
+            self.dynamic_shape.max_input_shape = {"input_data": [4, 16, 16, 8]}
            self.dynamic_shape.opt_input_shape = {"input_data": [2, 16, 16, 8]}
        def clear_dynamic_shape():
@@ -391,20 +400,25 @@ class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest):
        # for static_shape
        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather.py
@@ -23,7 +23,6 @@ import unittest
 class TrtConvertGatherTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        inputs = program_config.inputs
        attrs = [
@@ -35,7 +34,6 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest):
        return True
    def sample_program_configs(self):
        def generate_input1(shape):
            return np.random.random(shape).astype(np.float32)
@@ -52,112 +50,126 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest):
            for index in [[1, 4], [4, 8]]:
                for axis in [0, 1, 2, 3]:
                    for overwrite in [True, False]:
-                        for input in [{
+                        for input in [
-                                "X": ["input_data"],
+                            {"X": ["input_data"], "Index": ["index_data"]},
-                                "Index": ["index_data"]
+                            {
-                        }, {
                                "X": ["input_data"],
                                "Index": ["index_data"],
-                                "Axis": ["axis_data"]
+                                "Axis": ["axis_data"],
-                        }]:
+                            },
+                        ]:
                            for index_type_int32 in [True, False]:
                                self.shape = shape
                                self.axis = axis
                                self.input_num = len(input)
                                self.index_type_int32 = index_type_int32
                                dics = [{"overwrite": overwrite, "axis": axis}]
-                                ops_config = [{
+                                ops_config = [
-                                    "op_type": "gather",
+                                    {
-                                    "op_inputs": input,
+                                        "op_type": "gather",
-                                    "op_outputs": {
+                                        "op_inputs": input,
-                                        "Out": ["output_data"]
+                                        "op_outputs": {"Out": ["output_data"]},
-                                    },
+                                        "op_attrs": dics[0],
-                                    "op_attrs": dics[0]
+                                    }
-                                }]
+                                ]
                                ops = self.generate_op_config(ops_config)
                                program_config = ProgramConfig(
                                    ops=ops,
                                    weights={},
                                    inputs={
-                                        "input_data":
+                                        "input_data": TensorConfig(
-                                        TensorConfig(data_gen=partial(
+                                            data_gen=partial(
-                                            generate_input1, shape)),
+                                                generate_input1, shape
-                                        "index_data":
+                                            )
-                                        TensorConfig(data_gen=partial(
+                                        ),
-                                            generate_input2
+                                        "index_data": TensorConfig(
-                                            if index_type_int32 ==
+                                            data_gen=partial(
-                                            True else generate_input4, index)),
+                                                generate_input2
-                                    } if len(input) == 2 else {
+                                                if index_type_int32 == True
-                                        "input_data":
+                                                else generate_input4,
-                                        TensorConfig(data_gen=partial(
+                                                index,
-                                            generate_input1, shape)),
+                                            )
-                                        "index_data":
+                                        ),
-                                        TensorConfig(data_gen=partial(
+                                    }
-                                            generate_input2, index)),
+                                    if len(input) == 2
-                                        "axis_data":
+                                    else {
-                                        TensorConfig(data_gen=partial(
+                                        "input_data": TensorConfig(
-                                            generate_input3, axis)),
+                                            data_gen=partial(
+                                                generate_input1, shape
+                                            )
+                                        ),
+                                        "index_data": TensorConfig(
+                                            data_gen=partial(
+                                                generate_input2, index
+                                            )
+                                        ),
+                                        "axis_data": TensorConfig(
+                                            data_gen=partial(
+                                                generate_input3, axis
+                                            )
+                                        ),
                                    },
-                                    outputs=["output_data"])
+                                    outputs=["output_data"],
+                                )
                                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if len(self.shape) == 1:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [4],
-                    "index_data": [1]
+                    "index_data": [1],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [128],
-                    "index_data": [4]
+                    "index_data": [4],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [16],
-                    "index_data": [2]
+                    "index_data": [2],
                }
            elif len(self.shape) == 2:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [2, 4],
-                    "index_data": [1]
+                    "index_data": [1],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [256, 256],
-                    "index_data": [4]
+                    "index_data": [4],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [64, 32],
-                    "index_data": [2]
+                    "index_data": [2],
                }
            elif len(self.shape) == 3:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [2, 4, 4],
-                    "index_data": [1]
+                    "index_data": [1],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [128, 256, 256],
-                    "index_data": [4]
+                    "index_data": [4],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [16, 64, 32],
-                    "index_data": [2]
+                    "index_data": [2],
                }
            elif len(self.shape) == 4:
                self.dynamic_shape.min_input_shape = {
                    "input_data": [2, 4, 4, 2],
-                    "index_data": [1]
+                    "index_data": [1],
                }
                self.dynamic_shape.max_input_shape = {
                    "input_data": [128, 256, 64, 128],
-                    "index_data": [4]
+                    "index_data": [4],
                }
                self.dynamic_shape.opt_input_shape = {
                    "input_data": [16, 64, 16, 32],
-                    "index_data": [2]
+                    "index_data": [2],
                }
        def clear_dynamic_shape():
@@ -182,10 +194,12 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            False), 1e-5
+            False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            False), 1e-5
+            False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
@@ -201,14 +215,17 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest):
            def teller1(program_config, predictor_config):
                if len(self.dynamic_shape.min_input_shape) != 0:
                    inputs = program_config.inputs
-                    if len(inputs['input_data'].shape) == 1 or len(
+                    if (
-                            inputs['index_data'].shape) == 1:
+                        len(inputs['input_data'].shape) == 1
+                        or len(inputs['index_data'].shape) == 1
+                    ):
                        return True
                return False
            self.add_skip_case(
-                teller1, SkipReasons.TRT_NOT_SUPPORT,
+                teller1,
-                "Need to repair the case: trt reshape out failed for dynamic shape mode when inputs' dims==1. under trt7.0 "
+                SkipReasons.TRT_NOT_SUPPORT,
+                "Need to repair the case: trt reshape out failed for dynamic shape mode when inputs' dims==1. under trt7.0 ",
            )
    def test(self):

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
@@ -22,12 +22,10 @@ import unittest
 class TrtConvertGeluTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1(dims, attrs: List[Dict[str, Any]]):
            if dims == 1:
                return np.ones([32]).astype(np.float32)
@@ -43,33 +41,32 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest):
                self.dims = dims
                dics = [{"approximate": approximate}]
-                ops_config = [{
+                ops_config = [
-                    "op_type": "gelu",
+                    {
-                    "op_inputs": {
+                        "op_type": "gelu",
-                        "X": ["input_data"]
+                        "op_inputs": {"X": ["input_data"]},
-                    },
+                        "op_outputs": {"Out": ["output_data"]},
-                    "op_outputs": {
+                        "op_attrs": dics[0],
-                        "Out": ["output_data"]
+                    }
-                    },
+                ]
-                    "op_attrs": dics[0]
-                }]
                ops = self.generate_op_config(ops_config)
                program_config = ProgramConfig(
                    ops=ops,
                    weights={},
                    inputs={
-                        "input_data":
+                        "input_data": TensorConfig(
-                        TensorConfig(
+                            data_gen=partial(generate_input1, dims, dics)
-                            data_gen=partial(generate_input1, dims, dics))
+                        )
                    },
-                    outputs=["output_data"])
+                    outputs=["output_data"],
+                )
                yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.dims == 1:
                self.dynamic_shape.min_input_shape = {"input_data": [1]}
@@ -123,19 +120,23 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_grid_sampler.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_grid_sampler.py
@@ -22,29 +22,27 @@ import unittest
 class TrtConvertGridSampler(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1():
            return np.random.random([1, 3, 32, 32]).astype(np.float32)
        def generate_input2():
            return np.random.random([1, 3, 3, 2]).astype(np.float32)
-        ops_config = [{
+        ops_config = [
-            "op_type": "grid_sampler",
+            {
-            "op_inputs": {
+                "op_type": "grid_sampler",
-                "X": ["input_data"],
+                "op_inputs": {
-                "Grid": ["grid_data"],
+                    "X": ["input_data"],
-            },
+                    "Grid": ["grid_data"],
-            "op_outputs": {
+                },
-                "Output": ["output_data"]
+                "op_outputs": {"Output": ["output_data"]},
-            },
+                "op_attrs": {},
-            "op_attrs": {}
+            }
-        }]
+        ]
        ops = self.generate_op_config(ops_config)
        for i in range(10):
@@ -52,30 +50,33 @@ class TrtConvertGridSampler(TrtLayerAutoScanTest):
                ops=ops,
                weights={},
                inputs={
-                    "input_data":
+                    "input_data": TensorConfig(
-                    TensorConfig(data_gen=partial(generate_input1)),
+                        data_gen=partial(generate_input1)
-                    "grid_data":
+                    ),
-                    TensorConfig(data_gen=partial(generate_input2)),
+                    "grid_data": TensorConfig(
+                        data_gen=partial(generate_input2)
+                    ),
                },
-                outputs=["output_data"])
+                outputs=["output_data"],
+            )
        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 3, 32, 32],
-                "grid_data": [1, 3, 3, 2]
+                "grid_data": [1, 3, 3, 2],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data": [1, 3, 64, 64],
-                "grid_data": [1, 3, 4, 4]
+                "grid_data": [1, 3, 4, 4],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data": [1, 3, 32, 32],
-                "grid_data": [1, 3, 3, 2]
+                "grid_data": [1, 3, 3, 2],
            }
        def clear_dynamic_shape():
@@ -92,14 +93,14 @@ class TrtConvertGridSampler(TrtLayerAutoScanTest):
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (0, 4), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (0, 4), 1e-5
+        yield self.create_inference_config(), (0, 4), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 3), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 3), 1e-5
+        yield self.create_inference_config(), (1, 3), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py
@@ -22,7 +22,6 @@ import unittest
 class TrtConvertGroupNormTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        inputs = program_config.inputs
        weights = program_config.weights
@@ -36,7 +35,6 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest):
        return True
    def sample_program_configs(self):
        def generate_input(attrs: List[Dict[str, Any]], batch):
            if attrs[0]['data_layout'] == 'NCHW':
                return np.random.random([batch, 32, 64, 64]).astype(np.float32)
@@ -53,47 +51,56 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest):
            for group in [1, 4, 32, -1]:
                for epsilon in [0.0001, 0.0007, -1, 1]:
                    for data_layout in ['NCHW']:
-                        dics = [{
+                        dics = [
-                            "epsilon": epsilon,
+                            {
-                            "groups": group,
+                                "epsilon": epsilon,
-                            "data_layout": data_layout
+                                "groups": group,
-                        }]
+                                "data_layout": data_layout,
-                        ops_config = [{
+                            }
-                            "op_type": "group_norm",
+                        ]
-                            "op_inputs": {
+                        ops_config = [
-                                "X": ["input_data"],
+                            {
-                                "Scale": ["scale_weight"],
+                                "op_type": "group_norm",
-                                "Bias": ["bias_weight"]
+                                "op_inputs": {
-                            },
+                                    "X": ["input_data"],
-                            "op_outputs": {
+                                    "Scale": ["scale_weight"],
-                                "Y": ["y_output"],
+                                    "Bias": ["bias_weight"],
-                                "Mean": ["mean_output"],
+                                },
-                                "Variance": ["variance_output"]
+                                "op_outputs": {
-                            },
+                                    "Y": ["y_output"],
-                            "op_attrs": dics[0]
+                                    "Mean": ["mean_output"],
-                        }]
+                                    "Variance": ["variance_output"],
+                                },
+                                "op_attrs": dics[0],
+                            }
+                        ]
                        ops = self.generate_op_config(ops_config)
                        program_config = ProgramConfig(
                            ops=ops,
                            weights={
-                                "scale_weight":
+                                "scale_weight": TensorConfig(
-                                TensorConfig(data_gen=partial(generate_scale)),
+                                    data_gen=partial(generate_scale)
-                                "bias_weight":
+                                ),
-                                TensorConfig(data_gen=partial(generate_bias))
+                                "bias_weight": TensorConfig(
+                                    data_gen=partial(generate_bias)
+                                ),
                            },
                            inputs={
-                                "input_data":
+                                "input_data": TensorConfig(
-                                TensorConfig(data_gen=partial(
+                                    data_gen=partial(
-                                    generate_input, dics, batch))
+                                        generate_input, dics, batch
+                                    )
+                                )
                            },
-                            outputs=["y_output"])
+                            outputs=["y_output"],
+                        )
                        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16, 16]}
            self.dynamic_shape.max_input_shape = {
@@ -117,19 +124,23 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), (1e-3, 1e-3)
    def add_skip_trt_case(self):
        pass

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_sigmoid.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_sigmoid.py
@@ -22,12 +22,10 @@ import unittest
 class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)
@@ -37,33 +35,34 @@ class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest):
                for slope in [0.1, 0.5]:
                    for offset in [0.2, 0.7]:
                        dics = [{"slope": slope, "offset": offset}]
-                        ops_config = [{
+                        ops_config = [
-                            "op_type": "hard_sigmoid",
+                            {
-                            "op_inputs": {
+                                "op_type": "hard_sigmoid",
-                                "X": ["input_data"],
+                                "op_inputs": {
-                            },
+                                    "X": ["input_data"],
-                            "op_outputs": {
+                                },
-                                "Out": ["output_data"]
+                                "op_outputs": {"Out": ["output_data"]},
-                            },
+                                "op_attrs": dics[0],
-                            "op_attrs": dics[0]
+                            }
-                        }]
+                        ]
                        ops = self.generate_op_config(ops_config)
                        program_config = ProgramConfig(
                            ops=ops,
                            weights={},
                            inputs={
-                                "input_data":
+                                "input_data": TensorConfig(
-                                TensorConfig(
+                                    data_gen=partial(generate_input, shape)
-                                    data_gen=partial(generate_input, shape))
+                                )
                            },
-                            outputs=["output_data"])
+                            outputs=["output_data"],
+                        )
                        yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.input_dim == 2:
                self.dynamic_shape.min_input_shape = {"input_data": [1, 8]}
@@ -98,14 +97,14 @@ class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest):
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_swish.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_swish.py
@@ -22,7 +22,6 @@ import unittest
 class TrtConvertHardSwishTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        inputs = program_config.inputs
        weights = program_config.weights
@@ -36,46 +35,46 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest):
        return True
    def sample_program_configs(self):
        def generate_input1(attrs: List[Dict[str, Any]]):
            return np.ones([1, 3, 32, 32]).astype(np.float32)
        for threshold in [6.0, 7.0, 100.0, 0.0, -1.0]:
            for scale in [5.0, 7.0, -1.0, 0.0, 100.0]:
                for offset in [3.0, 5.0, -1.0, 0.0, 100.0]:
-                    dics = [{
+                    dics = [
-                        "threshold": threshold,
+                        {
-                        "scale": scale,
+                            "threshold": threshold,
-                        "offset": offset
+                            "scale": scale,
-                    }]
+                            "offset": offset,
+                        }
-                    ops_config = [{
+                    ]
-                        "op_type": "hard_swish",
-                        "op_inputs": {
+                    ops_config = [
-                            "X": ["input_data"]
+                        {
-                        },
+                            "op_type": "hard_swish",
-                        "op_outputs": {
+                            "op_inputs": {"X": ["input_data"]},
-                            "Out": ["hard_swish_output_data"]
+                            "op_outputs": {"Out": ["hard_swish_output_data"]},
-                        },
+                            "op_attrs": dics[0],
-                        "op_attrs": dics[0]
+                        }
-                    }]
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input1, dics)
-                                data_gen=partial(generate_input1, dics))
+                            )
                        },
-                        outputs=["hard_swish_output_data"])
+                        outputs=["hard_swish_output_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 16, 16]}
            self.dynamic_shape.max_input_shape = {"input_data": [2, 3, 32, 32]}
@@ -97,19 +96,23 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-5, 1e-5)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-5, 1e-5)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_inverse.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_inverse.py
@@ -22,41 +22,41 @@ import unittest
 class TrtConvertInverse(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1():
            return np.random.random([32, 32]).astype(np.float32)
-        ops_config = [{
+        ops_config = [
-            "op_type": "inverse",
+            {
-            "op_inputs": {
+                "op_type": "inverse",
-                "Input": ["input_data"],
+                "op_inputs": {
-            },
+                    "Input": ["input_data"],
-            "op_outputs": {
+                },
-                "Output": ["output_data"]
+                "op_outputs": {"Output": ["output_data"]},
-            },
+                "op_attrs": {},
-            "op_attrs": {}
+            }
-        }]
+        ]
        ops = self.generate_op_config(ops_config)
        for i in range(10):
            program_config = ProgramConfig(
                ops=ops,
                weights={},
                inputs={
-                    "input_data":
+                    "input_data": TensorConfig(
-                    TensorConfig(data_gen=partial(generate_input1)),
+                        data_gen=partial(generate_input1)
+                    ),
                },
-                outputs=["output_data"])
+                outputs=["output_data"],
+            )
            yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 1],
@@ -82,14 +82,14 @@ class TrtConvertInverse(TrtLayerAutoScanTest):
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (0, 3), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (0, 3), 1e-5
+        yield self.create_inference_config(), (0, 3), 1e-3
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-3
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py
@@ -23,12 +23,10 @@ import unittest
 class TrtConvertLeakyReluTest(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True
    def sample_program_configs(self):
        def generate_input1(shape):
            return np.random.random(shape).astype(np.float32)
@@ -37,32 +35,35 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest):
                self.input_dim = len(shape)
                for alpha in [0.02, 1.0, 100.0, -1.0, 0.0]:
                    dics = [{"alpha": alpha}]
-                    ops_config = [{
+                    ops_config = [
-                        "op_type": "leaky_relu",
+                        {
-                        "op_inputs": {
+                            "op_type": "leaky_relu",
-                            "X": ["input_data"],
+                            "op_inputs": {
-                        },
+                                "X": ["input_data"],
-                        "op_outputs": {
+                            },
-                            "Out": ["y_data"],
+                            "op_outputs": {
-                        },
+                                "Out": ["y_data"],
-                        "op_attrs": dics[0]
+                            },
-                    }]
+                            "op_attrs": dics[0],
+                        }
+                    ]
                    ops = self.generate_op_config(ops_config)
                    program_config = ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={
-                            "input_data":
+                            "input_data": TensorConfig(
-                            TensorConfig(
+                                data_gen=partial(generate_input1, shape)
-                                data_gen=partial(generate_input1, shape))
+                            )
                        },
-                        outputs=["y_data"])
+                        outputs=["y_data"],
+                    )
                    yield program_config
    def sample_predictor_configs(
-            self, program_config) -> (paddle_infer.Config, List[int], float):
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.input_dim == 2:
                self.dynamic_shape.min_input_shape = {"input_data": [1, 8]}
@@ -101,25 +102,31 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest):
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-5, 1e-5)
+            attrs, False
+        ), (1e-3, 1e-3)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-5, 1e-5)
+            attrs, False
+        ), (1e-3, 1e-3)
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-5, 1e-5)
+            attrs, True
+        ), (1e-3, 1e-3)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-5, 1e-5)
+            attrs, True
+        ), (1e-3, 1e-3)
    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul_v2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul_v2.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pool2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pool2d.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reshape.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reshape.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scatter_nd_add.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scatter_nd_add.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shape.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shape.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_squeeze2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_squeeze2.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_stack.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_stack.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_sum.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_sum.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_tile.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_tile.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_transpose.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_transpose.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unfold.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unfold.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unsqueeze2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unsqueeze2.py
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_squeeze2_matmul_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_squeeze2_matmul_fuse_pass.py
--- a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py
+++ b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py