diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp
index 842584b50e8f57084487efc00afafe085de7be5d..c18e774ffd68448ed70df8039bd6abbdba59f0ce 100644
--- a/src/gopt/test/inference.cpp
+++ b/src/gopt/test/inference.cpp
@@ -1959,8 +1959,6 @@ TEST(TestEnableTensorCore, Nchw4Nchw) {
 }
 #endif
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, ConvBiasWithZ) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2023,10 +2021,7 @@ TEST(TestEnableTensorCore, ConvBiasWithZ) {
     func->execute();
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
-#endif
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, Pooling) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2094,7 +2089,6 @@ TEST(TestEnableTensorCore, Pooling) {
     func->execute();
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
-#endif
 
 TEST(TestGoptInference, EnableTensorCore) {
     REQUIRE_GPU(1);
@@ -2296,8 +2290,6 @@ TEST(FuseConvBiasZPass, BlockFuse) {
 }
 #endif
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, ShuffleMerge) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2391,7 +2383,6 @@ TEST(TestEnableTensorCore, ShuffleMerge) {
     func->execute();
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
-#endif
 
 #endif
 
@@ -2575,8 +2566,6 @@ TEST(TestGoptInference, EnableCHWN4) {
 }
 #endif
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4WarpPespective) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2664,7 +2653,6 @@ TEST(TestGoptInference, EnableCHWN4WarpPespective) {
     func->execute();
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
-#endif
 
 TEST(TestGoptInference, EnableCHWN4Pooling) {
     REQUIRE_GPU(1);
@@ -2754,8 +2742,6 @@ TEST(TestGoptInference, EnableCHWN4Pooling) {
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2878,7 +2864,6 @@ TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
     func->execute();
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
-#endif
 
 TEST(TestGoptInference, ConvertFormatNCHW4GPU) {
     REQUIRE_GPU(1);
@@ -3977,8 +3962,6 @@ TEST(TestGoptInference, FoldingConvDimshuffle) {
     func->execute();
 }
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -4063,7 +4046,6 @@ TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) {
     func->execute();
     MGB_ASSERT_TENSOR_EQ(host_y_fuse, host_y_non_fuse);
 }
-#endif
 
 #if CUDA_VERSION >= 10020
 TEST(TestGoptInference, FoldingConvDimshuffleNCHW32NCHW4) {
diff --git a/src/tensorrt/test/opr_replace.cpp b/src/tensorrt/test/opr_replace.cpp
index e3818375775ddb034f327f7e6cbd02592e1e39f2..699d304d541c0eb39bdbeb6b18a81e2208661ac8 100644
--- a/src/tensorrt/test/opr_replace.cpp
+++ b/src/tensorrt/test/opr_replace.cpp
@@ -223,7 +223,7 @@ TEST(TestTensorRTReplace, ElemAddFusion) {
     ASSERT_EQ(3u, trt_opr->cast_final_safe<opr::TensorRTOpr>()
                           .trt_manager()
                           .iobuf_size());
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
 }
 
 TEST(TestTensorRTReplace, BatchedMatrixMulBasic) {
diff --git a/src/tensorrt/test/tensorrt.cpp b/src/tensorrt/test/tensorrt.cpp
index 31ca8dc8533ad38f2b75924b931360f45f1047d7..d782699d23fe279b7c172bae457b9ada0029be82 100644
--- a/src/tensorrt/test/tensorrt.cpp
+++ b/src/tensorrt/test/tensorrt.cpp
@@ -44,14 +44,14 @@ TEST(TestOprTensorRT, Basic) {
     auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
                                     make_callback_copy(y2, host_z2)});
     func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 2e-4);
 
     auto&& host_x = net.host_x;
     auto&& gen = net.gen;
 
     *host_x = *gen({1, 23, 43, 43});
     func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 2e-4);
     *host_x = *gen({10, 23, 12, 12});
     func->execute();
     MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-3);
diff --git a/src/tensorrt/test/tensorrt_runtime.cpp b/src/tensorrt/test/tensorrt_runtime.cpp
index bc1132f36ecaad71bda0d55c5fbed99b7a2a6a11..4cd9b1ef4f49d2d33dab14290b3778260896a6f7 100644
--- a/src/tensorrt/test/tensorrt_runtime.cpp
+++ b/src/tensorrt/test/tensorrt_runtime.cpp
@@ -58,7 +58,7 @@ TEST(TestOprTensorRT, RuntimeBasic) {
     auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
                                     make_callback_copy(y2, host_z2)});
     func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
 }
 
 
@@ -128,13 +128,13 @@ TEST(TestOprTensorRT, RuntimeChangeBatchSize) {
     auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
                                     make_callback_copy(y2, host_z2)});
     func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
     *net.host_x = *net.gen({1, 23, 28, 28});
     func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
     *net.host_x = *net.gen({10, 23, 28, 28});
     func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
+    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
 }
 
 #endif  // MGB_ENABLE_TENSOR_RT