diff --git a/paddle/fluid/framework/selected_rows_utils.cc b/paddle/fluid/framework/selected_rows_utils.cc
index 67bb6cac67026b6ae268cfa823e6cf2d344ab270..6d961b92f5da063828a8e40d64f1846f302cee5c 100644
--- a/paddle/fluid/framework/selected_rows_utils.cc
+++ b/paddle/fluid/framework/selected_rows_utils.cc
@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is,
   }
   {
     // the 2st field, rows information
-    uint64_t size;
+    uint64_t size = 0;
     is.read(reinterpret_cast<char*>(&size), sizeof(size));
+    PADDLE_ENFORCE_EQ(
+        is.good(),
+        true,
+        platform::errors::Unavailable("Cannot read the number of rows."));
     auto& rows = *selected_rows->mutable_rows();
     rows.resize(size);
     for (uint64_t i = 0; i < size; ++i) {
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index 7e38e407336e5fab6832cdb7179554b87223b193..86f3e759d42a825f6d627bf9f0b066b12b36b1b2 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -524,7 +524,7 @@ if(NOT WIN32)
   set_tests_properties(test_post_training_quantization_program_resnet50
                        PROPERTIES TIMEOUT 240)
   set_tests_properties(test_post_training_quantization_mobilenetv1
-                       PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
+                       PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=NIGHTLY")
   set_tests_properties(test_post_training_quantization_resnet50
                        PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
   set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index fc675ed4a07d83571abc61e10f79fe804e85b09a..499367d339cab336d0e774cbfe30e75e1650997d 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -241,6 +241,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
     def generate_quantized_model(self,
                                  model_path,
                                  quantizable_op_type,
+                                 batch_size,
+                                 batch_nums,
                                  algo="KL",
                                  round_type="round",
                                  is_full_quantize=False,
@@ -263,6 +265,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
         ptq = PostTrainingQuantization(executor=exe,
                                        sample_generator=val_reader,
                                        model_dir=model_path,
+                                       batch_size=batch_size,
+                                       batch_nums=batch_nums,
                                        algo=algo,
-                                       batch_nums=batch_nums,
                                        quantizable_op_type=quantizable_op_type,
@@ -302,7 +305,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
         print("Start INT8 post training quantization for {0} on {1} images ...".
               format(model, sample_iterations * batch_size))
         self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
-                                      quantizable_op_type, algo, round_type,
+                                      quantizable_op_type, batch_size,
+                                      sample_iterations, algo, round_type,
                                       is_full_quantize, is_use_cache_file,
                                       is_optimize_model, batch_nums, onnx_format)
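Note: the selected_rows_utils.cc hunk zero-initializes the length prefix and validates the stream state before the value is used to resize a buffer; a short or failed read would otherwise leave garbage in size. A minimal Python sketch of the same defensive pattern, assuming a hypothetical read_row_count helper that is not part of Paddle:

    import struct

    def read_row_count(stream):
        raw = stream.read(8)  # a uint64_t length prefix, matching sizeof(size) in the C++ code
        if len(raw) != 8:     # analogue of the PADDLE_ENFORCE_EQ(is.good(), true, ...) check
            raise IOError("Cannot read the number of rows.")
        return struct.unpack("<Q", raw)[0]  # little-endian unsigned 64-bit
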
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
index 5393014fe7d34bf8ebb60c0f6c741cd0d385fc61..044451695d4d0269d6e46259e6133e32f58e349a 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -225,8 +225,8 @@ class InferencePassTest(unittest.TestCase):
             tensorrt_output = tensorrt_output.flatten()

             np.testing.assert_allclose(
-                paddle_out,
                 tensorrt_output,
+                paddle_out,
                 rtol=rtol,
                 atol=atol,
                 err_msg='Output has diff between GPU and TensorRT. ')
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
index e4514dd0ee948ac988a46ecd2784e3f87de13a9f..4d8f4e2b19549d24c242c130b94848403c3e4886 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -47,11 +47,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
         self.trt_param.workspace_size = 1073741824

         def generate_input1(batch, attrs: List[Dict[str, Any]]):
-            return np.ones([batch, attrs[0]['groups'] * 3, 64,
-                            64]).astype(np.float32)
+            return np.ones([batch, attrs[0]['groups'] * 3, 64, 64]).astype(
+                np.float32) / 4

         def generate_weight1(attrs: List[Dict[str, Any]]):
-            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+            return np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5

         batch_options = [1, 2]
         strides_options = [[2, 2], [1, 2]]
@@ -162,7 +162,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
             attrs, False), (1e-3, 1e-3)
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False), (1e-2, 1e-2)

         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -174,7 +174,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
             attrs, True), (1e-3, 1e-3)
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True), (1e-2, 1e-2)

     def test(self):
         self.run_test()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
index 457db86c3236d31771c8cd4e49fb1804151f604f..56767b3457791d9a9530832598175380267162be 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
@@ -128,7 +128,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
             attrs, False), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False), (1e-3, 1e-3)

         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -137,7 +137,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
             attrs, True), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True), (1e-3, 1e-3)

     def test(self):
         self.run_test()
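Note: the inference_pass_test.py hunk reorders the call so np.testing.assert_allclose receives (actual, desired) as intended: the TensorRT output under test first, the GPU baseline second. The order matters because the check is |actual - desired| <= atol + rtol * |desired|, so rtol scales with the reference values, and the mismatch report labels the arguments accordingly. A small self-contained illustration (the array values are made up):

    import numpy as np

    reference = np.array([100.0, 200.0])   # stands in for paddle_out, the GPU baseline
    candidate = np.array([100.5, 199.2])   # stands in for tensorrt_output
    # passes: |candidate - reference| stays within atol + rtol * |reference|
    np.testing.assert_allclose(candidate, reference, rtol=1e-2, atol=1e-2)
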
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
index 68c3e9bd377db8769871a3b7791f58cd75ecfb08..42b234827b1e720d8e7a4b6bfa7fac8768dd604e 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
@@ -44,9 +44,9 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
         def generate_input1(dtype, attrs: List[Dict[str, Any]]):
             if dtype == -1 or dtype == 5:
-                return np.random.random([1, 3, 64, 64]).astype(np.float32)
+                return np.random.random([1, 3, 32, 32]).astype(np.float32)
             elif dtype == 2:
-                return np.random.random([1, 3, 64, 64]).astype(np.int32)
+                return np.random.random([1, 3, 32, 32]).astype(np.int32)

         for keep_dim in [True, False]:
             for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
@@ -93,7 +93,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
             self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 32, 32]}

         def clear_dynamic_shape():
             self.dynamic_shape.min_input_shape = {}
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
index a02cdb6a34791ce3657365f8e74bc8eeba342694..730babf2aab6dd502715a745a8a136d36ed34bb7 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
@@ -154,7 +154,7 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest):
     def test(self):
         self.run_and_statis(quant=False,
-                            max_examples=50,
+                            max_examples=25,
                             passes=["trt_flatten2_matmul_fuse_pass"])
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
index 22f278d6d5d18f19d999fe9f13fec9a0d357d771..c4488a57f96057c2e2edd9773ecb699c628b8cee 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
@@ -79,7 +79,14 @@ class TensorRTPool3dTest(InferencePassTest):
             shutil.rmtree(self.path + "_opt_cache")
         if core.is_compiled_with_cuda():
            use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
index 3812642d2a5e4b19cb7c59cbbead9e35b30a3cee..f6eaa2fb8c75e0d1d525483ebb4b457470c85065 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
@@ -75,7 +75,14 @@ class TensorRTPoolTest(InferencePassTest):
             shutil.rmtree(self.path + "_opt_cache")
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
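Note: the two pool tests above duplicate the same precision-to-tolerance mapping. A hypothetical helper (not part of the patch) that factors it out; it assumes AnalysisConfig is importable from paddle.fluid.core, as in these tests:

    from paddle.fluid.core import AnalysisConfig

    def tolerance_for(precision):
        # FP32 should reproduce the GPU baseline tightly; FP16 accumulates
        # rounding error, so the tests relax both tolerances to 1e-3.
        if precision == AnalysisConfig.Precision.Float32:
            return 1e-5, 1e-5
        if precision == AnalysisConfig.Precision.Half:
            return 1e-3, 1e-3
        raise ValueError("Unsupported precision {}".format(precision))

    atol, rtol = tolerance_for(AnalysisConfig.Precision.Half)  # (1e-3, 1e-3)
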
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
index 1086e1428e09f359f66b8ef251ac65b14d14a9d5..ead27625af24c08d2e23b5998b426a7b41d47620 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
@@ -218,7 +218,10 @@ class TRTReduceMeanStaticFP16(InferencePassTest):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))

@@ -244,7 +247,10 @@ class TRTReduceMeanFP16Static(InferencePassTest):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))

diff --git a/python/paddle/fluid/tests/unittests/test_einsum_v2.py b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
index 971ad1fa744e92bb98cca12bb54dca0241bdd522..7230cd97ebdd884a426b5fd1259540ac644f4443 100644
--- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
@@ -530,21 +530,26 @@ class TestStaticGraphShape(unittest.TestCase):
         self.assertEqual(C.shape, (-1, 384))


+@unittest.skipIf(not core.is_compiled_with_cuda()
+                 or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+                 "core is not compiled with CUDA or does not support bfloat16")
 class TestBF16(unittest.TestCase):
     """
     EinsumOp support bfloat16 type, add unittest here for the correctness.
     """

     def test_shape(self):
-        if paddle.is_compiled_with_cuda() and _is_gpu_bfloat16_supported():
-            """ MatmulKernel support bfloat16 only if cuda_major >= 11.0 and Compute Capability >= 8.0
+        cuda_major = paddle.version.cuda().split('.')[0].strip()
+        if int(cuda_major) >= 11:
+            """ MatmulKernel supports bfloat16 only if cuda_major >= 11.0.
             """
             A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16)
             A = A.cuda()
             B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
             B = B.cuda()
             C = paddle.einsum('i,i->', A, B)
-            self.assertEqual(C.astype(paddle.float32).item(), 8.0)
+            D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
+            self.assertEqual(C.item(), D.item())


 class TestComplex(unittest.TestCase):
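Note: the test_einsum_v2.py change compares bfloat16 against bfloat16 instead of against a float literal. bfloat16 keeps only 8 mantissa bits, so routing the expected value through the same dtype compares like with like; the dot product here (8.0) happens to be exactly representable, but expectations that round in bfloat16 would otherwise fail spuriously. A standalone sketch of the pattern (assumes a CUDA build with bfloat16 support, which the new skipIf decorator guards):

    import numpy as np
    import paddle

    a = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16).cuda()
    b = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16).cuda()
    c = paddle.einsum('i,i->', a, b)                      # 1*2 + 2*3 = 8
    expected = paddle.to_tensor(8.0).astype(paddle.bfloat16)
    assert c.item() == expected.item()                    # both rounded to bfloat16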