Fix UT failures (#45099)

1ac8ca4d · Leo Chen · GitHub · 213f8038 · 1ac8ca4d · 1ac8ca4d
12 changed files
--- a/paddle/fluid/framework/selected_rows_utils.cc
+++ b/paddle/fluid/framework/selected_rows_utils.cc
@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is,
  }
  {
    // the 2st field, rows information
-    uint64_t size;
+    uint64_t size = 0;
    is.read(reinterpret_cast<char*>(&size), sizeof(size));
+    PADDLE_ENFORCE_EQ(
+        is.good(),
+        true,
+        platform::errors::Unavailable("Cannot read the number of rows."));
    auto& rows = *selected_rows->mutable_rows();
    rows.resize(size);
    for (uint64_t i = 0; i < size; ++i) {

--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -524,7 +524,7 @@ if(NOT WIN32)
  set_tests_properties(test_post_training_quantization_program_resnet50
                       PROPERTIES TIMEOUT 240)
  set_tests_properties(test_post_training_quantization_mobilenetv1
-                       PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
+                       PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=NIGHTLY")
  set_tests_properties(test_post_training_quantization_resnet50
                       PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
  set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT

--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -241,6 +241,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
    def generate_quantized_model(self,
                                 model_path,
                                 quantizable_op_type,
+                                 batch_size,
+                                 batch_nums,
                                 algo="KL",
                                 round_type="round",
                                 is_full_quantize=False,
@@ -263,6 +265,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
        ptq = PostTrainingQuantization(executor=exe,
                                       sample_generator=val_reader,
                                       model_dir=model_path,
+                                       batch_size=batch_size,
+                                       batch_nums=batch_nums,
                                       algo=algo,
                                       batch_nums=batch_nums,
                                       quantizable_op_type=quantizable_op_type,
@@ -302,7 +306,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
        print("Start INT8 post training quantization for {0} on {1} images ...".
              format(model, sample_iterations * batch_size))
        self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
-                                      quantizable_op_type, algo, round_type,
+                                      quantizable_op_type, batch_size,
+                                      sample_iterations, algo, round_type,
                                      is_full_quantize, is_use_cache_file,
                                      is_optimize_model, batch_nums,
                                      onnx_format)

--- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -225,8 +225,8 @@ class InferencePassTest(unittest.TestCase):
                    tensorrt_output = tensorrt_output.flatten()

                np.testing.assert_allclose(
-                    paddle_out,
                    tensorrt_output,
+                    paddle_out,
                    rtol=rtol,
                    atol=atol,
                    err_msg='Output has diff between GPU and TensorRT. ')

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -47,11 +47,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
        self.trt_param.workspace_size = 1073741824

        def generate_input1(batch, attrs: List[Dict[str, Any]]):
-            return np.ones([batch, attrs[0]['groups'] * 3, 64,
-                            64]).astype(np.float32)
+            return np.ones([batch, attrs[0]['groups'] * 3, 64, 64]).astype(
+                np.float32) / 4

        def generate_weight1(attrs: List[Dict[str, Any]]):
-            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+            return np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5

        batch_options = [1, 2]
        strides_options = [[2, 2], [1, 2]]
@@ -162,7 +162,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
            attrs, False), (1e-3, 1e-3)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False), (1e-2, 1e-2)

        # for dynamic_shape
        generate_dynamic_shape(attrs)
@@ -174,7 +174,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
            attrs, True), (1e-3, 1e-3)
        self.trt_param.precision = paddle_infer.PrecisionType.Int8
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True), (1e-2, 1e-2)

    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
@@ -128,7 +128,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
            attrs, False), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False), (1e-3, 1e-3)

        # for dynamic_shape
        generate_dynamic_shape(attrs)
@@ -137,7 +137,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
            attrs, True), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True), (1e-3, 1e-3)

    def test(self):
        self.run_test()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
@@ -44,9 +44,9 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):

        def generate_input1(dtype, attrs: List[Dict[str, Any]]):
            if dtype == -1 or dtype == 5:
-                return np.random.random([1, 3, 64, 64]).astype(np.float32)
+                return np.random.random([1, 3, 32, 32]).astype(np.float32)
            elif dtype == 2:
-                return np.random.random([1, 3, 64, 64]).astype(np.int32)
+                return np.random.random([1, 3, 32, 32]).astype(np.int32)

        for keep_dim in [True, False]:
            for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
@@ -93,7 +93,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
            self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 32, 32]}

        def clear_dynamic_shape():
            self.dynamic_shape.min_input_shape = {}

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
@@ -154,7 +154,7 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest):

    def test(self):
        self.run_and_statis(quant=False,
-                            max_examples=50,
+                            max_examples=25,
                            passes=["trt_flatten2_matmul_fuse_pass"])



--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
@@ -79,7 +79,14 @@ class TensorRTPool3dTest(InferencePassTest):
            shutil.rmtree(self.path + "_opt_cache")
        if core.is_compiled_with_cuda():
            use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
@@ -75,7 +75,14 @@ class TensorRTPoolTest(InferencePassTest):
            shutil.rmtree(self.path + "_opt_cache")
        if core.is_compiled_with_cuda():
            use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
@@ -218,7 +218,10 @@ class TRTReduceMeanStaticFP16(InferencePassTest):
    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))

@@ -244,7 +247,10 @@ class TRTReduceMeanFP16Static(InferencePassTest):
    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


--- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
@@ -530,21 +530,26 @@ class TestStaticGraphShape(unittest.TestCase):
        self.assertEqual(C.shape, (-1, 384))


+@unittest.skipIf(not core.is_compiled_with_cuda()
+                 or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+                 "core is not compiled with CUDA or not support the bfloat16")
 class TestBF16(unittest.TestCase):
    """
    EinsumOp support bfloat16 type, add unittest here for the correctness.
    """

    def test_shape(self):
-        if paddle.is_compiled_with_cuda() and _is_gpu_bfloat16_supported():
-            """ MatmulKernel support bfloat16 only if cuda_major >= 11.0 and Compute Capability >= 8.0
+        cuda_major = paddle.version.cuda().split('.')[0].strip()
+        if int(cuda_major) >= 11:
+            """ MatmulKernel support bfloat16 only if cuda_major > 11.0.
            """
            A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16)
            A = A.cuda()
            B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
            B = B.cuda()
            C = paddle.einsum('i,i->', A, B)
-            self.assertEqual(C.astype(paddle.float32).item(), 8.0)
+            D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
+            self.assertEqual(C.item(), D.item())


 class TestComplex(unittest.TestCase):