diff --git a/paddle/fluid/framework/selected_rows_utils.cc b/paddle/fluid/framework/selected_rows_utils.cc
index 67bb6cac67026b6ae268cfa823e6cf2d344ab270..6d961b92f5da063828a8e40d64f1846f302cee5c 100644
--- a/paddle/fluid/framework/selected_rows_utils.cc
+++ b/paddle/fluid/framework/selected_rows_utils.cc
@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is,
   }
   {
     // the 2st field, rows information
-    uint64_t size;
+    uint64_t size = 0;
     is.read(reinterpret_cast<char*>(&size), sizeof(size));
+    PADDLE_ENFORCE_EQ(
+        is.good(),
+        true,
+        platform::errors::Unavailable("Cannot read the number of rows."));
     auto& rows = *selected_rows->mutable_rows();
     rows.resize(size);
     for (uint64_t i = 0; i < size; ++i) {
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index 7e38e407336e5fab6832cdb7179554b87223b193..86f3e759d42a825f6d627bf9f0b066b12b36b1b2 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -524,7 +524,7 @@ if(NOT WIN32)
   set_tests_properties(test_post_training_quantization_program_resnet50
                        PROPERTIES TIMEOUT 240)
   set_tests_properties(test_post_training_quantization_mobilenetv1
-                       PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
+                       PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=NIGHTLY")
   set_tests_properties(test_post_training_quantization_resnet50
                        PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
   set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index fc675ed4a07d83571abc61e10f79fe804e85b09a..499367d339cab336d0e774cbfe30e75e1650997d 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -241,6 +241,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
     def generate_quantized_model(self,
                                  model_path,
                                  quantizable_op_type,
+                                 batch_size,
+                                 batch_nums,
                                  algo="KL",
                                  round_type="round",
                                  is_full_quantize=False,
@@ -263,6 +265,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
         ptq = PostTrainingQuantization(executor=exe,
                                        sample_generator=val_reader,
                                        model_dir=model_path,
+                                       batch_size=batch_size,
+                                       batch_nums=batch_nums,
                                        algo=algo,
-                                       batch_nums=batch_nums,
                                        quantizable_op_type=quantizable_op_type,
@@ -302,7 +305,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
         print("Start INT8 post training quantization for {0} on {1} images ...".
               format(model, sample_iterations * batch_size))
         self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
-                                      quantizable_op_type, algo, round_type,
+                                      quantizable_op_type, batch_size,
+                                      sample_iterations, algo, round_type,
                                       is_full_quantize, is_use_cache_file,
                                       is_optimize_model, batch_nums, onnx_format)
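Note: the selected_rows_utils.cc hunk zero-initializes the length prefix and validates the stream state before the value is used to resize a buffer; a short or failed read would otherwise leave garbage in size. A minimal Python sketch of the same defensive pattern, assuming a hypothetical read_row_count helper that is not part of Paddle:

    import struct

    def read_row_count(stream):
        raw = stream.read(8)  # a uint64_t length prefix, matching sizeof(size) in the C++ code
        if len(raw) != 8:     # analogue of the PADDLE_ENFORCE_EQ(is.good(), true, ...) check
            raise IOError("Cannot read the number of rows.")
        return struct.unpack("<Q", raw)[0]  # little-endian unsigned 64-bit
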
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
index 5393014fe7d34bf8ebb60c0f6c741cd0d385fc61..044451695d4d0269d6e46259e6133e32f58e349a 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py
@@ -225,8 +225,8 @@ class InferencePassTest(unittest.TestCase):
             tensorrt_output = tensorrt_output.flatten()

             np.testing.assert_allclose(
-                paddle_out,
                 tensorrt_output,
+                paddle_out,
                 rtol=rtol,
                 atol=atol,
                 err_msg='Output has diff between GPU and TensorRT. ')
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
index e4514dd0ee948ac988a46ecd2784e3f87de13a9f..4d8f4e2b19549d24c242c130b94848403c3e4886 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
@@ -47,11 +47,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
         self.trt_param.workspace_size = 1073741824

         def generate_input1(batch, attrs: List[Dict[str, Any]]):
-            return np.ones([batch, attrs[0]['groups'] * 3, 64,
-                            64]).astype(np.float32)
+            return np.ones([batch, attrs[0]['groups'] * 3, 64, 64]).astype(
+                np.float32) / 4

         def generate_weight1(attrs: List[Dict[str, Any]]):
-            return np.random.random([24, 3, 3, 3]).astype(np.float32)
+            return np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5

         batch_options = [1, 2]
         strides_options = [[2, 2], [1, 2]]
@@ -162,7 +162,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
             attrs, False), (1e-3, 1e-3)
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), (1e-3, 1e-3)
+            attrs, False), (1e-2, 1e-2)

         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -174,7 +174,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
             attrs, True), (1e-3, 1e-3)
         self.trt_param.precision = paddle_infer.PrecisionType.Int8
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), (1e-3, 1e-3)
+            attrs, True), (1e-2, 1e-2)

     def test(self):
         self.run_test()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
index 457db86c3236d31771c8cd4e49fb1804151f604f..56767b3457791d9a9530832598175380267162be 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
@@ -128,7 +128,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
             attrs, False), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+            attrs, False), (1e-3, 1e-3)

         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -137,7 +137,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
             attrs, True), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
         yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+            attrs, True), (1e-3, 1e-3)

     def test(self):
         self.run_test()
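Note: the inference_pass_test.py hunk reorders the call so np.testing.assert_allclose receives (actual, desired) as intended: the TensorRT output under test first, the GPU baseline second. The order matters because the check is |actual - desired| <= atol + rtol * |desired|, so rtol scales with the reference values, and the mismatch report labels the arguments accordingly. A small self-contained illustration (the array values are made up):

    import numpy as np

    reference = np.array([100.0, 200.0])   # stands in for paddle_out, the GPU baseline
    candidate = np.array([100.5, 199.2])   # stands in for tensorrt_output
    # passes: |candidate - reference| stays within atol + rtol * |reference|
    np.testing.assert_allclose(candidate, reference, rtol=1e-2, atol=1e-2)
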
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
index 68c3e9bd377db8769871a3b7791f58cd75ecfb08..42b234827b1e720d8e7a4b6bfa7fac8768dd604e 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py
@@ -44,9 +44,9 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
         def generate_input1(dtype, attrs: List[Dict[str, Any]]):
             if dtype == -1 or dtype == 5:
-                return np.random.random([1, 3, 64, 64]).astype(np.float32)
+                return np.random.random([1, 3, 32, 32]).astype(np.float32)
             elif dtype == 2:
-                return np.random.random([1, 3, 64, 64]).astype(np.int32)
+                return np.random.random([1, 3, 32, 32]).astype(np.int32)

         for keep_dim in [True, False]:
             for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
@@ -93,7 +93,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
             self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
-            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 32, 32]}

         def clear_dynamic_shape():
             self.dynamic_shape.min_input_shape = {}
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
index a02cdb6a34791ce3657365f8e74bc8eeba342694..730babf2aab6dd502715a745a8a136d36ed34bb7 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py
@@ -154,7 +154,7 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest):
     def test(self):
         self.run_and_statis(quant=False,
-                            max_examples=50,
+                            max_examples=25,
                             passes=["trt_flatten2_matmul_fuse_pass"])
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
index 22f278d6d5d18f19d999fe9f13fec9a0d357d771..c4488a57f96057c2e2edd9773ecb699c628b8cee 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py
@@ -79,7 +79,14 @@ class TensorRTPool3dTest(InferencePassTest):
             shutil.rmtree(self.path + "_opt_cache")
         if core.is_compiled_with_cuda():
            use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
index 3812642d2a5e4b19cb7c59cbbead9e35b30a3cee..f6eaa2fb8c75e0d1d525483ebb4b457470c85065 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py
@@ -75,7 +75,14 @@ class TensorRTPoolTest(InferencePassTest):
             shutil.rmtree(self.path + "_opt_cache")
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu)
+            if self.precision == AnalysisConfig.Precision.Float32:
+                atol, rtol = (1e-5, 1e-5)
+            elif self.precision == AnalysisConfig.Precision.Half:
+                atol, rtol = (1e-3, 1e-3)
+            else:
+                raise ValueError("Unsupported precision {}".format(
+                    self.precision))
+            self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
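Note: the two pool tests above duplicate the same precision-to-tolerance mapping. A hypothetical helper (not part of the patch) that factors it out; it assumes AnalysisConfig is importable from paddle.fluid.core, as in these tests:

    from paddle.fluid.core import AnalysisConfig

    def tolerance_for(precision):
        # FP32 should reproduce the GPU baseline tightly; FP16 accumulates
        # rounding error, so the tests relax both tolerances to 1e-3.
        if precision == AnalysisConfig.Precision.Float32:
            return 1e-5, 1e-5
        if precision == AnalysisConfig.Precision.Half:
            return 1e-3, 1e-3
        raise ValueError("Unsupported precision {}".format(precision))

    atol, rtol = tolerance_for(AnalysisConfig.Precision.Half)  # (1e-3, 1e-3)
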
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
index 1086e1428e09f359f66b8ef251ac65b14d14a9d5..ead27625af24c08d2e23b5998b426a7b41d47620 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py
@@ -218,7 +218,10 @@ class TRTReduceMeanStaticFP16(InferencePassTest):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))

@@ -244,7 +247,10 @@ class TRTReduceMeanFP16Static(InferencePassTest):
     def test_check_output(self):
         if core.is_compiled_with_cuda():
             use_gpu = True
-            self.check_output_with_option(use_gpu, flatten=True)
+            self.check_output_with_option(use_gpu,
+                                          flatten=True,
+                                          atol=1e-3,
+                                          rtol=1e-3)
             self.assertTrue(
                 PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))

diff --git a/python/paddle/fluid/tests/unittests/test_einsum_v2.py b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
index 971ad1fa744e92bb98cca12bb54dca0241bdd522..7230cd97ebdd884a426b5fd1259540ac644f4443 100644
--- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py
@@ -530,21 +530,26 @@ class TestStaticGraphShape(unittest.TestCase):
         self.assertEqual(C.shape, (-1, 384))


+@unittest.skipIf(not core.is_compiled_with_cuda()
+                 or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+                 "core is not compiled with CUDA or does not support bfloat16")
 class TestBF16(unittest.TestCase):
     """
     EinsumOp support bfloat16 type, add unittest here for the correctness.
     """

     def test_shape(self):
-        if paddle.is_compiled_with_cuda() and _is_gpu_bfloat16_supported():
-            """ MatmulKernel support bfloat16 only if cuda_major >= 11.0 and Compute Capability >= 8.0
+        cuda_major = paddle.version.cuda().split('.')[0].strip()
+        if int(cuda_major) >= 11:
+            """ MatmulKernel supports bfloat16 only if cuda_major >= 11.0.
             """
             A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16)
             A = A.cuda()
             B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
             B = B.cuda()
             C = paddle.einsum('i,i->', A, B)
-            self.assertEqual(C.astype(paddle.float32).item(), 8.0)
+            D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
+            self.assertEqual(C.item(), D.item())


 class TestComplex(unittest.TestCase):
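Note: the test_einsum_v2.py change compares bfloat16 against bfloat16 instead of against a float literal. bfloat16 keeps only 8 mantissa bits, so routing the expected value through the same dtype compares like with like; the dot product here (8.0) happens to be exactly representable, but expectations that round in bfloat16 would otherwise fail spuriously. A standalone sketch of the pattern (assumes a CUDA build with bfloat16 support, which the new skipIf decorator guards):

    import numpy as np
    import paddle

    a = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16).cuda()
    b = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16).cuda()
    c = paddle.einsum('i,i->', a, b)                      # 1*2 + 2*3 = 8
    expected = paddle.to_tensor(8.0).astype(paddle.bfloat16)
    assert c.item() == expected.item()                    # both rounded to bfloat16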