未验证 提交 1ac8ca4d 编写于 作者: L Leo Chen 提交者: GitHub

Fix UT failures (#45099)

上级 213f8038
...@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is, ...@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is,
} }
{ {
// the 2st field, rows information // the 2st field, rows information
uint64_t size; uint64_t size = 0;
is.read(reinterpret_cast<char*>(&size), sizeof(size)); is.read(reinterpret_cast<char*>(&size), sizeof(size));
PADDLE_ENFORCE_EQ(
is.good(),
true,
platform::errors::Unavailable("Cannot read the number of rows."));
auto& rows = *selected_rows->mutable_rows(); auto& rows = *selected_rows->mutable_rows();
rows.resize(size); rows.resize(size);
for (uint64_t i = 0; i < size; ++i) { for (uint64_t i = 0; i < size; ++i) {
......
...@@ -524,7 +524,7 @@ if(NOT WIN32) ...@@ -524,7 +524,7 @@ if(NOT WIN32)
set_tests_properties(test_post_training_quantization_program_resnet50 set_tests_properties(test_post_training_quantization_program_resnet50
PROPERTIES TIMEOUT 240) PROPERTIES TIMEOUT 240)
set_tests_properties(test_post_training_quantization_mobilenetv1 set_tests_properties(test_post_training_quantization_mobilenetv1
PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY") PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=NIGHTLY")
set_tests_properties(test_post_training_quantization_resnet50 set_tests_properties(test_post_training_quantization_resnet50
PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY") PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT
......
...@@ -241,6 +241,8 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -241,6 +241,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
def generate_quantized_model(self, def generate_quantized_model(self,
model_path, model_path,
quantizable_op_type, quantizable_op_type,
batch_size,
batch_nums,
algo="KL", algo="KL",
round_type="round", round_type="round",
is_full_quantize=False, is_full_quantize=False,
...@@ -263,6 +265,8 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -263,6 +265,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
ptq = PostTrainingQuantization(executor=exe, ptq = PostTrainingQuantization(executor=exe,
sample_generator=val_reader, sample_generator=val_reader,
model_dir=model_path, model_dir=model_path,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo, algo=algo,
batch_nums=batch_nums, batch_nums=batch_nums,
quantizable_op_type=quantizable_op_type, quantizable_op_type=quantizable_op_type,
...@@ -302,7 +306,8 @@ class TestPostTrainingQuantization(unittest.TestCase): ...@@ -302,7 +306,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
print("Start INT8 post training quantization for {0} on {1} images ...". print("Start INT8 post training quantization for {0} on {1} images ...".
format(model, sample_iterations * batch_size)) format(model, sample_iterations * batch_size))
self.generate_quantized_model(os.path.join(model_cache_folder, "model"), self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
quantizable_op_type, algo, round_type, quantizable_op_type, batch_size,
sample_iterations, algo, round_type,
is_full_quantize, is_use_cache_file, is_full_quantize, is_use_cache_file,
is_optimize_model, batch_nums, is_optimize_model, batch_nums,
onnx_format) onnx_format)
......
...@@ -225,8 +225,8 @@ class InferencePassTest(unittest.TestCase): ...@@ -225,8 +225,8 @@ class InferencePassTest(unittest.TestCase):
tensorrt_output = tensorrt_output.flatten() tensorrt_output = tensorrt_output.flatten()
np.testing.assert_allclose( np.testing.assert_allclose(
paddle_out,
tensorrt_output, tensorrt_output,
paddle_out,
rtol=rtol, rtol=rtol,
atol=atol, atol=atol,
err_msg='Output has diff between GPU and TensorRT. ') err_msg='Output has diff between GPU and TensorRT. ')
......
...@@ -47,11 +47,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): ...@@ -47,11 +47,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
self.trt_param.workspace_size = 1073741824 self.trt_param.workspace_size = 1073741824
def generate_input1(batch, attrs: List[Dict[str, Any]]): def generate_input1(batch, attrs: List[Dict[str, Any]]):
return np.ones([batch, attrs[0]['groups'] * 3, 64, return np.ones([batch, attrs[0]['groups'] * 3, 64, 64]).astype(
64]).astype(np.float32) np.float32) / 4
def generate_weight1(attrs: List[Dict[str, Any]]): def generate_weight1(attrs: List[Dict[str, Any]]):
return np.random.random([24, 3, 3, 3]).astype(np.float32) return np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5
batch_options = [1, 2] batch_options = [1, 2]
strides_options = [[2, 2], [1, 2]] strides_options = [[2, 2], [1, 2]]
...@@ -162,7 +162,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): ...@@ -162,7 +162,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
attrs, False), (1e-3, 1e-3) attrs, False), (1e-3, 1e-3)
self.trt_param.precision = paddle_infer.PrecisionType.Int8 self.trt_param.precision = paddle_infer.PrecisionType.Int8
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), (1e-3, 1e-3) attrs, False), (1e-2, 1e-2)
# for dynamic_shape # for dynamic_shape
generate_dynamic_shape(attrs) generate_dynamic_shape(attrs)
...@@ -174,7 +174,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): ...@@ -174,7 +174,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
attrs, True), (1e-3, 1e-3) attrs, True), (1e-3, 1e-3)
self.trt_param.precision = paddle_infer.PrecisionType.Int8 self.trt_param.precision = paddle_infer.PrecisionType.Int8
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), (1e-3, 1e-3) attrs, True), (1e-2, 1e-2)
def test(self): def test(self):
self.run_test() self.run_test()
......
...@@ -128,7 +128,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): ...@@ -128,7 +128,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
attrs, False), 1e-5 attrs, False), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5 attrs, False), (1e-3, 1e-3)
# for dynamic_shape # for dynamic_shape
generate_dynamic_shape(attrs) generate_dynamic_shape(attrs)
...@@ -137,7 +137,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): ...@@ -137,7 +137,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
attrs, True), 1e-5 attrs, True), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num( yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), 1e-5 attrs, True), (1e-3, 1e-3)
def test(self): def test(self):
self.run_test() self.run_test()
......
...@@ -44,9 +44,9 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest): ...@@ -44,9 +44,9 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
def generate_input1(dtype, attrs: List[Dict[str, Any]]): def generate_input1(dtype, attrs: List[Dict[str, Any]]):
if dtype == -1 or dtype == 5: if dtype == -1 or dtype == 5:
return np.random.random([1, 3, 64, 64]).astype(np.float32) return np.random.random([1, 3, 32, 32]).astype(np.float32)
elif dtype == 2: elif dtype == 2:
return np.random.random([1, 3, 64, 64]).astype(np.int32) return np.random.random([1, 3, 32, 32]).astype(np.int32)
for keep_dim in [True, False]: for keep_dim in [True, False]:
for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3], for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
...@@ -93,7 +93,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest): ...@@ -93,7 +93,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
def generate_dynamic_shape(attrs): def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]} self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 32, 32]}
def clear_dynamic_shape(): def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {} self.dynamic_shape.min_input_shape = {}
......
...@@ -154,7 +154,7 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): ...@@ -154,7 +154,7 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest):
def test(self): def test(self):
self.run_and_statis(quant=False, self.run_and_statis(quant=False,
max_examples=50, max_examples=25,
passes=["trt_flatten2_matmul_fuse_pass"]) passes=["trt_flatten2_matmul_fuse_pass"])
......
...@@ -79,7 +79,14 @@ class TensorRTPool3dTest(InferencePassTest): ...@@ -79,7 +79,14 @@ class TensorRTPool3dTest(InferencePassTest):
shutil.rmtree(self.path + "_opt_cache") shutil.rmtree(self.path + "_opt_cache")
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) if self.precision == AnalysisConfig.Precision.Float32:
atol, rtol = (1e-5, 1e-5)
elif self.precision == AnalysisConfig.Precision.Half:
atol, rtol = (1e-3, 1e-3)
else:
raise ValueError("Unsupported precision {}".format(
self.precision))
self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
self.assertTrue( self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......
...@@ -75,7 +75,14 @@ class TensorRTPoolTest(InferencePassTest): ...@@ -75,7 +75,14 @@ class TensorRTPoolTest(InferencePassTest):
shutil.rmtree(self.path + "_opt_cache") shutil.rmtree(self.path + "_opt_cache")
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu) if self.precision == AnalysisConfig.Precision.Float32:
atol, rtol = (1e-5, 1e-5)
elif self.precision == AnalysisConfig.Precision.Half:
atol, rtol = (1e-3, 1e-3)
else:
raise ValueError("Unsupported precision {}".format(
self.precision))
self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
self.assertTrue( self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......
...@@ -218,7 +218,10 @@ class TRTReduceMeanStaticFP16(InferencePassTest): ...@@ -218,7 +218,10 @@ class TRTReduceMeanStaticFP16(InferencePassTest):
def test_check_output(self): def test_check_output(self):
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, flatten=True) self.check_output_with_option(use_gpu,
flatten=True,
atol=1e-3,
rtol=1e-3)
self.assertTrue( self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
...@@ -244,7 +247,10 @@ class TRTReduceMeanFP16Static(InferencePassTest): ...@@ -244,7 +247,10 @@ class TRTReduceMeanFP16Static(InferencePassTest):
def test_check_output(self): def test_check_output(self):
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
use_gpu = True use_gpu = True
self.check_output_with_option(use_gpu, flatten=True) self.check_output_with_option(use_gpu,
flatten=True,
atol=1e-3,
rtol=1e-3)
self.assertTrue( self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......
...@@ -530,21 +530,26 @@ class TestStaticGraphShape(unittest.TestCase): ...@@ -530,21 +530,26 @@ class TestStaticGraphShape(unittest.TestCase):
self.assertEqual(C.shape, (-1, 384)) self.assertEqual(C.shape, (-1, 384))
@unittest.skipIf(not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not compiled with CUDA or not support the bfloat16")
class TestBF16(unittest.TestCase): class TestBF16(unittest.TestCase):
""" """
EinsumOp support bfloat16 type, add unittest here for the correctness. EinsumOp support bfloat16 type, add unittest here for the correctness.
""" """
def test_shape(self): def test_shape(self):
if paddle.is_compiled_with_cuda() and _is_gpu_bfloat16_supported(): cuda_major = paddle.version.cuda().split('.')[0].strip()
""" MatmulKernel support bfloat16 only if cuda_major >= 11.0 and Compute Capability >= 8.0 if int(cuda_major) >= 11:
""" MatmulKernel support bfloat16 only if cuda_major > 11.0.
""" """
A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16) A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16)
A = A.cuda() A = A.cuda()
B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16) B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
B = B.cuda() B = B.cuda()
C = paddle.einsum('i,i->', A, B) C = paddle.einsum('i,i->', A, B)
self.assertEqual(C.astype(paddle.float32).item(), 8.0) D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
self.assertEqual(C.item(), D.item())
class TestComplex(unittest.TestCase): class TestComplex(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册