Unverified commit 1ac8ca4d, authored by Leo Chen, committed by GitHub

Fix UT failures (#45099)

Parent 213f8038
......@@ -72,8 +72,12 @@ void DeserializeFromStream(std::istream& is,
}
{
// the 2nd field, rows information
uint64_t size;
uint64_t size = 0;
is.read(reinterpret_cast<char*>(&size), sizeof(size));
PADDLE_ENFORCE_EQ(
is.good(),
true,
platform::errors::Unavailable("Cannot read the number of rows."));
auto& rows = *selected_rows->mutable_rows();
rows.resize(size);
for (uint64_t i = 0; i < size; ++i) {
......
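The hunk above zero-initializes `size` and verifies the stream after the read, so a truncated or corrupt file fails loudly instead of leaving `size` uninitialized. A minimal Python sketch of the same defensive pattern (the function and names are illustrative, not part of the change):

```python
import io
import struct

def read_row_count(stream):
    # Read the little-endian uint64 row count, then verify the read actually
    # succeeded before trusting the value, mirroring the is.good() check.
    buf = stream.read(8)
    if len(buf) != 8:
        raise IOError("Cannot read the number of rows.")
    (size,) = struct.unpack("<Q", buf)
    return size

print(read_row_count(io.BytesIO(struct.pack("<Q", 3))))  # 3
```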
......@@ -524,7 +524,7 @@ if(NOT WIN32)
set_tests_properties(test_post_training_quantization_program_resnet50
PROPERTIES TIMEOUT 240)
set_tests_properties(test_post_training_quantization_mobilenetv1
PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=NIGHTLY")
set_tests_properties(test_post_training_quantization_resnet50
PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY")
set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT
......
......@@ -241,6 +241,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
def generate_quantized_model(self,
model_path,
quantizable_op_type,
batch_size,
batch_nums,
algo="KL",
round_type="round",
is_full_quantize=False,
......@@ -263,6 +265,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
ptq = PostTrainingQuantization(executor=exe,
sample_generator=val_reader,
model_dir=model_path,
batch_size=batch_size,
batch_nums=batch_nums,
algo=algo,
batch_nums=batch_nums,
quantizable_op_type=quantizable_op_type,
......@@ -302,7 +306,8 @@ class TestPostTrainingQuantization(unittest.TestCase):
print("Start INT8 post training quantization for {0} on {1} images ...".
format(model, sample_iterations * batch_size))
self.generate_quantized_model(os.path.join(model_cache_folder, "model"),
quantizable_op_type, algo, round_type,
quantizable_op_type, batch_size,
sample_iterations, algo, round_type,
is_full_quantize, is_use_cache_file,
is_optimize_model, batch_nums,
onnx_format)
......
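For reference, a minimal sketch of how the reworked signature threads `batch_size` and `batch_nums` through to the constructor, assuming the PaddleSlim `PostTrainingQuantization` API the test already uses (the executor, reader, and model path are placeholders):

```python
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

def quantize_model(exe, val_reader, model_path, quantizable_op_type,
                   batch_size, batch_nums, algo="KL"):
    # batch_size and batch_nums are now explicit arguments, so callers can
    # trade calibration coverage (batch_size * batch_nums samples) against
    # test runtime instead of relying on defaults.
    ptq = PostTrainingQuantization(executor=exe,
                                   sample_generator=val_reader,
                                   model_dir=model_path,
                                   batch_size=batch_size,
                                   batch_nums=batch_nums,
                                   algo=algo,
                                   quantizable_op_type=quantizable_op_type)
    return ptq.quantize()
```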
......@@ -225,8 +225,8 @@ class InferencePassTest(unittest.TestCase):
tensorrt_output = tensorrt_output.flatten()
np.testing.assert_allclose(
paddle_out,
tensorrt_output,
paddle_out,
rtol=rtol,
atol=atol,
err_msg='Output has diff between GPU and TensorRT. ')
......
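The argument swap matters because `np.testing.assert_allclose(actual, desired, ...)` is asymmetric: `rtol` is scaled by `|desired|` and the mismatch report labels the arrays, so the baseline (`paddle_out`) belongs in the `desired` slot. A small self-contained check:

```python
import numpy as np

actual = np.array([1.0005], dtype=np.float32)   # e.g. a TensorRT output
desired = np.array([1.0], dtype=np.float32)     # e.g. the GPU baseline
# Passes: |actual - desired| = 5e-4 <= rtol * |desired| = 1e-3.
np.testing.assert_allclose(actual, desired, rtol=1e-3, atol=0.0)
```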
......@@ -47,11 +47,11 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
self.trt_param.workspace_size = 1073741824
def generate_input1(batch, attrs: List[Dict[str, Any]]):
return np.ones([batch, attrs[0]['groups'] * 3, 64,
64]).astype(np.float32)
return np.ones([batch, attrs[0]['groups'] * 3, 64, 64]).astype(
np.float32) / 4
def generate_weight1(attrs: List[Dict[str, Any]]):
return np.random.random([24, 3, 3, 3]).astype(np.float32)
return np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5
batch_options = [1, 2]
strides_options = [[2, 2], [1, 2]]
......@@ -162,7 +162,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
attrs, False), (1e-3, 1e-3)
self.trt_param.precision = paddle_infer.PrecisionType.Int8
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), (1e-3, 1e-3)
attrs, False), (1e-2, 1e-2)
# for dynamic_shape
generate_dynamic_shape(attrs)
......@@ -174,7 +174,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
attrs, True), (1e-3, 1e-3)
self.trt_param.precision = paddle_infer.PrecisionType.Int8
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), (1e-3, 1e-3)
attrs, True), (1e-2, 1e-2)
def test(self):
self.run_test()
......
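Loosening the Int8 checks to 1e-2 is consistent with the round-trip error of symmetric int8 quantization, which a short simulation makes visible (the weight shape and centering mirror the generator above; everything else is illustrative):

```python
import numpy as np

w = np.random.random([9, 3, 3, 3]).astype(np.float32) - 0.5  # zero-centered
scale = np.abs(w).max() / 127.0          # symmetric per-tensor int8 scale
w_dq = np.round(w / scale).astype(np.int8).astype(np.float32) * scale
# Round-to-nearest bounds each element's error by scale / 2 (~2e-3 here),
# and those errors accumulate across a convolution's reduction dimension,
# which is why a 1e-3 tolerance was too tight for the Int8 paths.
print(np.abs(w_dq - w).max(), scale / 2)
```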
......@@ -128,7 +128,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
attrs, False), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False), 1e-5
attrs, False), (1e-3, 1e-3)
# for dynamic_shape
generate_dynamic_shape(attrs)
......@@ -137,7 +137,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
attrs, True), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True), 1e-5
attrs, True), (1e-3, 1e-3)
def test(self):
self.run_test()
......
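Widening the Half-precision checks from 1e-5 to a `(1e-3, 1e-3)` tuple matches float16's resolution: with a 10-bit mantissa its machine epsilon is just under 1e-3, so a tighter tolerance against a float32 reference is unachievable in general.

```python
import numpy as np

# Relative differences finer than ~9.77e-4 are below float16 resolution.
print(np.finfo(np.float16).eps)  # 0.000977
```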
......@@ -44,9 +44,9 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
def generate_input1(dtype, attrs: List[Dict[str, Any]]):
if dtype == -1 or dtype == 5:
return np.random.random([1, 3, 64, 64]).astype(np.float32)
return np.random.random([1, 3, 32, 32]).astype(np.float32)
elif dtype == 2:
return np.random.random([1, 3, 64, 64]).astype(np.int32)
return np.random.random([1, 3, 32, 32]).astype(np.int32)
for keep_dim in [True, False]:
for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
......@@ -93,7 +93,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 32, 32]}
def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
......
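The opt-shape fix keeps the optimization profile internally consistent: TensorRT requires min <= opt <= max elementwise, and the opt shape should match the 32x32 inputs the generator above actually produces. A hypothetical validity check:

```python
def profile_is_valid(min_shape, opt_shape, max_shape):
    # Elementwise min <= opt <= max, as TensorRT expects for a profile.
    return all(lo <= o <= hi
               for lo, o, hi in zip(min_shape, opt_shape, max_shape))

assert profile_is_valid([1, 3, 32, 32], [1, 3, 32, 32], [4, 3, 64, 64])
```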
......@@ -154,7 +154,7 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest):
def test(self):
self.run_and_statis(quant=False,
max_examples=50,
max_examples=25,
passes=["trt_flatten2_matmul_fuse_pass"])
......
......@@ -79,7 +79,14 @@ class TensorRTPool3dTest(InferencePassTest):
shutil.rmtree(self.path + "_opt_cache")
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
if self.precision == AnalysisConfig.Precision.Float32:
atol, rtol = (1e-5, 1e-5)
elif self.precision == AnalysisConfig.Precision.Half:
atol, rtol = (1e-3, 1e-3)
else:
raise ValueError("Unsupported precision {}".format(
self.precision))
self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......
......@@ -75,7 +75,14 @@ class TensorRTPoolTest(InferencePassTest):
shutil.rmtree(self.path + "_opt_cache")
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
if self.precision == AnalysisConfig.Precision.Float32:
atol, rtol = (1e-5, 1e-5)
elif self.precision == AnalysisConfig.Precision.Half:
atol, rtol = (1e-3, 1e-3)
else:
raise ValueError("Unsupported precision {}".format(
self.precision))
self.check_output_with_option(use_gpu, atol=atol, rtol=rtol)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......
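The same precision-to-tolerance ladder now appears verbatim in both pool tests; if it spreads further, a table-driven helper would avoid the duplicated if/elif chains. A sketch, assuming the `AnalysisConfig` import these tests already use (the helper itself is hypothetical):

```python
from paddle.fluid.core import AnalysisConfig

_TOLERANCES = {
    AnalysisConfig.Precision.Float32: (1e-5, 1e-5),
    AnalysisConfig.Precision.Half: (1e-3, 1e-3),
}

def tolerances_for(precision):
    # Look up (atol, rtol) for a precision, rejecting unsupported ones the
    # same way the inline branches above do.
    try:
        return _TOLERANCES[precision]
    except KeyError:
        raise ValueError("Unsupported precision {}".format(precision))
```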
......@@ -218,7 +218,10 @@ class TRTReduceMeanStaticFP16(InferencePassTest):
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, flatten=True)
self.check_output_with_option(use_gpu,
flatten=True,
atol=1e-3,
rtol=1e-3)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......@@ -244,7 +247,10 @@ class TRTReduceMeanFP16Static(InferencePassTest):
def test_check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu, flatten=True)
self.check_output_with_option(use_gpu,
flatten=True,
atol=1e-3,
rtol=1e-3)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
......
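`flatten=True` reshapes both outputs to 1-D before the comparison, so rank differences between the fused and reference results do not trip the check. Roughly what happens under the option (illustrative, not the helper's actual code):

```python
import numpy as np

a = np.arange(6, dtype=np.float32).reshape(2, 3)
b = a.reshape(1, 6)  # same values, different shape
np.testing.assert_allclose(a.flatten(), b.flatten(), rtol=1e-3, atol=1e-3)
```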
......@@ -530,21 +530,26 @@ class TestStaticGraphShape(unittest.TestCase):
self.assertEqual(C.shape, (-1, 384))
@unittest.skipIf(not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not compiled with CUDA or not support the bfloat16")
class TestBF16(unittest.TestCase):
"""
EinsumOp supports the bfloat16 type; this unittest checks its correctness.
"""
def test_shape(self):
if paddle.is_compiled_with_cuda() and _is_gpu_bfloat16_supported():
""" MatmulKernel support bfloat16 only if cuda_major >= 11.0 and Compute Capability >= 8.0
cuda_major = paddle.version.cuda().split('.')[0].strip()
if int(cuda_major) >= 11:
""" MatmulKernel support bfloat16 only if cuda_major > 11.0.
"""
A = paddle.to_tensor(np.array([1.0, 2.0])).astype(paddle.bfloat16)
A = A.cuda()
B = paddle.to_tensor(np.array([2.0, 3.0])).astype(paddle.bfloat16)
B = B.cuda()
C = paddle.einsum('i,i->', A, B)
self.assertEqual(C.astype(paddle.float32).item(), 8.0)
D = paddle.to_tensor(8.0).astype(paddle.bfloat16)
self.assertEqual(C.item(), D.item())
class TestComplex(unittest.TestCase):
......
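The test now compares `C` against another bfloat16 tensor rather than a float literal because bfloat16 keeps only 8 mantissa bits: small powers of two such as 8.0 survive the cast exactly, while most decimals do not. An illustrative pure-Python narrowing (truncation variant; real casts typically round to nearest):

```python
import struct

def bfloat16_trunc(x):
    # bfloat16 is the top 16 bits of a float32; masking the low bits shows
    # which values survive the narrowing exactly.
    (bits,) = struct.unpack("<I", struct.pack("<f", x))
    (y,) = struct.unpack("<f", struct.pack("<I", bits & 0xFFFF0000))
    return y

print(bfloat16_trunc(8.0))  # 8.0      -- exactly representable
print(bfloat16_trunc(2.1))  # 2.09375  -- altered by the narrowing
```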