From abc1c3d40c4041d56e59a6cdac4317c2ca95e74b Mon Sep 17 00:00:00 2001
From: lishicheng1996 <43111799+lishicheng1996@users.noreply.github.com>
Date: Tue, 27 Jun 2023 17:28:59 +0800
Subject: [PATCH] [BugFix] fix bugs in DCU unit tests (#54874)

* block bf16 tests on ROCm
* block more bf16 tests on ROCm
* some unittest cases don't have kernels on ROCm
* some unittest cases don't have kernels on ROCm
* fix code style
---
 test/legacy_test/test_assign_op.py            |  3 +-
 test/legacy_test/test_cast_op.py              |  8 +++++
 test/legacy_test/test_elementwise_mul_op.py   |  4 +++
 test/legacy_test/test_elementwise_pow_op.py   |  4 +++
 test/legacy_test/test_fill_any_like_op.py     |  3 +-
 test/legacy_test/test_layer_norm_op.py        | 33 +++++++++++++++++--
 test/legacy_test/test_matmul_v2_op.py         |  1 +
 test/legacy_test/test_reduce_op.py            | 11 ++++++-
 test/legacy_test/test_reshape_op.py           |  4 +++
 test/legacy_test/test_scale_op.py             |  3 +-
 ..._model_parallel_fused_multi_transformer.py |  5 ++-
 11 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/test/legacy_test/test_assign_op.py b/test/legacy_test/test_assign_op.py
index 9069b11669d..9299b07fc21 100644
--- a/test/legacy_test/test_assign_op.py
+++ b/test/legacy_test/test_assign_op.py
@@ -72,7 +72,8 @@ class TestAssignFP16Op(eager_op_test.OpTest):


 @unittest.skipIf(
-    not paddle.is_compiled_with_cuda(), "BFP16 test runs only on GPU"
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
 )
 class TestAssignBFP16Op(eager_op_test.OpTest):
     def setUp(self):
diff --git a/test/legacy_test/test_cast_op.py b/test/legacy_test/test_cast_op.py
index c830f5f9f81..dde01a2296c 100644
--- a/test/legacy_test/test_cast_op.py
+++ b/test/legacy_test/test_cast_op.py
@@ -95,6 +95,10 @@ class TestCastOpFp32ToFp16(OpTest):
         self.check_grad(['X'], ['Out'], check_prim=True, only_check_prim=True)


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
 class TestCastOpBf16ToFp32(OpTest):
     def setUp(self):
         ipt = np.array(np.random.randint(10, size=[10, 10])).astype('uint16')
@@ -120,6 +124,10 @@ class TestCastOpBf16ToFp32(OpTest):
         self.check_grad(['X'], ['Out'], check_prim=True, only_check_prim=True)


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
 class TestCastOpFp32ToBf16(OpTest):
     def setUp(self):
         ipt = np.random.random(size=[10, 10]).astype('float32')
diff --git a/test/legacy_test/test_elementwise_mul_op.py b/test/legacy_test/test_elementwise_mul_op.py
index 8356d055c20..987d1541910 100644
--- a/test/legacy_test/test_elementwise_mul_op.py
+++ b/test/legacy_test/test_elementwise_mul_op.py
@@ -163,6 +163,10 @@ class TestElementwiseMulOp_ZeroDim3(ElementwiseMulOp):
         self.out = np.multiply(self.x, self.y)


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
 class TestBF16ElementwiseMulOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_mul"
diff --git a/test/legacy_test/test_elementwise_pow_op.py b/test/legacy_test/test_elementwise_pow_op.py
index d450cc8a606..88297a2293a 100644
--- a/test/legacy_test/test_elementwise_pow_op.py
+++ b/test/legacy_test/test_elementwise_pow_op.py
@@ -268,6 +268,10 @@ class TestElementwisePowOpFP16(OpTest):
         )


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
 class TestElementwisePowBF16Op(OpTest):
     def setUp(self):
         self.op_type = "elementwise_pow"
"elementwise_pow" diff --git a/test/legacy_test/test_fill_any_like_op.py b/test/legacy_test/test_fill_any_like_op.py index 36cf77195cc..31a3fa38363 100644 --- a/test/legacy_test/test_fill_any_like_op.py +++ b/test/legacy_test/test_fill_any_like_op.py @@ -64,7 +64,8 @@ class TestFillAnyLikeOpFloat32(TestFillAnyLikeOp): @unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" + not core.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(), + "core is not compiled with CUDA", ) class TestFillAnyLikeOpBfloat16(OpTest): def setUp(self): diff --git a/test/legacy_test/test_layer_norm_op.py b/test/legacy_test/test_layer_norm_op.py index 6fa2c41da3e..32d23ad3e1c 100644 --- a/test/legacy_test/test_layer_norm_op.py +++ b/test/legacy_test/test_layer_norm_op.py @@ -126,6 +126,10 @@ def layer_norm_wrapper( ) +@unittest.skipIf( + paddle.is_compiled_with_rocm(), + "ROCm doesn't support fp64 LayerNormOpByOp currently", +) class TestLayerNormOpByOpTest(OpTest): def setUp(self): self.python_api = layer_norm_wrapper @@ -164,7 +168,7 @@ class TestLayerNormOpByOpTest(OpTest): self.cinn_rtol = 1e-5 self.max_relative_error = 1e-5 - + # ROCm does not have float64 LayerNorm kernel self.dtype = "float64" self.x_shape = [2, 6, 6, 3] self.epsilon = 0.00001 @@ -218,6 +222,7 @@ class TestLayerNormOpByOpTest(OpTest): @unittest.skipIf( not core.is_compiled_with_cuda() + or paddle.is_compiled_with_rocm() or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not compiled with CUDA or not support the bfloat16", ) @@ -306,6 +311,10 @@ class TestLayerNormBF16OpByOpTest(OpTest): } +@unittest.skipIf( + paddle.is_compiled_with_rocm(), + "ROCm doesn't support fp64 LayerNormOpByOp currently", +) class TestLayerNormOpByOpTestFP64_case2(TestLayerNormOpByOpTest): def initConfig(self): self.rev_comp_atol = 1e-6 @@ -328,6 +337,10 @@ class TestLayerNormOpByOpTestFP64_case2(TestLayerNormOpByOpTest): self.has_bias = False +@unittest.skipIf( + paddle.is_compiled_with_rocm(), + "ROCm doesn't support bf16 LayerNormOpByOp currently", +) class TestLayerNormBF16OpByOpTest_case2(TestLayerNormBF16OpByOpTest): def initConfig(self): self.ori_atol = 1e-2 @@ -343,6 +356,10 @@ class TestLayerNormBF16OpByOpTest_case2(TestLayerNormBF16OpByOpTest): self.has_bias = False +@unittest.skipIf( + paddle.is_compiled_with_rocm(), + "ROCm doesn't support fp64 LayerNormOpByOp currently", +) class TestLayerNormOpByOpTestFP64_case3(TestLayerNormOpByOpTest): def initConfig(self): self.rev_comp_atol = 1e-7 @@ -365,6 +382,10 @@ class TestLayerNormOpByOpTestFP64_case3(TestLayerNormOpByOpTest): self.has_bias = False +@unittest.skipIf( + paddle.is_compiled_with_rocm(), + "ROCm doesn't support bf16 LayerNormOpByOp currently", +) class TestLayerNormBF16OpByOpTest_case3(TestLayerNormBF16OpByOpTest): def initConfig(self): self.ori_atol = 1e-2 @@ -380,6 +401,10 @@ class TestLayerNormBF16OpByOpTest_case3(TestLayerNormBF16OpByOpTest): self.has_bias = False +@unittest.skipIf( + paddle.is_compiled_with_rocm(), + "ROCm doesn't support fp64 LayerNormOpByOp currently", +) class TestLayerNormOpByOpTestFP64_case4(TestLayerNormOpByOpTest): def initConfig(self): self.rev_comp_atol = 1e-6 @@ -801,6 +826,10 @@ class TestFP16ScaleBiasLayerNorm(unittest.TestCase): assert_equal(b_g_np_1, b_g_np_2) +@unittest.skipIf( + not core.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(), + "BF16 is only supported on CUDA.", +) class TestBF16ScaleBiasLayerNorm(unittest.TestCase): def check_main(self, x_np, weight_np, bias_np, dtype): 
         paddle.disable_static()
@@ -934,7 +963,7 @@ class TestFastMathLayerNormOp(unittest.TestCase):
         )

     def test_main(self):
-        if not paddle.is_compiled_with_cuda():
+        if not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm():
             return
         self.check_with_dtype(dtype="float32")
         self.check_with_dtype(dtype="bfloat16")
diff --git a/test/legacy_test/test_matmul_v2_op.py b/test/legacy_test/test_matmul_v2_op.py
index f7b83fce177..6adc3603fb0 100644
--- a/test/legacy_test/test_matmul_v2_op.py
+++ b/test/legacy_test/test_matmul_v2_op.py
@@ -405,6 +405,7 @@ create_test_fp16_class(TestMatMulOpBroadcast2)
 def create_test_bf16_class(parent, atol=0.01):
     @unittest.skipIf(
         not core.is_compiled_with_cuda()
+        or paddle.is_compiled_with_rocm()
         or not core.is_bfloat16_supported(core.CUDAPlace(0)),
         "core is not compiled with CUDA and not support the bfloat16",
     )
diff --git a/test/legacy_test/test_reduce_op.py b/test/legacy_test/test_reduce_op.py
index 95d5fb5ceb2..4320cfd2a5d 100644
--- a/test/legacy_test/test_reduce_op.py
+++ b/test/legacy_test/test_reduce_op.py
@@ -198,7 +198,8 @@ create_test_fp16_class(TestSumOp3Dim)

 def create_test_bf16_class(parent):
     @unittest.skipIf(
-        not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+        not core.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+        "core is not compiled with CUDA",
     )
     class TestSumOpBf16(parent):
         def setUp(self):
@@ -349,6 +350,7 @@ class TestMaxFP16Op(TestMaxFP32Op):

 @unittest.skipIf(
     not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),
     "core is not compiled with CUDA or not support the bfloat16",
 )
@@ -449,6 +451,9 @@ class TestMin8DOp(OpTest):
     reason="reduce_min is discontinuous non-derivable function,"
     " its gradient check is not supported by unittest framework."
 )
+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(), "ROCm doesn't have FP16 reduce_min kernel"
+)
 class TestMinFP16Op(OpTest):
     """Remove Min with subgradient from gradient check to confirm the success of CI."""
     def setUp(self):
@@ -479,6 +484,7 @@ class TestMinFP16Op(OpTest):

 @unittest.skipIf(
     not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),
     "core is not compiled with CUDA or not support the bfloat16",
 )
@@ -541,6 +547,7 @@ class TestProdFP16OP(TestProdOp):

 @unittest.skipIf(
     not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),
     "core is not compiled with CUDA or not support the bfloat16",
 )
@@ -648,6 +655,7 @@ class TestProd6DFP16OP(TestProd6DOp):

 @unittest.skipIf(
     not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),
     "core is not compiled with CUDA or not support the bfloat16",
 )
@@ -721,6 +729,7 @@ class TestProd8DFP16OP(TestProd8DOp):

 @unittest.skipIf(
     not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),
     "core is not compiled with CUDA or not support the bfloat16",
 )
diff --git a/test/legacy_test/test_reshape_op.py b/test/legacy_test/test_reshape_op.py
index d5acc54d572..2feecb5005b 100755
--- a/test/legacy_test/test_reshape_op.py
+++ b/test/legacy_test/test_reshape_op.py
@@ -86,6 +86,10 @@ class TestReshapeOp_ZeroDim3(OpTest):
         self.infered_shape = ()


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
 class TestReshapeBF16Op(OpTest):
     def setUp(self):
         self.init_data()
diff --git a/test/legacy_test/test_scale_op.py b/test/legacy_test/test_scale_op.py
index 40712745dec..7708ce8deaa 100644
--- a/test/legacy_test/test_scale_op.py
+++ b/test/legacy_test/test_scale_op.py
@@ -155,7 +155,8 @@ class TestScaleFp16Op(TestScaleOp):


 @unittest.skipIf(
-    not core.is_compiled_with_rocm(), "core is not compiled with CUDA"
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
 )
 class TestScaleBF16Op(OpTest):
     def setUp(self):
diff --git a/test/legacy_test/test_static_model_parallel_fused_multi_transformer.py b/test/legacy_test/test_static_model_parallel_fused_multi_transformer.py
index f4637b070cb..705680b531b 100644
--- a/test/legacy_test/test_static_model_parallel_fused_multi_transformer.py
+++ b/test/legacy_test/test_static_model_parallel_fused_multi_transformer.py
@@ -34,7 +34,10 @@ class TestStaticModelParallel(TestDistBase):
     def test_dist_static_model_parallel_fused_multi_transformer(self):
         from paddle import fluid

-        if fluid.core.is_compiled_with_cuda():
+        if (
+            fluid.core.is_compiled_with_cuda()
+            and not paddle.is_compiled_with_rocm()
+        ):
             self.check_with_place(
                 "static_model_parallel_fused_multi_transformer.py",
                 delta=1e-5,
-- 
GitLab
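
Note for reviewers: every hunk in this patch applies the same guard, a module-level unittest.skipIf decorator that disables a bf16/fp64 test class when Paddle was built against ROCm rather than CUDA. The following is a minimal, self-contained sketch of that pattern only; the class name TestExampleBF16Op and its body are illustrative and are not part of the patch.

    import unittest

    import paddle


    @unittest.skipIf(
        not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
        "BF16 kernels are only exercised on CUDA builds",
    )
    class TestExampleBF16Op(unittest.TestCase):
        # This test body only runs when Paddle is a CUDA (non-ROCm) build.
        def test_compile_flags(self):
            self.assertTrue(paddle.is_compiled_with_cuda())
            self.assertFalse(paddle.is_compiled_with_rocm())


    if __name__ == "__main__":
        unittest.main()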