From 33e4aca96c9b7908892f8d1165230556f593b948 Mon Sep 17 00:00:00 2001 From: Yuang Liu Date: Mon, 12 Jun 2023 11:24:37 +0800 Subject: [PATCH] fix a100 cuda 12 ut (#54542) --- test/collective/CMakeLists.txt | 16 ++++++++++++---- test/collective/fleet/CMakeLists.txt | 19 +++++++++++++------ .../fleet/hybrid_parallel_mp_layers.py | 2 +- test/distributed_passes/CMakeLists.txt | 2 +- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/test/collective/CMakeLists.txt b/test/collective/CMakeLists.txt index ee29e8842e2..e2ee8e68336 100644 --- a/test/collective/CMakeLists.txt +++ b/test/collective/CMakeLists.txt @@ -200,8 +200,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( - test_collective_reduce_api MODULES test_collective_reduce_api ENVS - "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") + test_collective_reduce_api + MODULES + test_collective_reduce_api + ENVS + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python" + ) set_tests_properties(test_collective_reduce_api PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST") endif() @@ -272,8 +276,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( - test_collective_split_col_linear MODULES test_collective_split_col_linear - ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") + test_collective_split_col_linear + MODULES + test_collective_split_col_linear + ENVS + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python" + ) set_tests_properties(test_collective_split_col_linear PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") endif() diff --git a/test/collective/fleet/CMakeLists.txt b/test/collective/fleet/CMakeLists.txt index aa27af240ac..47d6db03896 100644 --- a/test/collective/fleet/CMakeLists.txt +++ b/test/collective/fleet/CMakeLists.txt @@ -332,7 +332,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) LABELS "RUN_TYPE=DIST" ENVS - "PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" ) set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT "120") @@ -351,8 +351,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32)) endif() if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) py_test_modules( - test_recv_save_op MODULES test_recv_save_op ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + test_recv_save_op + MODULES + test_recv_save_op + ENVS + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + ) endif() if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) py_test_modules( @@ -696,7 +700,7 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT) LABELS "RUN_TYPE=DIST" ENVS - "PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" ) set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT "200") endif() @@ -922,9 +926,12 @@ if((WITH_GPU) AND (LINUX)) endif() if((WITH_GPU) AND (LINUX)) py_test_modules( - test_dygraph_save_for_auto_infer MODULES test_dygraph_save_for_auto_infer + test_dygraph_save_for_auto_infer + MODULES + test_dygraph_save_for_auto_infer ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + ) set_tests_properties(test_dygraph_save_for_auto_infer PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") endif() diff --git a/test/collective/fleet/hybrid_parallel_mp_layers.py b/test/collective/fleet/hybrid_parallel_mp_layers.py index b8e57a9a11b..751bc9255c1 100644 --- a/test/collective/fleet/hybrid_parallel_mp_layers.py +++ b/test/collective/fleet/hybrid_parallel_mp_layers.py @@ -221,7 +221,7 @@ class TestDistTraning(unittest.TestCase): optimizer_b.step() np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=5e-6 + loss_a.numpy(), loss_b.numpy(), rtol=5e-5 ) def test_parallel_embedding(self): diff --git a/test/distributed_passes/CMakeLists.txt b/test/distributed_passes/CMakeLists.txt index e2b8697fc85..79bc34620a4 100644 --- a/test/distributed_passes/CMakeLists.txt +++ b/test/distributed_passes/CMakeLists.txt @@ -27,7 +27,7 @@ if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6))) endif() foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0") list(APPEND DIST_TEST_OPS ${TEST_OP}) set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200) set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST") -- GitLab