fix a100 cuda 12 ut (#54542) (#54550)

945e0522 · Yuang Liu · GitHub · ee5430fb · 945e0522 · 945e0522
4 changed files
--- a/test/collective/CMakeLists.txt
+++ b/test/collective/CMakeLists.txt
@@ -200,8 +200,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
  py_test_modules(
-    test_collective_reduce_api MODULES test_collective_reduce_api ENVS
-    "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
+    test_collective_reduce_api
+    MODULES
+    test_collective_reduce_api
+    ENVS
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
+  )
  set_tests_properties(test_collective_reduce_api
                       PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST")
 endif()
@@ -272,8 +276,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
  py_test_modules(
-    test_collective_split_col_linear MODULES test_collective_split_col_linear
-    ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
+    test_collective_split_col_linear
+    MODULES
+    test_collective_split_col_linear
+    ENVS
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
+  )
  set_tests_properties(test_collective_split_col_linear
                       PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
 endif()

--- a/test/collective/fleet/CMakeLists.txt
+++ b/test/collective/fleet/CMakeLists.txt
@@ -332,7 +332,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
    LABELS
    "RUN_TYPE=DIST"
    ENVS
-    "PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+    "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT
                                                                        "120")
@@ -351,8 +351,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32))
 endif()
 if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
-    test_recv_save_op MODULES test_recv_save_op ENVS
-    "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
+    test_recv_save_op
+    MODULES
+    test_recv_save_op
+    ENVS
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+  )
 endif()
 if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
  py_test_modules(
@@ -696,7 +700,7 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
    LABELS
    "RUN_TYPE=DIST"
    ENVS
-    "PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+    "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
  )
  set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT "200")
 endif()
@@ -922,9 +926,12 @@ if((WITH_GPU) AND (LINUX))
 endif()
 if((WITH_GPU) AND (LINUX))
  py_test_modules(
-    test_dygraph_save_for_auto_infer MODULES test_dygraph_save_for_auto_infer
+    test_dygraph_save_for_auto_infer
+    MODULES
+    test_dygraph_save_for_auto_infer
    ENVS
-    "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+  )
  set_tests_properties(test_dygraph_save_for_auto_infer
                       PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
 endif()
--- a/test/collective/fleet/hybrid_parallel_mp_layers.py
+++ b/test/collective/fleet/hybrid_parallel_mp_layers.py
@@ -221,7 +221,7 @@ class TestDistTraning(unittest.TestCase):
            optimizer_b.step()

            np.testing.assert_allclose(
-                loss_a.numpy(), loss_b.numpy(), rtol=5e-6
+                loss_a.numpy(), loss_b.numpy(), rtol=5e-5
            )

    def test_parallel_embedding(self):

--- a/test/distributed_passes/CMakeLists.txt
+++ b/test/distributed_passes/CMakeLists.txt
@@ -27,7 +27,7 @@ if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)))
 endif()

 foreach(TEST_OP ${TEST_OPS})
-  py_test_modules(${TEST_OP} MODULES ${TEST_OP})
+  py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0")
  list(APPEND DIST_TEST_OPS ${TEST_OP})
  set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200)
  set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST")