diff --git a/test/collective/CMakeLists.txt b/test/collective/CMakeLists.txt index ee29e8842e2783f606a64bd75b430bbc359ad98d..e2ee8e68336d44ad1f25d45c42e9fb83d797963f 100644 --- a/test/collective/CMakeLists.txt +++ b/test/collective/CMakeLists.txt @@ -200,8 +200,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( - test_collective_reduce_api MODULES test_collective_reduce_api ENVS - "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") + test_collective_reduce_api + MODULES + test_collective_reduce_api + ENVS + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python" + ) set_tests_properties(test_collective_reduce_api PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST") endif() @@ -272,8 +276,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( - test_collective_split_col_linear MODULES test_collective_split_col_linear - ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") + test_collective_split_col_linear + MODULES + test_collective_split_col_linear + ENVS + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python" + ) set_tests_properties(test_collective_split_col_linear PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") endif() diff --git a/test/collective/fleet/CMakeLists.txt b/test/collective/fleet/CMakeLists.txt index aa27af240accf8da7dfd68a20b7ca97d6f51cb96..47d6db038960de586e7366a34b57e4d3de1c2e66 100644 --- a/test/collective/fleet/CMakeLists.txt +++ b/test/collective/fleet/CMakeLists.txt @@ -332,7 +332,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) LABELS "RUN_TYPE=DIST" ENVS - "PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" ) set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT "120") @@ -351,8 +351,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32)) endif() if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) py_test_modules( - test_recv_save_op MODULES test_recv_save_op ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + test_recv_save_op + MODULES + test_recv_save_op + ENVS + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + ) endif() if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) py_test_modules( @@ -696,7 +700,7 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT) LABELS "RUN_TYPE=DIST" ENVS - "PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" ) set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT "200") endif() @@ -922,9 +926,12 @@ if((WITH_GPU) AND (LINUX)) endif() if((WITH_GPU) AND (LINUX)) py_test_modules( - test_dygraph_save_for_auto_infer MODULES test_dygraph_save_for_auto_infer + test_dygraph_save_for_auto_infer + MODULES + test_dygraph_save_for_auto_infer ENVS - "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") + "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" + ) set_tests_properties(test_dygraph_save_for_auto_infer PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") endif() diff --git a/test/collective/fleet/hybrid_parallel_mp_layers.py b/test/collective/fleet/hybrid_parallel_mp_layers.py index b8e57a9a11b8a2d3e2a91347c87148d23d5ef0e3..751bc9255c100a5bbdb14a991fe59776edff7a92 100644 --- a/test/collective/fleet/hybrid_parallel_mp_layers.py +++ b/test/collective/fleet/hybrid_parallel_mp_layers.py @@ -221,7 +221,7 @@ class TestDistTraning(unittest.TestCase): optimizer_b.step() np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=5e-6 + loss_a.numpy(), loss_b.numpy(), rtol=5e-5 ) def test_parallel_embedding(self): diff --git a/test/distributed_passes/CMakeLists.txt b/test/distributed_passes/CMakeLists.txt index e2b8697fc85915a757f90affbba36e431f087e6c..79bc34620a49b107dadaafdb74873417005ec2e3 100644 --- a/test/distributed_passes/CMakeLists.txt +++ b/test/distributed_passes/CMakeLists.txt @@ -27,7 +27,7 @@ if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6))) endif() foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0") list(APPEND DIST_TEST_OPS ${TEST_OP}) set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200) set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST")