未验证 提交 945e0522 编写于 作者: Y Yuang Liu 提交者: GitHub

fix a100 cuda 12 ut (#54542) (#54550)

上级 ee5430fb
......@@ -200,8 +200,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_reduce_api MODULES test_collective_reduce_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_collective_reduce_api
MODULES
test_collective_reduce_api
ENVS
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_collective_reduce_api
PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST")
endif()
......@@ -272,8 +276,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_split_col_linear MODULES test_collective_split_col_linear
ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_collective_split_col_linear
MODULES
test_collective_split_col_linear
ENVS
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_collective_split_col_linear
PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
endif()
......
......@@ -332,7 +332,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
LABELS
"RUN_TYPE=DIST"
ENVS
"PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
"NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT
"120")
......@@ -351,8 +351,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32))
endif()
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
py_test_modules(
test_recv_save_op MODULES test_recv_save_op ENVS
"http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
test_recv_save_op
MODULES
test_recv_save_op
ENVS
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
endif()
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
py_test_modules(
......@@ -696,7 +700,7 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
LABELS
"RUN_TYPE=DIST"
ENVS
"PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
"NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT "200")
endif()
......@@ -922,9 +926,12 @@ if((WITH_GPU) AND (LINUX))
endif()
if((WITH_GPU) AND (LINUX))
py_test_modules(
test_dygraph_save_for_auto_infer MODULES test_dygraph_save_for_auto_infer
test_dygraph_save_for_auto_infer
MODULES
test_dygraph_save_for_auto_infer
ENVS
"http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_dygraph_save_for_auto_infer
PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
endif()
......@@ -221,7 +221,7 @@ class TestDistTraning(unittest.TestCase):
optimizer_b.step()
np.testing.assert_allclose(
loss_a.numpy(), loss_b.numpy(), rtol=5e-6
loss_a.numpy(), loss_b.numpy(), rtol=5e-5
)
def test_parallel_embedding(self):
......
......@@ -27,7 +27,7 @@ if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)))
endif()
foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0")
list(APPEND DIST_TEST_OPS ${TEST_OP})
set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200)
set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册