From ee6354d4fb378b4dd106d00927a24becd5622b94 Mon Sep 17 00:00:00 2001
From: Yichen Zhang <32740647+pkuzyc@users.noreply.github.com>
Date: Wed, 14 Jun 2023 11:39:54 +0800
Subject: [PATCH] fix the timeout bug of some communication api on A100 (#54513) (#54602)

---
Review notes (this section is ignored when the patch is applied):
  * Each hunk raises the ctest TIMEOUT of one collective API test when
    CUDA_ARCH_NAME equals "Ampere" and keeps the previous value otherwise:
      test_collective_alltoall_api        120 -> 160
      test_collective_broadcast_api       300 -> 360
      test_collective_reduce_scatter_api  150 -> 210
  * CUDA_ARCH_NAME is expanded inside quotes. Unquoted, an empty or
    undefined value collapses the condition to `if( STREQUAL "Ampere")`,
    which aborts configuration. The enclosing guard also admits WITH_ROCM
    builds, where the variable may not be set (TODO: confirm).
  * Only the condition lines differ from the recorded commit, so the
    post-image blob hash on the `index` line no longer matches exactly.

 test/collective/CMakeLists.txt | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/test/collective/CMakeLists.txt b/test/collective/CMakeLists.txt
index e2ee8e68336..a5e9e0e3ac7 100644
--- a/test/collective/CMakeLists.txt
+++ b/test/collective/CMakeLists.txt
@@ -79,8 +79,13 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   py_test_modules(
     test_collective_alltoall_api MODULES test_collective_alltoall_api ENVS
     "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
-  set_tests_properties(test_collective_alltoall_api
-                       PROPERTIES TIMEOUT "120" LABELS "RUN_TYPE=DIST")
+  if("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
+    set_tests_properties(test_collective_alltoall_api
+                         PROPERTIES TIMEOUT "160" LABELS "RUN_TYPE=DIST")
+  else()
+    set_tests_properties(test_collective_alltoall_api
+                         PROPERTIES TIMEOUT "120" LABELS "RUN_TYPE=DIST")
+  endif()
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   bash_test_modules(
@@ -130,8 +135,13 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   py_test_modules(
     test_collective_broadcast_api MODULES test_collective_broadcast_api ENVS
     "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
-  set_tests_properties(test_collective_broadcast_api
-                       PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
+  if("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
+    set_tests_properties(test_collective_broadcast_api
+                         PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=DIST")
+  else()
+    set_tests_properties(test_collective_broadcast_api
+                         PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
+  endif()
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   py_test_modules(
@@ -228,8 +238,13 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
     test_collective_reduce_scatter_api MODULES
     test_collective_reduce_scatter_api ENVS
     "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
-  set_tests_properties(test_collective_reduce_scatter_api
-                       PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=DIST")
+  if("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
+    set_tests_properties(test_collective_reduce_scatter_api
+                         PROPERTIES TIMEOUT "210" LABELS "RUN_TYPE=DIST")
+  else()
+    set_tests_properties(test_collective_reduce_scatter_api
+                         PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=DIST")
+  endif()
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   py_test_modules(
-- 
GitLab