diff --git a/test/auto_parallel/CMakeLists.txt b/test/auto_parallel/CMakeLists.txt index c805071af32019ca05e32906554377207050bbc3..811ce947dbb0bdc38e097b35a7ed83b1c28cd908 100644 --- a/test/auto_parallel/CMakeLists.txt +++ b/test/auto_parallel/CMakeLists.txt @@ -24,7 +24,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU) py_test_modules(test_optimization_tuner_api MODULES test_optimization_tuner_api) set_tests_properties(test_optimization_tuner_api - PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 80) + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120) py_test_modules(test_converter MODULES test_converter) set_tests_properties(test_converter PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) @@ -48,10 +48,10 @@ if(WITH_DISTRIBUTE AND WITH_GPU) PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) py_test_modules(test_pass_sharding MODULES test_pass_sharding) set_tests_properties(test_pass_sharding - PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100) py_test_modules(test_pass_amp MODULES test_pass_amp) set_tests_properties(test_pass_amp PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" - TIMEOUT 50) + TIMEOUT 80) py_test_modules(test_amp_o2_pass MODULES test_amp_o2_pass) set_tests_properties(test_amp_o2_pass PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) @@ -85,11 +85,11 @@ if(WITH_DISTRIBUTE AND WITH_GPU) py_test_modules(test_tuning_recompute MODULES test_tuning_recompute) set_tests_properties(test_tuning_recompute PROPERTIES TIMEOUT 300) py_test_modules(test_fused_linear_pass MODULES test_fused_linear_pass) - set_tests_properties(test_fused_linear_pass PROPERTIES TIMEOUT 20) + set_tests_properties(test_fused_linear_pass PROPERTIES TIMEOUT 40) py_test_modules(test_align_tool MODULES test_align_tool) set_tests_properties(test_align_tool PROPERTIES TIMEOUT 20) py_test_modules(test_pass_base_list MODULES test_pass_base_list) - set_tests_properties(test_pass_base_list PROPERTIES TIMEOUT 20) + set_tests_properties(test_pass_base_list PROPERTIES TIMEOUT 40) py_test_modules(test_fuse_adamw_pass MODULES test_fuse_adamw_pass) set_tests_properties(test_fuse_adamw_pass PROPERTIES TIMEOUT 20) py_test_modules(test_rule_based_tuner_o2 MODULES test_rule_based_tuner_o2) diff --git a/test/collective/CMakeLists.txt b/test/collective/CMakeLists.txt index a5e9e0e3ac741c97bac0b1d7627ce3e96124df4c..1e0cf94dfa982610d346c2b9e321ccfeceadb093 100644 --- a/test/collective/CMakeLists.txt +++ b/test/collective/CMakeLists.txt @@ -107,7 +107,7 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) test_collective_alltoall_single_api ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_collective_alltoall_single_api - PROPERTIES TIMEOUT "120" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "160" LABELS "RUN_TYPE=DIST") endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( @@ -137,10 +137,10 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") if(${CUDA_ARCH_NAME} STREQUAL "Ampere") set_tests_properties(test_collective_broadcast_api - PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST") else() set_tests_properties(test_collective_broadcast_api - PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "450" LABELS "RUN_TYPE=DIST") endif() endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) @@ -178,7 +178,7 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) test_collective_isend_irecv_api MODULES test_collective_isend_irecv_api ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_collective_isend_irecv_api - PROPERTIES TIMEOUT "120" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "160" LABELS "RUN_TYPE=DIST") endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( @@ -240,10 +240,10 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") if(${CUDA_ARCH_NAME} STREQUAL "Ampere") set_tests_properties(test_collective_reduce_scatter_api - PROPERTIES TIMEOUT "210" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=DIST") else() set_tests_properties(test_collective_reduce_scatter_api - PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "250" LABELS "RUN_TYPE=DIST") endif() endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) @@ -258,7 +258,7 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX)) test_collective_scatter_api MODULES test_collective_scatter_api ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_collective_scatter_api - PROPERTIES TIMEOUT "180" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules( diff --git a/test/collective/fleet/CMakeLists.txt b/test/collective/fleet/CMakeLists.txt index 47d6db038960de586e7366a34b57e4d3de1c2e66..4a472f19508ea11dcac191219399dfc7693a405d 100644 --- a/test/collective/fleet/CMakeLists.txt +++ b/test/collective/fleet/CMakeLists.txt @@ -237,7 +237,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) START_BASH ../../legacy_test/dist_test.sh TIMEOUT - "120" + "160" LABELS "RUN_TYPE=DIST" ENVS @@ -682,13 +682,13 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT) START_BASH ../../legacy_test/dist_test.sh TIMEOUT - "120" + "240" LABELS "RUN_TYPE=DIST" ENVS "PADDLE_DIST_UT_PORT=21272;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python" ) - set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT "120") + set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT "240") endif() if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT) bash_test_modules( @@ -922,7 +922,7 @@ if((WITH_GPU) AND (LINUX)) test_dygraph_dist_save_load MODULES test_dygraph_dist_save_load ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") set_tests_properties(test_dygraph_dist_save_load - PROPERTIES TIMEOUT "200" LABELS "RUN_TYPE=DIST") + PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST") endif() if((WITH_GPU) AND (LINUX)) py_test_modules( diff --git a/test/collective/fleet/dygraph_group_sharded_stage3.py b/test/collective/fleet/dygraph_group_sharded_stage3.py index 5499968079a2e3ebe4924fd5c73bb3e22a89c078..bbe0884d982e47b6b29a4a2ddff873731ee0ec40 100644 --- a/test/collective/fleet/dygraph_group_sharded_stage3.py +++ b/test/collective/fleet/dygraph_group_sharded_stage3.py @@ -366,7 +366,10 @@ def test_stage2_stage3(): # bfp16 nccl_version = core.nccl_version() - if nccl_version >= 21000: + if ( + nccl_version >= 21000 + and paddle.device.cuda.get_device_properties().major >= 8 + ): stage2_params = train_mlp( mlp11, sharding_stage=2, diff --git a/test/collective/fleet/dygraph_group_sharded_stage3_offload.py b/test/collective/fleet/dygraph_group_sharded_stage3_offload.py index e97a163e42f9afdfb55b511db3ea3a0280429d4c..b34f178aa836383bf1c07577cdd63df54d2c0ee2 100644 --- a/test/collective/fleet/dygraph_group_sharded_stage3_offload.py +++ b/test/collective/fleet/dygraph_group_sharded_stage3_offload.py @@ -215,7 +215,10 @@ def test_stage3_offload(): # bfp16 offload nccl_version = core.nccl_version() - if nccl_version >= 21000: + if ( + nccl_version >= 21000 + and paddle.device.cuda.get_device_properties().major >= 8 + ): stage3_params = train_mlp(mlp7, use_pure_fp16=True, use_bfp16=True) stage3_params_offload = train_mlp( mlp8, use_pure_fp16=True, offload=True, use_bfp16=True diff --git a/test/collective/fleet/hybrid_parallel_mp_bf16.py b/test/collective/fleet/hybrid_parallel_mp_bf16.py index 9e0847b38c530b8c45e7413dfa101c7a4df6f4a8..ae977f9891768b4567021452be629d32a8bb8bb6 100644 --- a/test/collective/fleet/hybrid_parallel_mp_bf16.py +++ b/test/collective/fleet/hybrid_parallel_mp_bf16.py @@ -60,5 +60,8 @@ class TestMPFP16(TestDistMPTraning): if __name__ == "__main__": - if check_nccl_version_for_bf16(): + if ( + check_nccl_version_for_bf16() + and paddle.device.cuda.get_device_properties().major >= 8 + ): unittest.main() diff --git a/test/collective/fleet/hybrid_parallel_pp_bf16.py b/test/collective/fleet/hybrid_parallel_pp_bf16.py index 6ae0a98d5c8078db9230b75cf6deb6a580c93309..70b3aec1515a84867602c98c12374a1a7fc95583 100644 --- a/test/collective/fleet/hybrid_parallel_pp_bf16.py +++ b/test/collective/fleet/hybrid_parallel_pp_bf16.py @@ -165,5 +165,8 @@ class TestDistPPTraning(unittest.TestCase): if __name__ == "__main__": - if check_nccl_version_for_bf16(): + if ( + check_nccl_version_for_bf16() + and paddle.device.cuda.get_device_properties().major >= 8 + ): unittest.main() diff --git a/test/distributed_passes/CMakeLists.txt b/test/distributed_passes/CMakeLists.txt index 79bc34620a49b107dadaafdb74873417005ec2e3..12018ff20deee887d8da033fa8b9ce567cb2b72f 100644 --- a/test/distributed_passes/CMakeLists.txt +++ b/test/distributed_passes/CMakeLists.txt @@ -29,6 +29,6 @@ endif() foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0") list(APPEND DIST_TEST_OPS ${TEST_OP}) - set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200) + set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 250) set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST") endforeach() diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index b3d630d2d52de7f6e21b7cc3b951af9430e2a1ac..bd9ad353f799b3977201c61d5a0bbc9e3d8784c0 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -1304,4 +1304,4 @@ set_tests_properties(test_reduce_op_static_build PROPERTIES TIMEOUT 500) set_tests_properties(test_sync_batch_norm_op_static_build PROPERTIES LABELS "RUN_TYPE=DIST") set_tests_properties(test_sync_batch_norm_op_static_build PROPERTIES TIMEOUT - 120) + 250)