未验证 提交 8700a7bd 编写于 作者: G gongweibao 提交者: GitHub

Fix unittests bugs. (#30250)

上级 dd6f5919
...@@ -39,7 +39,6 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_async) ...@@ -39,7 +39,6 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_async)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_cloud) list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_cloud)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_nproc) list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_nproc)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_api_input) list(APPEND MIXED_DIST_TEST_OPS test_fleet_api_input)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_checkpoint)
list(APPEND MIXED_DIST_TEST_OPS test_collective_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_collective_optimizer)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_base) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base)
list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2)
...@@ -128,6 +127,7 @@ if(WIN32) ...@@ -128,6 +127,7 @@ if(WIN32)
LIST(REMOVE_ITEM TEST_OPS test_complex_matmul) LIST(REMOVE_ITEM TEST_OPS test_complex_matmul)
endif() endif()
LIST(REMOVE_ITEM TEST_OPS test_fleet_checkpoint)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2)
...@@ -509,7 +509,6 @@ if(WITH_DISTRIBUTE) ...@@ -509,7 +509,6 @@ if(WITH_DISTRIBUTE)
if(NOT APPLE) if(NOT APPLE)
if(WITH_GPU) if(WITH_GPU)
bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
py_test_modules(test_fleet_checkpoint MODULES test_fleet_checkpoint)
py_test_modules(test_launch_coverage MODULES test_launch_coverage) py_test_modules(test_launch_coverage MODULES test_launch_coverage)
endif() endif()
...@@ -579,15 +578,18 @@ if(NOT WIN32) ...@@ -579,15 +578,18 @@ if(NOT WIN32)
endif() endif()
if(WITH_DISTRIBUTE AND NOT APPLE AND NOT WIN32) if(WITH_DISTRIBUTE AND NOT APPLE AND NOT WIN32)
bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") py_test_modules(test_fleet_checkpoint MODULES test_fleet_checkpoint)
bash_test_modules(test_auto_checkpoint1 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") set_tests_properties(test_fleet_checkpoint PROPERTIES TIMEOUT 200)
bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") set_tests_properties(test_fleet_checkpoint PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_auto_checkpoint3 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_auto_checkpoint_multiple START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") bash_test_modules(test_auto_checkpoint1 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_auto_checkpoint_dist_basic START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_hdfs1 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") bash_test_modules(test_auto_checkpoint3 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_hdfs2 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") bash_test_modules(test_auto_checkpoint_multiple START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_hdfs3 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") bash_test_modules(test_auto_checkpoint_dist_basic START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_hdfs1 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_hdfs2 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
bash_test_modules(test_hdfs3 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
endif() endif()
add_subdirectory(sequence) add_subdirectory(sequence)
...@@ -660,7 +662,6 @@ endif() ...@@ -660,7 +662,6 @@ endif()
if (WITH_DISTRIBUTE AND NOT APPLE) if (WITH_DISTRIBUTE AND NOT APPLE)
if(WITH_GPU) if(WITH_GPU)
set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_fleet_checkpoint PROPERTIES TIMEOUT 120)
set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120) set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120)
endif() endif()
endif() endif()
......
...@@ -28,12 +28,12 @@ export PADDLE_PORT=35789 ...@@ -28,12 +28,12 @@ export PADDLE_PORT=35789
export TRAINER_PORTS_NUM=2 export TRAINER_PORTS_NUM=2
distributed_args="--ips=${cluster_node_ips} --gpus=0,1 --log_dir=testlog" distributed_args="--ips=${cluster_node_ips} --gpus=0,1 --log_dir=testlog"
CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.fleet.launch ${distributed_args} multi_process.py fleetrun CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.fleet.launch ${distributed_args} multi_process.py fleetlaunchcloud
str1="selected_gpus:0 worker_endpoints:127.0.0.1:35789,127.0.0.1:35790,127.0.0.2:35789,127.0.0.2:35790 trainers_num:4 current_endpoint:127.0.0.1:35789 trainer_id:0" str1="selected_gpus:0 worker_endpoints:127.0.0.1:35789,127.0.0.1:35790,127.0.0.2:35789,127.0.0.2:35790 trainers_num:4 current_endpoint:127.0.0.1:35789 trainer_id:0"
str2="selected_gpus:1 worker_endpoints:127.0.0.1:35789,127.0.0.1:35790,127.0.0.2:35789,127.0.0.2:35790 trainers_num:4 current_endpoint:127.0.0.1:35790 trainer_id:1" str2="selected_gpus:1 worker_endpoints:127.0.0.1:35789,127.0.0.1:35790,127.0.0.2:35789,127.0.0.2:35790 trainers_num:4 current_endpoint:127.0.0.1:35790 trainer_id:1"
file_0="multi_process_fleetrun.check_0.log" file_0="multi_process_fleetlaunchcloud.check_0.log"
file_1="multi_process_fleetrun.check_1.log" file_1="multi_process_fleetlaunchcloud.check_1.log"
echo "paddlecloud params test" echo "paddlecloud params test"
if grep -q "$str1" "$file_0"; then if grep -q "$str1" "$file_0"; then
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册