fix testlaunch test=develop (#23304)

af0ad915 · gongweibao · GitHub · 2169e6fb · af0ad915 · af0ad915
Showing 2 changed files with 7 additions and 5 deletions

python/paddle/fluid/tests/unittests/CMakeLists.txt python/paddle/fluid/tests/unittests/CMakeLists.txt +1 -1

python/paddle/fluid/tests/unittests/test_launch.sh python/paddle/fluid/tests/unittests/test_launch.sh +6 -4

未找到文件。
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -300,7 +300,7 @@ if(WITH_DISTRIBUTE)
    if(NOT APPLE)
        if(WITH_GPU)
            # NOTE. test_launch only work in gpu collective mode
-            bash_test_modules(test_launch MODULES test_launch.sh)
+            bash_test_modules(test_launch MODULES test_launch.sh  ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
        endif()
        bash_test_modules(test_launch_ps MODULES test_launch_ps.sh)


--- a/python/paddle/fluid/tests/unittests/test_launch.sh
+++ b/python/paddle/fluid/tests/unittests/test_launch.sh
 #!/bin/bash
 set -e
 # use default values
-python -m paddle.distributed.launch multi_process.py
+# FIXME: random fails on Unknown command lines -c (or -m).
+launch_py=${PADDLE_BINARY_DIR}/python/paddle/distributed/launch.py
+python ${launch_py} multi_process.py

 # use paddlecloud
 cluster_node_ips="10.0.0.1"
@@ -15,7 +17,7 @@ export PADDLE_PORT=35019
 export PADDLE_PORTS_NUM=2

 distributed_args="--use_paddlecloud --cluster_node_ips=${cluster_node_ips} --node_ip=${node_ip} --selected_gpus=0,1 --log_dir=testlog"
-CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.launch ${distributed_args} multi_process.py
+CUDA_VISIBLE_DEVICES=0,1 python ${launch_py} ${distributed_args} multi_process.py

 str1="selected_gpus:0 worker_endpoints:127.0.0.1:35019,127.0.0.1:35020,127.0.0.2:35019,127.0.0.2:35020 trainers_num:4 current_endpoint:127.0.0.1:35019 trainer_id:0"
 str2="selected_gpus:1 worker_endpoints:127.0.0.1:35019,127.0.0.1:35020,127.0.0.2:35019,127.0.0.2:35020 trainers_num:4 current_endpoint:127.0.0.1:35020 trainer_id:1"
@@ -50,7 +52,7 @@ unset PADDLE_PORTS_NUM

 echo ""
 echo "paddle.distributed.launch async poll process test"
-if ! CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.launch ${distributed_args} multi_process.py abort; then
+if ! CUDA_VISIBLE_DEVICES=0,1 python ${launch_py} ${distributed_args} multi_process.py abort; then
    echo "train abort as planned"
 fi

@@ -77,5 +79,5 @@ rm -rf $file_0_0 $file_0_1

 distributed_args="--selected_gpus=0,1 --log_dir=testlog"
 export PADDLE_LAUNCH_LOG="test_launch_filelock_0"
-CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.launch ${distributed_args} find_ports.py
+CUDA_VISIBLE_DEVICES=0,1 python ${launch_py} ${distributed_args} find_ports.py
 str_0="worker_endpoints:127.0.0.1:6070,127.0.0.1:6071"