From f3e5a5cf67f9167f07abb33b6ec7b01e72cf1cc9 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Sat, 8 Jun 2019 19:56:42 +0800 Subject: [PATCH] Unset https_proxy and http_proxy in our launch.py (#17915) --- python/paddle/distributed/launch.py | 7 ++++++- python/paddle/fluid/tests/unittests/test_launch.sh | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/paddle/distributed/launch.py b/python/paddle/distributed/launch.py index e7b6dfa6d60..b6857237638 100644 --- a/python/paddle/distributed/launch.py +++ b/python/paddle/distributed/launch.py @@ -173,7 +173,11 @@ def start_procs(args): "PADDLE_CURRENT_ENDPOINT": "%s:%d" % (current_node_ip, args.started_port + i), "PADDLE_TRAINERS_NUM": "%d" % nranks, - "PADDLE_TRAINER_ENDPOINTS": trainers_endpoints + "PADDLE_TRAINER_ENDPOINTS": trainers_endpoints, + # Paddle broadcasts the ncclUniqueId over a socket, and a proxy + # may make trainers unreachable, so set both proxy variables to "" + "http_proxy": "", + "https_proxy": "" }) cmd = [sys.executable, "-u", args.training_script @@ -182,6 +186,7 @@ def start_procs(args): cmds.append(cmd) if args.log_dir is not None: + os.system("mkdir -p {}".format(args.log_dir)) fn = open("%s/workerlog.%d" % (args.log_dir, i), "w") log_fns.append(fn) diff --git a/python/paddle/fluid/tests/unittests/test_launch.sh b/python/paddle/fluid/tests/unittests/test_launch.sh index 7b849d022d2..01b620d01df 100644 --- a/python/paddle/fluid/tests/unittests/test_launch.sh +++ b/python/paddle/fluid/tests/unittests/test_launch.sh @@ -8,7 +8,7 @@ python -m paddle.distributed.launch multi_process.py cluster_node_ips="127.0.0.1" node_ip="127.0.0.1" -distributed_args="--cluster_node_ips ${cluster_node_ips} --node_ip ${node_ip} --selected_gpus=0,1" +distributed_args="--cluster_node_ips ${cluster_node_ips} --node_ip ${node_ip} --selected_gpus=0,1 --log_dir testlog" python -m paddle.distributed.launch ${distributed_args} multi_process.py str1="selected_gpus:0 worker_endpoints:['127.0.0.1:6170', '127.0.0.1:6171'] 
trainers_num:2 current_endpoint:127.0.0.1:6170 trainer_id:0" -- GitLab