未验证 提交 05c15a08 编写于 作者: Q Qiao Longfei 提交者: GitHub

Merge pull request #14467 from jacquesqiao/update-trainer-retry

optimize distribute checkport
......@@ -34,6 +34,7 @@ def wait_server_ready(endpoints):
"""
while True:
all_ok = True
not_ready_endpoints = []
for ep in endpoints:
ip_port = ep.split(":")
with closing(socket.socket(socket.AF_INET,
......@@ -42,8 +43,11 @@ def wait_server_ready(endpoints):
result = sock.connect_ex((ip_port[0], int(ip_port[1])))
if result != 0:
all_ok = False
not_ready_endpoints.append(ep)
if not all_ok:
sys.stderr.write("pserver not ready, wait 3 sec to retry...\n")
sys.stderr.write("not ready endpoints:" + str(not_ready_endpoints) +
"\n")
sys.stderr.flush()
time.sleep(3)
else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册