未验证 提交 05c15a08 编写于 作者: Q Qiao Longfei 提交者: GitHub

Merge pull request #14467 from jacquesqiao/update-trainer-retry

optimize distributed port check
...@@ -34,6 +34,7 @@ def wait_server_ready(endpoints): ...@@ -34,6 +34,7 @@ def wait_server_ready(endpoints):
""" """
while True: while True:
all_ok = True all_ok = True
not_ready_endpoints = []
for ep in endpoints: for ep in endpoints:
ip_port = ep.split(":") ip_port = ep.split(":")
with closing(socket.socket(socket.AF_INET, with closing(socket.socket(socket.AF_INET,
...@@ -42,8 +43,11 @@ def wait_server_ready(endpoints): ...@@ -42,8 +43,11 @@ def wait_server_ready(endpoints):
result = sock.connect_ex((ip_port[0], int(ip_port[1]))) result = sock.connect_ex((ip_port[0], int(ip_port[1])))
if result != 0: if result != 0:
all_ok = False all_ok = False
not_ready_endpoints.append(ep)
if not all_ok: if not all_ok:
sys.stderr.write("pserver not ready, wait 3 sec to retry...\n") sys.stderr.write("pserver not ready, wait 3 sec to retry...\n")
sys.stderr.write("not ready endpoints:" + str(not_ready_endpoints) +
"\n")
sys.stderr.flush() sys.stderr.flush()
time.sleep(3) time.sleep(3)
else: else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册