checkport.py 1.8 KB
Newer Older
W
Wu Yi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import time
import socket
from contextlib import closing


def wait_server_ready(endpoints):
    """
    Wait until parameter servers are ready, use connect_ex to detect
    port readiness.

    Every endpoint is probed with a TCP connect. If at least one probe
    fails, the not-ready endpoints are reported on stderr and the whole
    list is probed again after a 3 second pause. The function returns
    only once every endpoint accepted a connection in the same round;
    there is no upper bound on the number of retries. An empty
    ``endpoints`` list returns immediately.

    Args:
        endpoints (list): endpoints string list, like:
                         ["127.0.0.1:8080", "127.0.0.1:8081"]

    Returns:
        None

    Raises:
        ValueError: if the port part of an endpoint is not an integer.
        IndexError: if an endpoint contains no ":" separator.

    Examples:
        .. code-block:: python

           wait_server_ready(["127.0.0.1:8080", "127.0.0.1:8081"])
    """
    while True:
        not_ready_endpoints = []
        for ep in endpoints:
            ip_port = ep.split(":")
            # closing() guarantees the probe socket is released even if
            # parsing the port or connecting raises.
            with closing(socket.socket(socket.AF_INET,
                                       socket.SOCK_STREAM)) as sock:
                # Bound each probe so one unresponsive host cannot stall
                # the whole polling round.
                sock.settimeout(2)
                # connect_ex reports a failed connect as a non-zero error
                # code instead of raising, which suits polling.
                result = sock.connect_ex((ip_port[0], int(ip_port[1])))
                if result != 0:
                    not_ready_endpoints.append(ep)

        if not not_ready_endpoints:
            # Every endpoint accepted a connection in this round.
            break

        sys.stderr.write("pserver not ready, wait 3 sec to retry...\n")
        sys.stderr.write("not ready endpoints:" + str(not_ready_endpoints) +
                         "\n")
        sys.stderr.flush()
        time.sleep(3)