From 8fe8039ec84d368d3d2b3052ed1c1b82f489402d Mon Sep 17 00:00:00 2001 From: kuizhiqing Date: Mon, 28 Mar 2022 10:59:36 +0800 Subject: [PATCH] Launch fix port (#40936) --- .../paddle/distributed/launch/context/node.py | 18 ++++++++++++------ python/paddle/distributed/launch/main.py | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/python/paddle/distributed/launch/context/node.py b/python/paddle/distributed/launch/context/node.py index 1ece4db0fbb..2fa8b892275 100644 --- a/python/paddle/distributed/launch/context/node.py +++ b/python/paddle/distributed/launch/context/node.py @@ -25,6 +25,7 @@ class Node(object): self.device = Device.parse_device() self.ip = self.get_host_ip() self.free_ports = [] + self._allocated_ports = [] def get_host_ip(self): try: @@ -42,13 +43,18 @@ class Node(object): def get_ports_occupied(self): return self.free_ports - @classmethod def get_free_port(self): - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, - struct.pack('ii', 1, 0)) - s.bind(('', 0)) - return s.getsockname()[1] + for _ in range(100): + with closing(socket.socket(socket.AF_INET, + socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + port = s.getsockname()[1] + if port in self._allocated_ports: + continue + else: + self._allocated_ports.append(port) + return port + return port @classmethod def is_server_ready(self, ip, port): diff --git a/python/paddle/distributed/launch/main.py b/python/paddle/distributed/launch/main.py index 83a5e18714d..dd7edba35a4 100644 --- a/python/paddle/distributed/launch/main.py +++ b/python/paddle/distributed/launch/main.py @@ -36,7 +36,7 @@ def launch(): Base Parameters: - - ``--master``: The master/rendezvous server, support http:// and etcd://, default with http://. e.g., ``--master=127.0.0.1:8080``. Default ``--log_dir=None``. + - ``--master``: The master/rendezvous server, support http:// and etcd://, default with http://. e.g., ``--master=127.0.0.1:8080``. Default ``--master=None``. - ``--rank``: The rank of the node, can be auto assigned by master. Default ``--rank=-1``. -- GitLab