From 6473e27d56b3023fa8b46e61b110bcd90356b7fc Mon Sep 17 00:00:00 2001 From: WangXi Date: Fri, 14 May 2021 16:05:21 +0800 Subject: [PATCH] fix launch port already in use (#32892) --- python/paddle/distributed/fleet/launch_utils.py | 9 ++++++--- .../paddle/fluid/tests/unittests/test_launch_coverage.py | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index be7ad257ccb..c69b21538b6 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import functools import logging -import socket import time import os import signal @@ -27,6 +25,7 @@ from contextlib import closing import socket import warnings import six +import struct import paddle import paddle.fluid as fluid @@ -362,6 +361,10 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): def find_free_ports(num): def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + # Note(wangxi): Close the connection with a TCP RST instead + # of a TCP FIN, to avoid time_wait state. + s.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, + struct.pack('ii', 1, 0)) s.bind(('', 0)) return s.getsockname()[1] @@ -376,7 +379,7 @@ def find_free_ports(num): return port_set step += 1 - if step > 100: + if step > 400: print( "can't find avilable port and use the specified static port now!" ) diff --git a/python/paddle/fluid/tests/unittests/test_launch_coverage.py b/python/paddle/fluid/tests/unittests/test_launch_coverage.py index 43613928585..9fbf27e3c1d 100644 --- a/python/paddle/fluid/tests/unittests/test_launch_coverage.py +++ b/python/paddle/fluid/tests/unittests/test_launch_coverage.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from argparse import ArgumentParser, REMAINDER from paddle.distributed.utils import _print_arguments, get_gpus, get_cluster_from_args +from paddle.distributed.fleet.launch_utils import find_free_ports def _parse_args(): @@ -115,6 +116,9 @@ class TestCoverage(unittest.TestCase): args.use_paddlecloud = True cluster, pod = get_cluster_from_args(args, "0") + def test_find_free_ports(self): + find_free_ports(2) + if __name__ == '__main__': unittest.main() -- GitLab