From ead89b11e0ab9607c45ef13a465418f9912920b3 Mon Sep 17 00:00:00 2001 From: Webbley <948209535@qq.com> Date: Mon, 22 Nov 2021 15:01:30 +0800 Subject: [PATCH] bugfix in fleetrun when launching multiple machines training manually (#37274) --- python/paddle/distributed/fleet/launch_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index d1f4442ee6..f0a252b34b 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -410,7 +410,7 @@ def get_ports(num, offset): if ports is not None: ports = list(ports) else: - start_port = os.environ.get('FLAGS_START_PORT') + start_port = int(os.environ.get('FLAGS_START_PORT')) ports = range(start_port + offset, start_port + offset + num, 1) return ports @@ -1131,12 +1131,12 @@ class ParameterServerLauncher(object): # get http_port if args.http_port: - self.http_port = args.http_port + http_port = [args.http_port] else: http_port = get_ports( 1, self.server_num + self.worker_num + self.heter_worker_num) - http_ip = self.server_endpoints.split(",")[0].split(":")[0] - self.http_port = http_ip + ":" + str(http_port[0]) + http_ip = self.server_endpoints.split(",")[0].split(":")[0] + self.http_port = http_ip + ":" + str(http_port[0]) # check local or user define self.server_endpoints_ips = [ -- GitLab