diff --git a/python/paddle/distributed/fleet/elastic.py b/python/paddle/distributed/fleet/elastic.py index 706868918f531ff84f4291b13efa281b47f2e49d..101269764adb192051f77d7306e91a1d3529c1d2 100644 --- a/python/paddle/distributed/fleet/elastic.py +++ b/python/paddle/distributed/fleet/elastic.py @@ -41,14 +41,16 @@ class LauncherInterface(object): def _terminate_procs(self): # try to terminate process by group, this happend in multiprocess senario in user process - for p in self.procs: - if p.proc.poll() is None: - os.killpg(os.getpgid(p.proc.pid), signal.SIGTERM) - if p.log_fn: - p.log_fn.close() - logger.info("terminate process group gid:{}".format(p.proc.pid)) + if os.name != 'nt': + for p in self.procs: + if p.proc.poll() is None: + os.killpg(os.getpgid(p.proc.pid), signal.SIGTERM) + if p.log_fn: + p.log_fn.close() + logger.info("terminate process group gid:{}".format( + p.proc.pid)) - time.sleep(1) + time.sleep(1) for p in self.procs: if p.proc.poll() is None: p.proc.terminate() diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 7c183fc9286c818500cf6d91f87b18aee65cd5ea..6ead643df6c1b8c57588313dbb917f8d754d9f51 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -517,6 +517,7 @@ def start_local_trainers(cluster, "details abouts PADDLE_TRAINER_ENDPOINTS can be found in {}/endpoints.log, and detail running logs maybe found in {}/workerlog.0". format(log_dir, log_dir)) fn = None + pre_fn = None if os.name == 'nt' else os.setsid if log_dir is not None: os.system("mkdir -p {}".format(log_dir)) if os.path.exists("%s/endpoints.log" % log_dir): @@ -526,13 +527,9 @@ def start_local_trainers(cluster, f.write("\n".join(cluster.trainers_endpoints())) fn = open("%s/workerlog.%d" % (log_dir, idx), "a") proc = subprocess.Popen( - cmd, - env=current_env, - stdout=fn, - stderr=fn, - preexec_fn=os.setsid) + cmd, env=current_env, stdout=fn, stderr=fn, preexec_fn=pre_fn) else: - proc = subprocess.Popen(cmd, env=current_env, preexec_fn=os.setsid) + proc = subprocess.Popen(cmd, env=current_env, preexec_fn=pre_fn) tp = TrainerProc() tp.proc = proc