未验证 提交 556be6d7 编写于 作者: M mapingshuo 提交者: GitHub

fix popen error (#24767) (#25215)

replace join with terminate, test=develop
上级 5e6848d9
...@@ -252,15 +252,12 @@ def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus): ...@@ -252,15 +252,12 @@ def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus):
def terminate_local_procs(procs): def terminate_local_procs(procs):
for p in procs: for p in procs:
if p.proc.poll() is None: if p.proc.poll() is None:
# subprocess need to release resource(e.g. shared memory) p.proc.terminate()
# use join to wait subprocess releasing
p.proc.join(timeout=1)
p.log_fn.close() p.log_fn.close()
logger.debug("terminate process id:{}".format(p.proc.pid)) logger.debug("terminate process id:{}".format(p.proc.pid))
# wait all process terminiated #wait all process terminiated
# time.sleep(3) time.sleep(3)
for step in range(0, 50): for step in range(0, 50):
alive = False alive = False
for p in procs: for p in procs:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册