提交 f82bea40 编写于 作者: T tangwei

Add a log hint for local cluster training

上级 45d20ee2
...@@ -53,13 +53,10 @@ class LocalClusterEngine(Engine): ...@@ -53,13 +53,10 @@ class LocalClusterEngine(Engine):
"POD_IP": user_endpoints_ips[i] "POD_IP": user_endpoints_ips[i]
}) })
if logs_dir is not None:
os.system("mkdir -p {}".format(logs_dir)) os.system("mkdir -p {}".format(logs_dir))
fn = open("%s/server.%d" % (logs_dir, i), "w") fn = open("%s/server.%d" % (logs_dir, i), "w")
log_fns.append(fn) log_fns.append(fn)
proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd()) proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
else:
proc = subprocess.Popen(cmd, env=current_env, cwd=os.getcwd())
procs.append(proc) procs.append(proc)
for i in range(worker_num): for i in range(worker_num):
...@@ -70,13 +67,10 @@ class LocalClusterEngine(Engine): ...@@ -70,13 +67,10 @@ class LocalClusterEngine(Engine):
"PADDLE_TRAINER_ID": str(i) "PADDLE_TRAINER_ID": str(i)
}) })
if logs_dir is not None:
os.system("mkdir -p {}".format(logs_dir)) os.system("mkdir -p {}".format(logs_dir))
fn = open("%s/worker.%d" % (logs_dir, i), "w") fn = open("%s/worker.%d" % (logs_dir, i), "w")
log_fns.append(fn) log_fns.append(fn)
proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd()) proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
else:
proc = subprocess.Popen(cmd, env=current_env, cwd=os.getcwd())
procs.append(proc) procs.append(proc)
# only wait worker to finish here # only wait worker to finish here
...@@ -91,7 +85,9 @@ class LocalClusterEngine(Engine): ...@@ -91,7 +85,9 @@ class LocalClusterEngine(Engine):
if len(log_fns) > 0: if len(log_fns) > 0:
log_fns[i].close() log_fns[i].close()
procs[i].terminate() procs[i].terminate()
print("all workers and parameter servers already completed", file=sys.stderr) print("all workers already completed, you can view logs under the {} directory".format(logs_dir),
file=sys.stderr)
def run(self):
def run(self):
self.start_procs() self.start_procs()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册