未验证 提交 905e2346 编写于 作者: D danleifeng 提交者: GitHub

Add endpoints log; test=develop (#27439)

上级 9f3a9be7
......@@ -463,9 +463,8 @@ def launch():
cuda_device_num = 0
if len(has_ps_args) > 0 or cuda_device_num == 0:
logger.info(
"Run parameter-sever cpu mode. pserver arguments:{}, cuda count:{}".
format(has_ps_args, cuda_device_num))
logger.info("Run parameter-sever cpu mode. pserver arguments:{}".format(
has_ps_args))
launch_ps(args)
elif len(has_collective_args) > 0:
logger.info("Run collective gpu mode. gpu arguments:{}, cuda count:{}".
......
......@@ -435,9 +435,17 @@ def start_local_trainers(cluster,
len(pod.trainers),
pretty_print_envs(proc_env, ("Distributed Envs",
"Value"))))
logger.info(
"details abouts PADDLE_TRAINER_ENDPOINTS can be found in {}/endpoints.log.".
format(log_dir))
fn = None
if log_dir is not None:
os.system("mkdir -p {}".format(log_dir))
if os.path.exists("%s/endpoints.log" % log_dir):
os.system("rm -f {}/endpoints.log".format(log_dir))
with open("%s/endpoints.log" % log_dir, "w") as f:
f.write("PADDLE_TRAINER_ENDPOINTS: \n")
f.write("\n".join(cluster.trainers_endpoints()))
fn = open("%s/workerlog.%d" % (log_dir, idx), "a")
proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn)
else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册