未验证 提交 905e2346 编写于 作者: D danleifeng 提交者: GitHub

add endpoints log;test=develop (#27439)

上级 9f3a9be7
@@ -463,9 +463,8 @@ def launch():
 cuda_device_num = 0
 if len(has_ps_args) > 0 or cuda_device_num == 0:
-logger.info(
-"Run parameter-sever cpu mode. pserver arguments:{}, cuda count:{}".
-format(has_ps_args, cuda_device_num))
+logger.info("Run parameter-sever cpu mode. pserver arguments:{}".format(
+has_ps_args))
 launch_ps(args)
 elif len(has_collective_args) > 0:
 logger.info("Run collective gpu mode. gpu arguments:{}, cuda count:{}".
......
@@ -435,9 +435,17 @@ def start_local_trainers(cluster,
 len(pod.trainers),
 pretty_print_envs(proc_env, ("Distributed Envs",
 "Value"))))
+logger.info(
+"details abouts PADDLE_TRAINER_ENDPOINTS can be found in {}/endpoints.log.".
+format(log_dir))
 fn = None
 if log_dir is not None:
 os.system("mkdir -p {}".format(log_dir))
+if os.path.exists("%s/endpoints.log" % log_dir):
+os.system("rm -f {}/endpoints.log".format(log_dir))
+with open("%s/endpoints.log" % log_dir, "w") as f:
+f.write("PADDLE_TRAINER_ENDPOINTS: \n")
+f.write("\n".join(cluster.trainers_endpoints()))
 fn = open("%s/workerlog.%d" % (log_dir, idx), "a")
 proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn)
 else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册