未验证 提交 ca03f498 编写于 作者: C chengduo 提交者: GitHub

fix distributed launch.py (#17571)

test=develop
上级 6e11f977
......
@@ -75,11 +75,11 @@ def start_procs(gpus, entrypoint, entrypoint_args, log_dir):
nranks = num_nodes * gpus
# ======== for dist training =======
gpu_ids = get_gpu_ids(gpus)
- for i in gpu_ids:
+ for i in range(gpus):
curr_env = {}
curr_env.update(default_envs)
curr_env.update({
- "FLAGS_selected_gpus": "%d" % i,
+ "FLAGS_selected_gpus": "%d" % gpu_ids[i],
"PADDLE_TRAINER_ID": "%d" % (node_trainer_id * gpus + i),
"PADDLE_CURRENT_ENDPOINT": "%s:617%d" % (current_ip, i),
# nranks
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册