From cda7397fbafe4d565b9124fee6a012a148cf1159 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Tue, 5 Jan 2021 00:40:43 -0600 Subject: [PATCH] [cherry pick]Set FLAGS_selected_gpus for spawn (#29962) (#30097) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set FLAGS_selected_gpus for spawn. When the child process starts, it inherits the configuration of the main process and sets the FLAGS once, but the environment variable has not been set at that time, so FLAGS_selected_gpus stays the same as in the main process (usually empty). Manually update the flags here. 注:增加了一个单测,又移除了,单测打印显示CI机器nvidia-smi只有两张卡,需要大于两张卡才能测这个问题 --- python/paddle/distributed/spawn.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index 433662e8ebc..86ec18061c5 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -27,7 +27,7 @@ from paddle.device import get_device # deprecated module import from paddle.fluid import core -from paddle.fluid.framework import _cpu_num +from paddle.fluid.framework import _cpu_num, set_flags class ParallelEnvArgs(object): @@ -153,6 +153,12 @@ def _remove_risky_env(): def _set_trainer_env(env_dict): + # NOTE(chenweihang): [ Why need set FLAGS_selected_gpus here? ] + # When the child process starts, it will inherit the configuration of the + # main process and set the FLAGS once, but the environment variable has + # not been set at this time, which leads to the FLAGS_selected_gpus + # is keep same with mainprocess(usually empty), so manually update the flags here + set_flags({'FLAGS_selected_gpus': env_dict['FLAGS_selected_gpus']}) for var_name in env_dict: os.environ[var_name] = env_dict[var_name] -- GitLab