From f1b8f0efd5f801ec49504c5986ff998a86726b5c Mon Sep 17 00:00:00 2001 From: caozhou <48191911+Caozhou1995@users.noreply.github.com> Date: Fri, 21 Oct 2022 14:13:30 +0800 Subject: [PATCH] fix process group init bug (#47224) --- python/paddle/distributed/auto_parallel/engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index 2eaefee6cd..1d6bce931d 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -647,12 +647,13 @@ class Engine: # Traverse different rank programs and traverse each op of them, # instantiate communication by process_mapping. all_process_groups = get_all_process_groups() - + cur_rank = self._cur_rank + # NOTE: Once communication group initialization is unified across dynamic and static modes, the initialization logic of full mode will be removed, because port occupation errors may occur with it. if self._strategy.auto_mode == "full": initialize_pg_in_full_mode(all_process_groups, cur_rank) else: for process_group in all_process_groups: - if self._cur_rank not in process_group.ranks: + if cur_rank not in process_group.ranks: continue process_group.instantiate() -- GitLab