From 0f649b32397dcd043e51b414904ccfa730b52603 Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Fri, 28 Oct 2022 14:20:32 +0800 Subject: [PATCH] remove tcp store barrier (#47184) --- python/paddle/distributed/collective.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 77258f7036c..6825dae045f 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -409,11 +409,8 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout): # TODO(shenliang03): This is a temporary solution to solve the problem of # hang caused by tcp paddle.distributed.barrier(group=group) - # NOTE(liyurui): All processors should hang and wait using tcp store, in case master exit before sub-group is created. - if backend != 'heter': - _barrier_by_tcp_store(group_name, _default_store, timeout) - else: - print("Warning: store barrier is not supported for heter backend.") + if paddle.distributed.get_world_size() > 1: + paddle.distributed.barrier() return group if not backend: -- GitLab