From cb12282e124d9689dc389e061d5eb2a97795373f Mon Sep 17 00:00:00 2001 From: JZ-LIANG <38102074+JZ-LIANG@users.noreply.github.com> Date: Mon, 7 Jun 2021 15:53:25 +0800 Subject: [PATCH] [sharding] bugfix for group init hang (#33327) --- .../distributed/fleet/meta_optimizers/sharding/utils.py | 5 +++++ .../tests/unittests/test_fleet_sharding_meta_optimizer.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index ca3606c16e..285647352d 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -638,3 +638,8 @@ def append_naive_sync(block, sync_var, ring_id): 'use_calc_stream': True, OP_ROLE_KEY: OpRole.Forward }) + block.append_op( + type='c_sync_calc_stream', + inputs={'X': [sync_var]}, + outputs={'Out': [sync_var]}, + attrs={OP_ROLE_KEY: OpRole.Forward}) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py index be5e87b9d3..af020548af 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py @@ -530,7 +530,8 @@ class TestFleetMetaOptimizer(TestFleetMetaOptimizer): 'uniform_random', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'c_gen_nccl_id', 'c_comm_init', 'fill_constant', 'c_allreduce_sum', - 'c_gen_nccl_id', 'c_comm_init', 'fill_constant', 'c_allreduce_sum', + 'c_sync_calc_stream', 'c_gen_nccl_id', 'c_comm_init', + 'fill_constant', 'c_allreduce_sum', 'c_sync_calc_stream', 'c_gen_nccl_id', 'c_comm_init', 'c_gen_nccl_id', 'c_comm_init' ]) -- GitLab