未验证 提交 e585accd 编写于 作者: Haohongxiang 提交者: GitHub

Fix data-parallel / pipeline-parallel communication overlap (dp_comm_overlap) (#53398)

上级 6db6a347
......@@ -75,6 +75,9 @@ class PipelineParallel(MetaParallelBase):
].dp_comm_overlap
self._dp_comm_buffers = []
if self._dp_comm_overlap:
assert self.use_data_parallel and self.num_stages > 1
p2p.initialize_p2p_groups(
hcg, self._using_cache, self._enable_partial_send_recv
)
......
......@@ -62,9 +62,16 @@ def _dygraph_distributed_optimizer(optimizer, strategy=None):
if fleet_env.worker_num() > 1:
if not fleet_env._user_defined_strategy.heter_ccl_mode:
return HybridParallelOptimizer(
hp_optim = HybridParallelOptimizer(
optimizer, fleet_env._hcg, fleet_env._user_defined_strategy
)
if fleet_env._user_defined_strategy.hybrid_configs[
"pp_configs"
].dp_comm_overlap:
hp_optim._dp_enable = False
return hp_optim
else:
return HeterParallelOptimizer(
optimizer, fleet_env._user_defined_strategy
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册