revert p2p communication for xpu (#53496)

eda0c588 · Roc · GitHub · 19950e65 · eda0c588
隐藏空白更改
内联并排

Showing 1 changed file with 0 additions and 26 deletions

python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py +0 -26

未找到文件。
--- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
+++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
@@ -24,26 +24,6 @@ _hcg = None
 _use_cache = False
 _enable_partial_send_recv = True

-_xpu_comm_group_started = False
-
-
-def _xpu_comm_group_start():
-    if not paddle.is_compiled_with_xpu():
-        return
-    global _xpu_comm_group_started
-    assert not _xpu_comm_group_started
-    framework.core.ProcessGroupBKCL.group_start()
-    _xpu_comm_group_started = True
-
-
-def _xpu_comm_group_end():
-    if not paddle.is_compiled_with_xpu():
-        return
-    global _xpu_comm_group_started
-    if _xpu_comm_group_started:
-        framework.core.ProcessGroupBKCL.group_end()
-        _xpu_comm_group_started = False
-

 def initialize_p2p_groups(hcg, use_cache=True, enable_partial_send_recv=True):
    global _hcg, _use_cache, _enable_partial_send_recv
@@ -371,7 +351,6 @@ def _p2p_helper(
    # TODO(Yuang Liu): use batch_isend_irecv replace all these comm ops
    tasks = []
    # start to p2p communicate
-    _xpu_comm_group_start()
    if tensor_send_prev is not None:
        if isinstance(tensor_send_prev, tuple):
            for d in tensor_send_prev:
@@ -407,7 +386,6 @@ def _p2p_helper(
                    use_calc_stream=sync_recv,
                )
                if sync_recv:
-                    _xpu_comm_group_end()
                    allgather_partial(
                        d,
                        nranks=mp_degree,
@@ -428,7 +406,6 @@ def _p2p_helper(
            )

            if sync_recv:
-                _xpu_comm_group_end()
                allgather_partial(
                    tensor_recv_prev,
                    nranks=mp_degree,
@@ -475,7 +452,6 @@ def _p2p_helper(
                )

                if sync_recv:
-                    _xpu_comm_group_end()
                    allgather_partial(
                        d,
                        nranks=mp_degree,
@@ -496,7 +472,6 @@ def _p2p_helper(
                use_calc_stream=sync_recv,
            )
            if sync_recv:
-                _xpu_comm_group_end()
                allgather_partial(
                    tensor_recv_next,
                    nranks=mp_degree,
@@ -506,7 +481,6 @@ def _p2p_helper(
                )
            else:
                tasks.append(task)
-    _xpu_comm_group_end()
    if not sync_recv:
        if framework.in_dygraph_mode():
            # wait irecv tasks in eager dygraph mode with new comm library