diff --git a/imperative/python/megengine/distributed/__init__.py b/imperative/python/megengine/distributed/__init__.py index 55ee93c6047f26ea5412623c17b01a3a5e6b4d81..df50e1f01c12a2aa33fe0a7844de9d006e88a1d3 100644 --- a/imperative/python/megengine/distributed/__init__.py +++ b/imperative/python/megengine/distributed/__init__.py @@ -26,7 +26,7 @@ from .server import Client, Server @mproperty def backend(mod): r"""Get or set backend of collective communication. - Available backends are ['nccl', 'shm', 'rccl'] + Available backends are ['nccl', 'rccl'] Examples: diff --git a/imperative/python/megengine/distributed/group.py b/imperative/python/megengine/distributed/group.py index dd961293bedaecc3c1f80d5002a2326223f3425d..e3a7cac88cb5eec6cf4e3b9b6ffe6e85e98d9119 100644 --- a/imperative/python/megengine/distributed/group.py +++ b/imperative/python/megengine/distributed/group.py @@ -95,7 +95,7 @@ class Group: WORLD = Group([]) _devices = {"gpu", "cuda", "rocm"} -_backends = {"nccl", "rccl", "shm", "auto"} +_backends = {"nccl", "rccl", "auto"} def init_process_group( @@ -115,7 +115,7 @@ def init_process_group( world_size: total number of processes participating in the job. rank: rank of the current process. device: the GPU device id to bind this process to. - backend: communicator backend, currently support 'nccl' and 'shm'. + backend: communicator backend, currently supports 'nccl' and 'rccl'. 
""" physical_device_type = what_is_xpu() if device_type == "xpu" else device_type if not isinstance(master_ip, str): diff --git a/imperative/python/megengine/distributed/helper.py b/imperative/python/megengine/distributed/helper.py index 2530438b247bb7ffcd971338979458efd9172d1b..8683e58abf2bf62924ced117f71275975dfa12a7 100644 --- a/imperative/python/megengine/distributed/helper.py +++ b/imperative/python/megengine/distributed/helper.py @@ -205,10 +205,7 @@ class AllreduceCallback: assert _group._sd, "please call init_process_group first" backend = _group._sd.backend if backend == "auto": - if group.is_single_machine and not _check_enable_p2p(): - backend = "shm" - else: - backend = "nccl" + backend = "nccl" self._backend = backend def _reset(self): diff --git a/src/opr-mm/impl/megray_helper.cpp b/src/opr-mm/impl/megray_helper.cpp index aa8f8c10c43b976125357f158b4c4178def3a35f..8e65e14f5681e1dabea381c51263644747b75c88 100644 --- a/src/opr-mm/impl/megray_helper.cpp +++ b/src/opr-mm/impl/megray_helper.cpp @@ -31,10 +31,8 @@ MegRay::Backend mgb::opr::get_megray_backend(const std::string& backend) { return MegRay::MEGRAY_RCCL; } else if (backend == "ucx") { return MegRay::MEGRAY_UCX; - } else if (backend == "shm") { - return MegRay::MEGRAY_SHM; } else { - mgb_throw(MegBrainError, "back CollectiveComm backend"); + mgb_throw(MegBrainError, "bad CollectiveComm backend"); } }