Remove cncl keyword in py (#52939)

ea04bef8 · Kim Yann · GitHub · d2b0d63f · ea04bef8 · ea04bef8
5 changed files
--- a/python/paddle/distributed/fleet/launch.py
+++ b/python/paddle/distributed/fleet/launch.py
@@ -757,7 +757,7 @@ def launch():
        check_backend(args.backend)
        distribute_mode = DistributeMode.COLLECTIVE
-    # assert args.backend in ['gloo', 'nccl', 'bkcl', 'cncl', 'heter', 'unknown']
+    # assert args.backend in ['gloo', 'nccl', 'bkcl', 'heter', 'unknown']
    if args.backend == 'gloo':
        logger.warning("launch start with CPUONLY mode")

--- a/python/paddle/distributed/fleet/launch_utils.py
+++ b/python/paddle/distributed/fleet/launch_utils.py
@@ -1986,7 +1986,6 @@ def check_backend(backend):
        'nccl',
        'gloo',
        'bkcl',
-        'cncl',
        'auto',
        'heter',
        'xccl',

--- a/python/paddle/distributed/parallel.py
+++ b/python/paddle/distributed/parallel.py
@@ -886,7 +886,7 @@ def _start_kv_server(port, http_server_d, size):
 def _is_cpuonly(backend):
    check_backend(backend)
    if (
-        backend in ['auto', 'nccl', 'bkcl', 'heter', 'cncl']
+        backend in ['auto', 'nccl', 'bkcl', 'heter']
        and (core.is_compiled_with_cuda() or core.is_compiled_with_xpu())
    ) or backend == 'xccl':

--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -126,8 +126,6 @@ def _get_default_backend():
        return 'nccl'
    elif 'xpu' in device:
        return 'bkcl'
-    elif 'mlu' in device:
-        return 'cncl'
    elif 'cpu' in device:
        return 'gloo'
    else:
@@ -259,45 +257,6 @@ def _get_subprocess_env_list(nprocs, options):
                        "XPU_VISIBLE_DEVICES (%s)."
                        % (card_id, ",".join(env_devices_list))
                    )
-    elif options['backend'] == 'cncl':
-        args.selected_devices = options.get('mlus', None)
-        if args.selected_devices is None:
-            args.selected_devices = options.get('selected_devices', None)
-        env_devices = os.getenv("MLU_VISIBLE_DEVICES", None)
-        if env_devices is None or env_devices == "":
-            env_devices_list = [
-                str(x) for x in range(core.get_custom_device_count('mlu'))
-            ]
-        else:
-            env_devices_list = env_devices.split(',')
-        if args.selected_devices is None:
-            if len(env_devices_list) < nprocs:
-                raise RuntimeError(
-                    "the number of visible devices(%d) is less than the number "
-                    "of spawn processes(%d), please ensure that the correct "
-                    "`nprocs` argument is passed or the environment variable "
-                    "`MLU_VISIBLE_DEVICES` is correctly configured."
-                    % (len(env_devices_list), nprocs)
-                )
-            args.selected_devices = ",".join(
-                [str(env_devices_list[x]) for x in range(0, nprocs)]
-            )
-        else:
-            selected_device_list = args.selected_devices.split(',')
-            if len(selected_device_list) != nprocs:
-                raise ValueError(
-                    "The number of selected devices(%s) is not equal to "
-                    "the number of spawn processes(%d), please ensure that the "
-                    "correct `nprocs` and `mlus` arguments are passed."
-                    % (len(selected_device_list), nprocs)
-                )
-            for card_id in selected_device_list:
-                if card_id not in env_devices_list:
-                    raise ValueError(
-                        "The selected mlu card %s cannot found in "
-                        "MLU_VISIBLE_DEVICES (%s)."
-                        % (card_id, ",".join(env_devices_list))
-                    )
    elif options['backend'] == 'gloo':
        # TODO check gpu / xpu flag must not exist
        warnings.warn(
@@ -372,8 +331,6 @@ def _set_trainer_env(env_dict, backend):
        set_flags({'FLAGS_selected_gpus': env_dict['FLAGS_selected_gpus']})
    elif backend == 'bkcl':
        set_flags({'FLAGS_selected_xpus': env_dict['FLAGS_selected_xpus']})
-    elif backend == 'cncl':
-        set_flags({'FLAGS_selected_mlus': env_dict['FLAGS_selected_mlus']})
    else:
        # NOTE(xiongkun) why not raise Error ?
        # So far, we added support for CPU parallel, and will be applied when paddle is not

--- a/python/paddle/distributed/utils/launch_utils.py
+++ b/python/paddle/distributed/utils/launch_utils.py
@@ -427,15 +427,6 @@ def _prepare_trainer_env(cluster, trainer, backend=None):
            "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
            "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
        }
-    elif backend == 'cncl':
-        proc_env = {
-            "FLAGS_selected_mlus": "%s"
-            % ",".join([str(g) for g in trainer.gpus]),
-            "PADDLE_TRAINER_ID": "%d" % trainer.rank,
-            "PADDLE_CURRENT_ENDPOINT": "%s" % trainer.endpoint,
-            "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
-            "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
-        }
    elif backend == 'gloo':
        # NOTE (xiongkun) default fall back into cpu only
        proc_env = {