"Not found distinct arguments and not compiled with cuda or xpu or npu. "
"Not found distinct arguments and not compiled with cuda or xpu or npu or mlu. "
"But found args.servers not empty, default use ps mode")
returnDistributeMode.PS
else:
returnDistributeMode.COLLECTIVE
else:
logger.warning(
"Not found distinct arguments and compiled with cuda or xpu or npu. "
"Not found distinct arguments and compiled with cuda or xpu or npu or mlu. "
"Default use collective mode")
returnDistributeMode.COLLECTIVE
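
The fallback in this hunk only matters when no explicit ps/collective arguments are given. A condensed, hypothetical sketch of the decision it encodes; the names `infer_mode_sketch`, `servers`, and `compiled_with_accelerator` are illustrative and not Paddle's internals:

```python
# Condensed sketch of the fallback above, not Paddle's actual function.
def infer_mode_sketch(servers, compiled_with_accelerator):
    # CPU-only build: a non-empty --servers value implies parameter-server mode.
    if not compiled_with_accelerator:
        return "PS" if servers else "COLLECTIVE"
    # Any cuda/xpu/npu/mlu build defaults to collective mode.
    return "COLLECTIVE"


assert infer_mode_sketch("10.0.0.1:6170", False) == "PS"
assert infer_mode_sketch("", False) == "COLLECTIVE"
assert infer_mode_sketch("10.0.0.1:6170", True) == "COLLECTIVE"
```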
...
...
@@ -536,6 +551,10 @@ def launch():
         - ``--selected_xpus``: alias for ``--xpus``; using ``--xpus`` is recommended.
+        - ``--mlus``: It's for mlu training. e.g., ``--mlus=0,1,2,3`` will launch four training processes, each bound to one mlu (see the usage sketch after this list).
+        - ``--selected_mlus``: alias for ``--mlus``; using ``--mlus`` is recommended.
         - ``training_script``: The full path to the single-GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
         - ``training_script_args``: The args of training_script. e.g., ``--lr=0.1``
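
A minimal, hypothetical illustration of how a comma-separated ``--mlus`` value maps to one worker process per device. The helper names and the ``SELECTED_MLU`` environment variable are placeholders, not Paddle's real launch internals:

```python
import multiprocessing as mp
import os


def parse_device_list(flag_value):
    # "0,1,2,3" -> [0, 1, 2, 3]; reject duplicates early.
    ids = [int(x) for x in flag_value.split(",") if x.strip()]
    assert len(ids) == len(set(ids)), "duplicate device ids"
    return ids


def worker(rank, device_id, script_args):
    # Each spawned process is pinned to a single device via an env var
    # (placeholder name; the real launcher uses its own mechanism).
    os.environ["SELECTED_MLU"] = str(device_id)
    print("rank %d -> mlu %d, script args: %s" % (rank, device_id, script_args))


if __name__ == "__main__":
    procs = []
    for rank, dev in enumerate(parse_device_list("0,1,2,3")):
        p = mp.Process(target=worker, args=(rank, dev, ["--lr=0.1"]))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
```

Four processes, four device ids, each worker seeing exactly one of them, which is all ``--mlus=0,1,2,3`` promises at the CLI level.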
...
...
@@ -688,7 +707,7 @@ def launch():
     check_backend(args.backend)
     distribute_mode = DistributeMode.COLLECTIVE
-    #assert args.backend in ['gloo', 'nccl', 'bkcl', 'heter', 'unknown']
+    #assert args.backend in ['gloo', 'nccl', 'bkcl', 'cncl', 'heter', 'unknown']
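
This hunk updates the commented-out assert so that ``cncl`` (the Cambricon collective library used for MLU) sits alongside ``nccl`` and ``bkcl``. A standalone sketch of an equivalent check, assuming that list of backend names; ``check_backend_sketch`` is illustrative and is not Paddle's ``check_backend``:

```python
# Illustrative only; mirrors the comment above rather than Paddle's real check.
SUPPORTED_BACKENDS = ('gloo', 'nccl', 'bkcl', 'cncl', 'heter', 'unknown')


def check_backend_sketch(backend):
    if backend not in SUPPORTED_BACKENDS:
        raise ValueError(
            "backend must be one of %s, got '%s'"
            % (", ".join(SUPPORTED_BACKENDS), backend))


check_backend_sketch('cncl')      # ok: MLU collective backend
# check_backend_sketch('mpi')     # would raise ValueError
```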