Unverified commit 8d53ac0c, authored by mzl, committed by GitHub

Add MPICH Multinode Runner (#2839)

* MPICH support

* MPICH changes

* MPICH changes

* MPICH changes

* MPICH changes

* accelerator runtime modifications

* Accelerator runtime changes

* Accelerator runtime modifications

* Remove redundant print from single node

* Move hostfile to tmp

* Code cleanup for MPICH class

* Code cleanup, rm whitespace

* Removing mpiexec environment check details

* Tmp hostfile not needed, as the hostfile is passed directly

* Remove debugging comments

* rm print statement

* Revert comm changes, as workaround not needed

* Use MPICHRunner name for class

* Use MPICHRunner as class name

* No need to use args.force_multi and args.launcher.

This should be set in the DeepSpeedExamples gpt-3.6b.sh script as:
launcher=MPICH
run_cmd="deepspeed --hostfile=${hostfile_ds} --num_nodes ${NUM_WORKERS} --num_gpus ${NUM_GPUS_PER_WORKER} --launcher=${launcher} --force_multi pretrain_gpt2.py $@ ${gpt_options}"

* Adhere to code pattern

* Rm empty lines in MPICHRunner class

* Uncomment check for num nodes and workers when hostfile_deepspeed is used in gpt-3.6b.sh

* pass MPICH hostfile through launcher_args in gpt-3.6b.sh

* Clean code and remove args hostfile

* fix merge

* fix merge

---------
Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>

* clean up and fix format

* add ut

---------
Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Co-authored-by: Ammar Ahmad Awan <ammar.awan@microsoft.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Parent 91d7090e
deepspeed/launcher/constants.py
@@ -4,6 +4,7 @@ PDSH_LAUNCHER = 'pdsh'
 PDSH_MAX_FAN_OUT = 1024
 OPENMPI_LAUNCHER = 'openmpi'
+MPICH_LAUNCHER = 'mpich'
 SLURM_LAUNCHER = 'slurm'
 MVAPICH_LAUNCHER = 'mvapich'
 MVAPICH_TMP_HOSTFILE = '/tmp/deepspeed_mvapich_hostfile'
deepspeed/launcher/multinode_runner.py
@@ -170,6 +170,55 @@ class OpenMPIRunner(MultiNodeRunner):
         ] + self.user_arguments


+class MPICHRunner(MultiNodeRunner):
+    def __init__(self, args, world_info_base64, resource_pool):
+        super().__init__(args, world_info_base64)
+        self.resource_pool = resource_pool
+
+    def backend_exists(self):
+        #TODO: if IB is available we should suggest mpich
+        return shutil.which('mpirun')  #mpich_info
+
+    @property
+    def name(self):
+        return "mpich"
+
+    def validate_args(self):
+        super().validate_args()
+        #TODO: Allow for include/exclude at node-level but not gpu-level
+        if self.args.include != "" or self.args.exclude != "":
+            raise ValueError(f"{self.name} backend does not support worker include/exclusion")
+
+        if self.args.num_nodes != -1 or self.args.num_gpus != -1:
+            raise ValueError(f"{self.name} backend does not support limiting num nodes/gpus")
+
+    def get_cmd(self, environment, active_resources):
+        devices_per_node = self.resource_pool.values()
+        total_process_count = sum(devices_per_node)
+        process_per_node = list(devices_per_node)[0]
+
+        mpirun_cmd = [
+            'mpirun',
+            '-n',
+            f'{total_process_count}',
+            '-ppn',
+            f'{process_per_node}',
+        ] + split(self.args.launcher_args)
+
+        export_cmd = []
+        for k, v in self.exports.items():
+            export_cmd += ['-x', "{}={}".format(k, v)]
+
+        python_exec = []
+        if not self.args.no_python:
+            python_exec = [sys.executable, "-u"]
+            if self.args.module:
+                python_exec.append("-m")
+
+        return mpirun_cmd + python_exec + [self.user_script] + self.user_arguments
+
+
 class SlurmRunner(MultiNodeRunner):
     def __init__(self, args, world_info_base64, resource_pool):
         super().__init__(args, world_info_base64)
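For concreteness, here is a minimal standalone sketch of the command that get_cmd assembles, assuming a hypothetical two-node pool with four devices per node and an assumed --launcher_args value (neither comes from the diff). Note that in this version export_cmd is built but not included in the returned command.

import sys
from shlex import split

# Hypothetical resource pool: 2 nodes with 4 slots each (assumed values).
resource_pool = {'worker-0': 4, 'worker-1': 4}
launcher_args = '-hostfile /tmp/mpich_hostfile'  # assumed --launcher_args value

devices_per_node = resource_pool.values()
total_process_count = sum(devices_per_node)   # 8 ranks in total
process_per_node = list(devices_per_node)[0]  # 4 ranks per node

# Mirrors MPICHRunner.get_cmd: mpirun -n <total> -ppn <per-node> <launcher_args> python -u <script>
mpirun_cmd = ['mpirun', '-n', f'{total_process_count}', '-ppn', f'{process_per_node}'] + split(launcher_args)
cmd = mpirun_cmd + [sys.executable, '-u', 'pretrain_gpt2.py']
print(' '.join(cmd))
# -> mpirun -n 8 -ppn 4 -hostfile /tmp/mpich_hostfile /usr/bin/python -u pretrain_gpt2.py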
deepspeed/launcher/runner.py
@@ -18,8 +18,8 @@ from copy import deepcopy
 import signal
 import time
-from .multinode_runner import PDSHRunner, OpenMPIRunner, MVAPICHRunner, SlurmRunner
-from .constants import PDSH_LAUNCHER, OPENMPI_LAUNCHER, MVAPICH_LAUNCHER, SLURM_LAUNCHER
+from .multinode_runner import PDSHRunner, OpenMPIRunner, MVAPICHRunner, SlurmRunner, MPICHRunner
+from .constants import PDSH_LAUNCHER, OPENMPI_LAUNCHER, MVAPICH_LAUNCHER, SLURM_LAUNCHER, MPICH_LAUNCHER
 from ..constants import TORCH_DISTRIBUTED_DEFAULT_PORT
 from ..nebula.constants import NEBULA_EXPORT_ENVS
 from ..utils import logger
@@ -114,7 +114,7 @@ def parse_args(args=None):
                         default=PDSH_LAUNCHER,
                         type=str,
                         help="(optional) choose launcher backend for multi-node "
-                        "training. Options currently include PDSH, OpenMPI, MVAPICH, SLURM.")
+                        "training. Options currently include PDSH, OpenMPI, MVAPICH, SLURM, MPICH.")
     parser.add_argument("--launcher_args",
                         default="",
@@ -511,6 +511,8 @@ def main(args=None):
         runner = PDSHRunner(args, world_info_base64)
     elif args.launcher == OPENMPI_LAUNCHER:
         runner = OpenMPIRunner(args, world_info_base64, resource_pool)
+    elif args.launcher == MPICH_LAUNCHER:
+        runner = MPICHRunner(args, world_info_base64, resource_pool)
     elif args.launcher == MVAPICH_LAUNCHER:
         runner = MVAPICHRunner(args, world_info_base64, resource_pool)
     elif args.launcher == SLURM_LAUNCHER:
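With this dispatch in place, the MPICH backend is selected via --launcher. A minimal sketch of a full invocation, mirroring the commit message above (hostfile paths and the training script are placeholders; --num_nodes/--num_gpus are omitted because validate_args rejects them for this backend):

import subprocess

# Hypothetical end-to-end launch; hostfile paths and script name are placeholders.
subprocess.run([
    'deepspeed',
    '--hostfile', '/job/hostfile',                       # DeepSpeed-format hostfile
    '--launcher', 'MPICH',
    '--launcher_args', '-hostfile /tmp/mpich_hostfile',  # MPICH hostfile, forwarded verbatim to mpirun
    '--force_multi',
    'pretrain_gpt2.py',
])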
tests/unit/launcher/test_multinode_runner.py
@@ -31,6 +31,13 @@ def test_openmpi_runner(runner_info):
     assert cmd[0] == 'mpirun'


+def test_mpich_runner(runner_info):
+    env, resource_pool, world_info, args = runner_info
+    runner = mnrunner.MPICHRunner(args, world_info, resource_pool)
+    cmd = runner.get_cmd(env, resource_pool)
+    assert cmd[0] == 'mpirun'
+
+
 def test_slurm_runner(runner_info):
     env, resource_pool, world_info, args = runner_info
     runner = mnrunner.SlurmRunner(args, world_info, resource_pool)
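The runner_info fixture itself is outside this hunk; a plausible sketch of what it might provide (all names and values below are assumptions, not taken from the diff):

import os
import pytest
from copy import deepcopy
from deepspeed.launcher import multinode_runner as mnrunner
from deepspeed.launcher.runner import encode_world_info, parse_args

@pytest.fixture
def runner_info():
    hosts = {'worker-0': 4, 'worker-1': 4}  # assumed 2-node resource pool
    world_info = encode_world_info(hosts)   # base64-encoded world info
    env = deepcopy(os.environ)
    args = parse_args(['test_launcher.py'])  # placeholder user script
    return env, hosts, world_info, args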