'''
Copyright 2020 The Microsoft DeepSpeed Team
'''
import sys
import types

from . import ops

from .runtime.engine import DeepSpeedEngine
from .runtime.engine import ADAM_OPTIMIZER, LAMB_OPTIMIZER
from .runtime.pipe.engine import PipelineEngine
from .runtime.lr_schedules import add_tuning_arguments
from .runtime.config import DeepSpeedConfig
from .runtime.activation_checkpointing import checkpointing
from .ops.transformer import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
from .utils import log_dist

from .pipe import PipelineModule

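# git_version_info.py is typically generated at build/install time; fall back to
# placeholder values when it is absent (e.g., when running from a raw source tree).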
try:
    from .git_version_info import version, git_hash, git_branch
except ImportError:
    version = "0.0.0+unknown"
    git_hash = None
    git_branch = None

# Export version information
version, __version_tag__ = version.split('+')
__version_major__ = int(version.split('.')[0])
__version_minor__ = int(version.split('.')[1])
__version_patch__ = int(version.split('.')[2])
__version__ = '.'.join(
    map(str,
        [__version_major__,
         __version_minor__,
         __version_patch__]))
__version__ = f"{__version__}+{__version_tag__}"
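# For example, a (hypothetical) version string "0.3.1+3e69ba2" yields
# __version__ == "0.3.1+3e69ba2", __version_major__ == 0, __version_minor__ == 3,
# __version_patch__ == 1, and __version_tag__ == "3e69ba2".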
__git_hash__ = git_hash
__git_branch__ = git_branch

# Provide backwards compatibility with old deepspeed.pt module structure; should hopefully not be used
pt = types.ModuleType('pt', 'dummy pt module for backwards compatibility')
deepspeed = sys.modules[__name__]
setattr(deepspeed, 'pt', pt)
setattr(deepspeed.pt, 'deepspeed_utils', deepspeed.runtime.utils)
sys.modules['deepspeed.pt'] = deepspeed.pt
sys.modules['deepspeed.pt.deepspeed_utils'] = deepspeed.runtime.utils
setattr(deepspeed.pt, 'deepspeed_config', deepspeed.runtime.config)
sys.modules['deepspeed.pt.deepspeed_config'] = deepspeed.runtime.config
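# With the aliases above, legacy imports such as
#   from deepspeed.pt.deepspeed_utils import ...
# resolve to deepspeed.runtime.utils without changes to user code.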


def initialize(args,
               model,
               optimizer=None,
               model_parameters=None,
               training_data=None,
               lr_scheduler=None,
               mpu=None,
               dist_init_required=None,
               collate_fn=None,
               config_params=None):
    """Initialize the DeepSpeed Engine.

    Arguments:
        args: a dictionary containing local_rank and deepspeed_config
            file location

        model: Required: nn.Module class before applying any wrappers

        optimizer: Optional: a user-defined optimizer; this is typically used instead of defining
            an optimizer in the DeepSpeed JSON config.

        model_parameters: Optional: An iterable of torch.Tensors or dicts.
            Specifies what Tensors should be optimized.

        training_data: Optional: Dataset of type torch.utils.data.Dataset

        lr_scheduler: Optional: Learning Rate Scheduler Object. It should define get_lr(),
            step(), state_dict(), and load_state_dict() methods.

        mpu: Optional: A model parallelism unit object that implements
            get_{model,data}_parallel_{rank,group,world_size}()

        dist_init_required: Optional: ``None`` will auto-initialize torch.distributed if needed;
            otherwise the user can force it to be initialized or not via a boolean.

        collate_fn: Optional: Merges a list of samples to form a
            mini-batch of Tensor(s).  Used when using batched loading from a
            map-style dataset.

    Returns:
        A tuple of ``engine``, ``optimizer``, ``training_dataloader``, ``lr_scheduler``

        * ``engine``: DeepSpeed runtime engine which wraps the client model for distributed training.

        * ``optimizer``: Wrapped optimizer if a user-defined ``optimizer`` is supplied or an
          optimizer is specified in the JSON config, otherwise ``None``.

        * ``training_dataloader``: DeepSpeed dataloader if ``training_data`` was supplied,
          otherwise ``None``.

        * ``lr_scheduler``: Wrapped lr scheduler if a user ``lr_scheduler`` is passed or an
          ``lr_scheduler`` is specified in the JSON configuration, otherwise ``None``.
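
    Example:
        A minimal usage sketch; ``Net`` and ``args`` are placeholders for the
        user's model class and parsed command-line arguments::

            model = Net()
            engine, optimizer, _, _ = deepspeed.initialize(
                args=args,
                model=model,
                model_parameters=model.parameters())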
    """
    log_dist("DeepSpeed info: version={}, git-hash={}, git-branch={}".format(
        __version__,
        __git_hash__,
        __git_branch__),
             ranks=[0])

    if not isinstance(model, PipelineModule):
        engine = DeepSpeedEngine(args=args,
                                 model=model,
                                 optimizer=optimizer,
                                 model_parameters=model_parameters,
                                 training_data=training_data,
                                 lr_scheduler=lr_scheduler,
                                 mpu=mpu,
                                 dist_init_required=dist_init_required,
                                 collate_fn=collate_fn,
                                 config_params=config_params)
    else:
        assert mpu is None, "mpu must be None with pipeline parallelism"
        engine = PipelineEngine(args=args,
                                model=model,
                                optimizer=optimizer,
                                model_parameters=model_parameters,
                                training_data=training_data,
                                lr_scheduler=lr_scheduler,
                                mpu=model.mpu(),
                                dist_init_required=dist_init_required,
                                collate_fn=collate_fn,
                                config_params=config_params)

    return_items = [
        engine,
        engine.optimizer,
        engine.training_dataloader,
        engine.lr_scheduler
    ]
    return tuple(return_items)


def _add_core_arguments(parser):
    r"""Helper (internal) function to update an argument parser with an argument group of the core DeepSpeed arguments.
        The core set of DeepSpeed arguments includes the following:
        1) --deepspeed: boolean flag to enable DeepSpeed
        2) --deepspeed_config <json file path>: path of a json configuration file to configure DeepSpeed runtime.

        This is a helper function for the public add_config_arguments()

    Arguments:
        parser: argument parser
    Return:
        parser: Updated Parser
    """
    group = parser.add_argument_group('DeepSpeed', 'DeepSpeed configurations')

    group.add_argument(
        '--deepspeed',
        default=False,
        action='store_true',
        help=
        'Enable DeepSpeed (helper flag for user code, no impact on DeepSpeed backend)')

    group.add_argument('--deepspeed_config',
                       default=None,
                       type=str,
                       help='DeepSpeed json configuration file.')

    group.add_argument(
        '--deepscale',
        default=False,
        action='store_true',
        help=
        'Deprecated: enable DeepSpeed (helper flag for user code, no impact on DeepSpeed backend)'
    )

    group.add_argument('--deepscale_config',
                       default=None,
                       type=str,
                       help='Deprecated: DeepSpeed json configuration file.')

    group.add_argument(
        '--deepspeed_mpi',
        default=False,
        action='store_true',
        help=
        "Run via MPI, this will attempt to discover the necessary variables to initialize torch "
        "distributed from the MPI environment")

    return parser


def add_config_arguments(parser):
    r"""Update the argument parser to enabling parsing of DeepSpeed command line arguments.
        The set of DeepSpeed arguments include the following:
        1) --deepspeed: boolean flag to enable DeepSpeed
        2) --deepspeed_config <json file path>: path of a json configuration file to configure DeepSpeed runtime.

    Arguments:
        parser: argument parser
    Return:
        parser: Updated Parser
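
    Example:
        A minimal sketch of wiring the DeepSpeed flags into an existing parser;
        the script and config file names are placeholders::

            parser = argparse.ArgumentParser(description='My training script.')
            parser = deepspeed.add_config_arguments(parser)
            args = parser.parse_args()
            # launched as: python train.py --deepspeed --deepspeed_config ds_config.json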
    """
    parser = _add_core_arguments(parser)

    return parser