'''
Copyright 2020 The Microsoft DeepSpeed Team
'''

from deepspeed.pt.deepspeed_light import DeepSpeedLight
from deepspeed.pt.deepspeed_light import ADAM_OPTIMIZER, LAMB_OPTIMIZER
from deepspeed.pt.deepspeed_lr_schedules import add_tuning_arguments

# Git metadata is generated at install time by setup; when running from a
# plain source checkout the module does not exist, so fall back to None
# rather than failing the whole package import.
try:
    from deepspeed.git_version_info import git_hash, git_branch
except ImportError:
    git_hash = None
    git_branch = None

# Export version information
__version_major__ = 0
__version_minor__ = 1
__version_patch__ = 0
__version__ = '.'.join(
    map(str,
        [__version_major__,
         __version_minor__,
         __version_patch__]))
__git_hash__ = git_hash
__git_branch__ = git_branch


def initialize(args,
               model,
               optimizer=None,
               model_parameters=None,
               training_data=None,
               lr_scheduler=None,
               mpu=None,
               dist_init_required=None,
               collate_fn=None):
    r"""Initialize the DeepSpeed Engine.

    Arguments:
        args: a dictionary containing local_rank and deepspeed_config
            file location

        model: Required: nn.module class before apply any wrappers

        optimizer: Optional: a user defined optimizer, this is typically used instead of defining
            an optimizer in the DeepSpeed json config.

        model_parameters: Optional: An iterable of torch.Tensors or dicts.
            Specifies what Tensors should be optimized.

        training_data: Optional: Dataset of type torch.utils.data.Dataset

        lr_scheduler: Optional: Learning Rate Scheduler Object. It should define a get_lr(),
            step(), state_dict(), and load_state_dict() methods

        mpu: Optional: A model parallelism unit object that implements
            get_{model,data}_parallel_{rank,group,world_size}()

        dist_init_required: Optional: None will auto-initialize torch.distributed if needed,
            otherwise the user can force it to be initialized or not via boolean.

        collate_fn: Optional: Merges a list of samples to form a
            mini-batch of Tensor(s).  Used when using batched loading from a
            map-style dataset.

    Return:
        The following tuple is returned by this function.

        tuple: engine, engine.optimizer, engine.training_dataloader, engine.lr_scheduler

        engine: DeepSpeed runtime engine which wraps the client model for distributed training.

        engine.optimizer: Wrapped optimizer if a user defined optimizer is passed or
            if optimizer is specified in json config else None.

        engine.training_dataloader: DeepSpeed dataloader if training data was passed else None.

        engine.lr_scheduler: Wrapped lr scheduler if user lr scheduler is passed
            or if lr scheduler specified in json config else None.
    """
    # Log version/build information once up front so distributed runs can be
    # correlated with a specific source revision.
    print("DeepSpeed info: version={}, git-hash={}, git-branch={}".format(
        __version__,
        __git_hash__,
        __git_branch__),
          flush=True)

    # DeepSpeedLight wraps the client model/optimizer/scheduler and owns the
    # distributed setup; everything is forwarded unchanged.
    engine = DeepSpeedLight(args=args,
                            model=model,
                            optimizer=optimizer,
                            model_parameters=model_parameters,
                            training_data=training_data,
                            lr_scheduler=lr_scheduler,
                            mpu=mpu,
                            dist_init_required=dist_init_required,
                            collate_fn=collate_fn)

    # Expose the engine plus its wrapped components as a flat 4-tuple.
    return (engine,
            engine.optimizer,
            engine.training_dataloader,
            engine.lr_scheduler)


def _add_core_arguments(parser):
    r"""Helper (internal) function to update an argument parser with an argument group of the core DeepSpeed arguments.
        The core set of DeepSpeed arguments include the following:
        1) --deepspeed: boolean flag to enable DeepSpeed
        2) --deepspeed_config <json file path>: path of a json configuration file to configure DeepSpeed runtime.

        This is a helper function to the public add_config_arguments()
J
Jeff Rasley 已提交
114 115 116 117 118 119 120 121

    Arguments:
        parser: argument parser
    Return:
        parser: Updated Parser
    """
    group = parser.add_argument_group('DeepSpeed', 'DeepSpeed configurations')

122 123 124 125 126 127
    group.add_argument(
        '--deepspeed',
        default=False,
        action='store_true',
        help=
        'Enable DeepSpeed (helper flag for user code, no impact on DeepSpeed backend)')
J
Jeff Rasley 已提交
128 129 130 131 132 133

    group.add_argument('--deepspeed_config',
                       default=None,
                       type=str,
                       help='DeepSpeed json configuration file.')

134 135 136 137 138 139 140 141
    group.add_argument(
        '--deepscale',
        default=False,
        action='store_true',
        help=
        'Deprecated enable DeepSpeed (helper flag for user code, no impact on DeepSpeed backend)'
    )

142 143 144 145
    group.add_argument('--deepscale_config',
                       default=None,
                       type=str,
                       help='Deprecated DeepSpeed json configuration file.')
J
Jeff Rasley 已提交
146 147 148 149 150 151 152 153 154

    group.add_argument(
        '--deepspeed_mpi',
        default=False,
        action='store_true',
        help=
        "Run via MPI, this will attempt to discover the necessary variables to initialize torch "
        "distributed from the MPI environment")

J
Jeff Rasley 已提交
155 156 157 158
    return parser


def add_config_arguments(parser):
    r"""Update the argument parser to enabling parsing of DeepSpeed command line arguments.
        The set of DeepSpeed arguments include the following:
        1) --deepspeed: boolean flag to enable DeepSpeed
        2) --deepspeed_config <json file path>: path of a json configuration file to configure DeepSpeed runtime.

    Arguments:
        parser: argument parser
    Return:
        parser: Updated Parser
    """
    # Public wrapper: all argument registration lives in the internal helper.
    return _add_core_arguments(parser)