diff --git a/deepspeed/pt/deepspeed_light.py b/deepspeed/pt/deepspeed_light.py
index 918f4127d9d9a40a576498553d9bd83b35332c78..abbb5cd339ea0799b574a5e8d73a7d1ae300ebe9 100755
--- a/deepspeed/pt/deepspeed_light.py
+++ b/deepspeed/pt/deepspeed_light.py
@@ -26,9 +26,6 @@ from deepspeed.pt.deepspeed_constants import ROUTE_TRAIN, ROUTE_PREDICT, \
 import deepspeed.pt.deepspeed_lr_schedules as lr_schedules
 from deepspeed.pt.deepspeed_csr_tensor import CSRTensor
 
-from apex import amp
-from apex.optimizers.fused_adam import FusedAdam
-
 MEMORY_OPT_ALLREDUCE_SIZE = 500000000
 SUMMARY_WRITER_DIR_NAME = "JobId"
 
@@ -458,6 +455,7 @@ class DeepSpeedLight(Module):
         if self.fp16_enabled() and 'max_grad_norm' in optimizer_parameters.keys():
             optimizer_parameters['max_grad_norm'] = 0.0
         if self.optimizer_name() == ADAM_OPTIMIZER:
+            from apex.optimizers.fused_adam import FusedAdam
             optimizer = FusedAdam(model_parameters, **optimizer_parameters)
         elif self.optimizer_name() == LAMB_OPTIMIZER:
             optimizer = FusedLamb(model_parameters, **optimizer_parameters)
diff --git a/setup.py b/setup.py
index 8406cbb007d589496858dcf04445ad4e15f051d8..7957e371bf22b7587c0aa49a6fc0674156883f1e 100644
--- a/setup.py
+++ b/setup.py
@@ -8,6 +8,7 @@ Create a new wheel via the following command: python setup.py bdist_wheel
 The wheel will be located at: dist/*.whl
 """
 
+import os
 import torch
 from deepspeed import __version__ as ds_version
 from setuptools import setup, find_packages
@@ -17,6 +18,30 @@ cmdclass = {}
 ext_modules = []
 cmdclass['build_ext'] = BuildExtension
 
+TORCH_MAJOR = int(torch.__version__.split('.')[0])
+TORCH_MINOR = int(torch.__version__.split('.')[1])
+
+if not torch.cuda.is_available():
+    # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486
+    print(
+        "[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
+        "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
+        "(compute capabilities 6.0, 6.1, 6.2, 7.0, 7.5)")
+    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
+        os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
+
+# Fix from apex that might be relevant for us as well, related to https://github.com/NVIDIA/apex/issues/456
+version_ge_1_1 = []
+if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 0):
+    version_ge_1_1 = ['-DVERSION_GE_1_1']
+version_ge_1_3 = []
+if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 2):
+    version_ge_1_3 = ['-DVERSION_GE_1_3']
+version_ge_1_5 = []
+if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 4):
+    version_ge_1_5 = ['-DVERSION_GE_1_5']
+version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5
+
 ext_modules.append(
     CUDAExtension(name='fused_lamb_cuda',
                   sources=['csrc/fused_lamb_cuda.cpp',
@@ -24,9 +49,9 @@ ext_modules.append(
                   extra_compile_args={
                       'cxx': [
                           '-O3',
-                      ],
+                      ] + version_dependent_macros,
                       'nvcc': ['-O3',
-                               '--use_fast_math']
+                               '--use_fast_math'] + version_dependent_macros
                   }))
 
 setup(name='deepspeed',
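
The deepspeed_light.py hunks replace the module-level apex imports with an import inside the ADAM_OPTIMIZER branch, so importing DeepSpeed no longer fails outright when apex is not installed; apex is only needed once a fused Adam optimizer is actually requested. Below is a minimal sketch of that deferred-import pattern, not code from this patch: the function name `build_basic_optimizer`, the `'adam'` label, and the error message are illustrative assumptions.

```python
# Minimal sketch (illustrative, not part of the patch): defer an optional
# dependency's import to the call site that needs it, as the diff does for
# apex's FusedAdam.
def build_basic_optimizer(name, model_parameters, **optimizer_parameters):
    if name == 'adam':
        try:
            # Imported lazily: merely importing this module no longer
            # requires apex to be installed.
            from apex.optimizers.fused_adam import FusedAdam
        except ImportError as err:
            raise RuntimeError(
                "The fused Adam optimizer requires NVIDIA apex "
                "(https://github.com/NVIDIA/apex)") from err
        return FusedAdam(model_parameters, **optimizer_parameters)
    raise ValueError(f"Unsupported optimizer: {name}")
```

The setup.py hunks address the build side of the same problem: when `torch.cuda.is_available()` is false (for example in a docker build with no GPU visible), `TORCH_CUDA_ARCH_LIST` is pinned so nvcc still has target architectures to compile for, and the `-DVERSION_GE_1_*` macros let the CUDA sources adapt to the installed torch version, following the approach taken in apex's own setup.py as noted in the comments.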