未验证 提交 3d3f8d36 编写于 作者: J Jeff Rasley 提交者: GitHub

PyTorch 1.3+ build support (#135)

* add support for torch 1.3+ builds inside a docker build environment
* remove apex imports
上级 e0f5cc68
......@@ -26,9 +26,6 @@ from deepspeed.pt.deepspeed_constants import ROUTE_TRAIN, ROUTE_PREDICT, \
import deepspeed.pt.deepspeed_lr_schedules as lr_schedules
from deepspeed.pt.deepspeed_csr_tensor import CSRTensor
from apex import amp
from apex.optimizers.fused_adam import FusedAdam
MEMORY_OPT_ALLREDUCE_SIZE = 500000000
SUMMARY_WRITER_DIR_NAME = "JobId"
......@@ -458,6 +455,7 @@ class DeepSpeedLight(Module):
if self.fp16_enabled() and 'max_grad_norm' in optimizer_parameters.keys():
optimizer_parameters['max_grad_norm'] = 0.0
if self.optimizer_name() == ADAM_OPTIMIZER:
from apex.optimizers.fused_adam import FusedAdam
optimizer = FusedAdam(model_parameters, **optimizer_parameters)
elif self.optimizer_name() == LAMB_OPTIMIZER:
optimizer = FusedLamb(model_parameters, **optimizer_parameters)
......
......@@ -8,6 +8,7 @@ Create a new wheel via the following command: python setup.py bdist_wheel
The wheel will be located at: dist/*.whl
"""
import os
import torch
from deepspeed import __version__ as ds_version
from setuptools import setup, find_packages
......@@ -17,6 +18,30 @@ cmdclass = {}
ext_modules = []
cmdclass['build_ext'] = BuildExtension
# Major/minor components of the installed torch version, used below to pick
# version-specific preprocessor macros for the CUDA extension build.
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])

if not torch.cuda.is_available():
    # Fix to allow docker builds (no GPU visible at build time), similar to
    # https://github.com/NVIDIA/apex/issues/486
    print(
        "[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
        "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
        "(compute capabilities 6.0, 6.1, 6.2, 7.0, 7.5)")
    # Only supply a default architecture list; never override a user-provided one.
    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
        os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"

# Fix from apex that might be relevant for us as well, related to
# https://github.com/NVIDIA/apex/issues/456
# Each list holds a single -D macro when the installed torch is at least the
# named version, and is empty otherwise.
_torch_version = (TORCH_MAJOR, TORCH_MINOR)
version_ge_1_1 = ['-DVERSION_GE_1_1'] if _torch_version >= (1, 1) else []
version_ge_1_3 = ['-DVERSION_GE_1_3'] if _torch_version >= (1, 3) else []
version_ge_1_5 = ['-DVERSION_GE_1_5'] if _torch_version >= (1, 5) else []

# Combined macro list, in ascending version order.
version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5
ext_modules.append(
CUDAExtension(name='fused_lamb_cuda',
sources=['csrc/fused_lamb_cuda.cpp',
......@@ -24,9 +49,9 @@ ext_modules.append(
extra_compile_args={
'cxx': [
'-O3',
],
] + version_dependent_macros,
'nvcc': ['-O3',
'--use_fast_math']
'--use_fast_math'] + version_dependent_macros
}))
setup(name='deepspeed',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册