Add Windows support in README, use c++17 on Windows to support latest VC & cuda build tool (#1151)

* Add Windows support in README, use C++17 on Windows to support the latest VC build tools
* Add detailed C++ build tools version in README

Co-authored-by: N Jeff Rasley <jerasley@microsoft.com>

Add Windows support in README, use c++17 on Windows to support latest VC & cuda build tool (#1151)
* Add Windows support in README, use C++17 on Windows to support the latest VC build tools
* Add detailed C++ build tools version in README

Co-authored-by: N Jeff Rasley <jerasley@microsoft.com>
71ecf7e6 · eltonzheng · GitHub · 8e48756c · 71ecf7e6 · 71ecf7e6
6 changed files
--- a/README.md
+++ b/README.md
@@ -96,6 +96,12 @@ If you would like to pre-install any of the DeepSpeed extensions/ops (instead
 of JIT compiling) or install pre-compiled ops via PyPI please see our [advanced
 installation instructions](https://www.deepspeed.ai/tutorials/advanced-install/).

+On Windows you can build a wheel with the following steps; currently only inference mode is supported.
+1. Install PyTorch, such as PyTorch 1.8 + CUDA 11.1
+2. Install the Visual C++ build tools, such as the VS2019 C++ x64/x86 build tools
+3. Launch a cmd console with Administrator privileges (needed to create the required symlink folders)
+4. Run `python setup.py bdist_wheel` to build the wheel in the `dist` folder
+
 # Features
 Below we provide a brief feature list, see our detailed [feature
 overview](https://www.deepspeed.ai/features/) for descriptions and usage.

--- a/op_builder/builder.py
+++ b/op_builder/builder.py
@@ -364,6 +364,18 @@ class CUDAOpBuilder(OpBuilder):
        else:
            return ['-O3', '-std=c++14', '-g', '-Wno-reorder']

+    def nvcc_args(self):
+        args = [
+            '-O3',
+            '--use_fast_math',
+            '-std=c++17' if sys.platform == "win32" else '-std=c++14',
+            '-U__CUDA_NO_HALF_OPERATORS__',
+            '-U__CUDA_NO_HALF_CONVERSIONS__',
+            '-U__CUDA_NO_HALF2_OPERATORS__'
+        ]
+
+        return args + self.compute_capability_args()
+
    def libraries_args(self):
        if sys.platform == "win32":
            return ['cublas', 'curand']

--- a/op_builder/cpu_adam.py
+++ b/op_builder/cpu_adam.py
@@ -62,15 +62,3 @@ class CPUAdamBuilder(CUDAOpBuilder):
            '-fopenmp',
            SIMD_WIDTH
        ]
-
-    def nvcc_args(self):
-        args = [
-            '-O3',
-            '--use_fast_math',
-            '-std=c++14',
-            '-U__CUDA_NO_HALF_OPERATORS__',
-            '-U__CUDA_NO_HALF_CONVERSIONS__',
-            '-U__CUDA_NO_HALF2_OPERATORS__'
-        ]
-        args += self.compute_capability_args()
-        return args
--- a/op_builder/quantizer.py
+++ b/op_builder/quantizer.py
@@ -21,15 +21,3 @@ class QuantizerBuilder(CUDAOpBuilder):

    def include_paths(self):
        return ['csrc/includes']
-
-    def nvcc_args(self):
-        args = [
-            '-O3',
-            '--use_fast_math',
-            '-std=c++14',
-            '-U__CUDA_NO_HALF_OPERATORS__',
-            '-U__CUDA_NO_HALF_CONVERSIONS__',
-            '-U__CUDA_NO_HALF2_OPERATORS__'
-        ]
-
-        return args + self.compute_capability_args()
--- a/op_builder/transformer.py
+++ b/op_builder/transformer.py
@@ -30,15 +30,3 @@ class TransformerBuilder(CUDAOpBuilder):

    def include_paths(self):
        return ['csrc/includes']
-
-    def nvcc_args(self):
-        args = [
-            '-O3',
-            '--use_fast_math',
-            '-std=c++14',
-            '-U__CUDA_NO_HALF_OPERATORS__',
-            '-U__CUDA_NO_HALF_CONVERSIONS__',
-            '-U__CUDA_NO_HALF2_OPERATORS__'
-        ]
-
-        return args + self.compute_capability_args()
--- a/op_builder/transformer_inference.py
+++ b/op_builder/transformer_inference.py
@@ -24,15 +24,3 @@ class InferenceBuilder(CUDAOpBuilder):

    def include_paths(self):
        return ['csrc/transformer/inference/includes']
-
-    def nvcc_args(self):
-        args = [
-            '-O3',
-            '--use_fast_math',
-            '-std=c++14',
-            '-U__CUDA_NO_HALF_OPERATORS__',
-            '-U__CUDA_NO_HALF_CONVERSIONS__',
-            '-U__CUDA_NO_HALF2_OPERATORS__',
-        ]
-
-        return args + self.compute_capability_args()