Unverified commit adc15e1c, authored by Gavin Goodship, committed by GitHub

Update curriculum-learning.md (#3031)

Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Parent 1f85569e
@@ -130,7 +130,7 @@ In our [paper](https://arxiv.org/abs/2108.06084) section 5.4 we demonstrate that
 ### 2.3 Token-based training termination
-Because curriculum learning changes length of each sequence/sample during training, it is very hard/impossible to use number of steps/samples to terminate the training exactly at the desired number of tokens. Thus, we add a `--train-tokens` config for accurate token-based termination. We recommend increasing your original `--train-samples` or `--train-iters` to a large enough number (e.g., 3X of what you used for baseline), and set `--train-tokens` at the exact desired number of training tokens.
+Because curriculum learning changes the length of each sequence/sample during training, it is very hard/impossible to use a number of steps/samples to terminate the training exactly at the desired number of tokens. Thus, we add a `--train-tokens` config for accurate token-based termination. We recommend increasing your original `--train-samples` or `--train-iters` to a large enough number (e.g., 3X of what you used for baseline), and set `--train-tokens` at the exact desired number of training tokens.
 ### 2.4 Token-based LR decay
...
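The `--train-tokens` recommendation in the hunk above is easy to sanity-check with a rough calculation. The sketch below uses made-up numbers (the token budget and average curriculum sequence length are illustrative, not values from the tutorial) to show why `--train-samples` needs a generous upper bound when termination is token-based.

```python
# Illustrative numbers only; nothing here comes from the tutorial itself.
train_tokens = 300_000_000_000   # desired --train-tokens budget (hypothetical)
full_seq_len = 2048              # baseline sequence length
avg_curriculum_seq_len = 900     # hypothetical average length under the curriculum schedule

baseline_samples = train_tokens // full_seq_len               # samples if every sample were full length
curriculum_samples = train_tokens // avg_curriculum_seq_len   # samples actually consumed under curriculum

print(f"baseline samples   ~ {baseline_samples:,}")
print(f"curriculum samples ~ {curriculum_samples:,}")
print(f"ratio              ~ {curriculum_samples / baseline_samples:.1f}x")
```

Because early samples are truncated, the same token budget spans noticeably more samples than the baseline, which is why the tutorial suggests raising `--train-samples`/`--train-iters` (e.g., to roughly 3X) and letting `--train-tokens` stop the run.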
@@ -6,10 +6,11 @@
 DeepSpeed library
 To build wheel on Windows:
-1. Install pytorch, such as pytorch 1.12 + cuda 11.6
-2. Install visual cpp build tool
-3. Include cuda toolkit
-4. Launch cmd console with Administrator privilege for creating required symlink folders
+1. Install pytorch, such as pytorch 1.12 + cuda 11.6.
+2. Install visual cpp build tool.
+3. Include cuda toolkit.
+4. Launch cmd console with Administrator privilege for creating required symlink folders.
 Create a new wheel via the following command:
 build_win.bat
@@ -36,7 +37,7 @@ from op_builder import get_default_compute_capabilities, OpBuilder
 from op_builder.all_ops import ALL_OPS
 from op_builder.builder import installed_cuda_version
-# fetch rocm state
+# Fetch rocm state.
 is_rocm_pytorch = OpBuilder.is_rocm_pytorch()
 rocm_version = OpBuilder.installed_rocm_version()
@@ -68,12 +69,12 @@ extras_require = {
     'sd': fetch_requirements('requirements/requirements-sd.txt')
 }
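`fetch_requirements` is defined outside the hunks shown here; a plausible minimal version simply reads a pip requirements file and drops comments and blank lines. The helper below is a sketch of that assumption, not the actual implementation.

```python
# Sketch of a fetch_requirements-style helper (assumed behavior; the real
# function is elided from this diff).
def fetch_requirements(path):
    with open(path, 'r') as fd:
        return [line.strip() for line in fd
                if line.strip() and not line.strip().startswith('#')]

# Hypothetical usage mirroring the extras_require entry above:
# extras_require = {'sd': fetch_requirements('requirements/requirements-sd.txt')}
```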
-# Add specific cupy version to both onebit extension variants
+# Add specific cupy version to both onebit extension variants.
 if torch_available and torch.cuda.is_available():
     cupy = None
     if is_rocm_pytorch:
         rocm_major, rocm_minor = rocm_version
-        # XXX cupy support for rocm 5 is not available yet
+        # XXX cupy support for rocm 5 is not available yet.
         if rocm_major <= 4:
             cupy = f"cupy-rocm-{rocm_major}-{rocm_minor}"
     else:
@@ -82,7 +83,7 @@ if torch_available and torch.cuda.is_available():
         extras_require['1bit'].append(cupy)
         extras_require['1bit_mpi'].append(cupy)
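For the ROCm branch above, here is a toy illustration of the wheel name that ends up appended to the `1bit` and `1bit_mpi` extras (the version pair is made up, and the CUDA branch is elided by the hunk):

```python
# Made-up (major, minor) pair standing in for OpBuilder.installed_rocm_version().
rocm_major, rocm_minor = 4, 2
cupy = None
if rocm_major <= 4:  # cupy wheels for ROCm 5 were not yet available
    cupy = f"cupy-rocm-{rocm_major}-{rocm_minor}"
print(cupy)  # -> cupy-rocm-4-2
```

With that requirement appended, installing the `1bit` extra pulls in a matching cupy build alongside the other one-bit dependencies.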
-# Make an [all] extra that installs all needed dependencies
+# Make an [all] extra that installs all needed dependencies.
 all_extras = set()
 for extra in extras_require.items():
     for req in extra[1]:
@@ -91,7 +92,7 @@ extras_require['all'] = list(all_extras)
 cmdclass = {}
-# For any pre-installed ops force disable ninja
+# For any pre-installed ops force disable ninja.
 if torch_available:
     from accelerator import get_accelerator
     cmdclass['build_ext'] = get_accelerator().build_extension().with_options(use_ninja=False)
@@ -104,7 +105,7 @@ else:
     TORCH_MINOR = "0"
 if torch_available and not torch.cuda.is_available():
-    # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486
+    # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486.
     print("[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
           "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
           "(compute capabilities 6.0, 6.1, 6.2)")
@@ -148,18 +149,18 @@ for op_name, builder in ALL_OPS.items():
     op_compatible = builder.is_compatible()
     compatible_ops[op_name] = op_compatible
-    # If op is requested but not available, throw an error
+    # If op is requested but not available, throw an error.
     if op_enabled(op_name) and not op_compatible:
         env_var = op_envvar(op_name)
         if env_var not in os.environ:
             builder.warning(f"One can disable {op_name} with {env_var}=0")
         abort(f"Unable to pre-compile {op_name}")
-    # if op is compatible but install is not enabled (JIT mode)
+    # If op is compatible but install is not enabled (JIT mode).
     if is_rocm_pytorch and op_compatible and not op_enabled(op_name):
         builder.hipify_extension()
-    # If op install enabled, add builder to extensions
+    # If op install enabled, add builder to extensions.
     if op_enabled(op_name) and op_compatible:
         assert torch_available, f"Unable to pre-compile {op_name}, please first install torch"
         install_ops[op_name] = op_enabled(op_name)
@@ -167,7 +168,7 @@ for op_name, builder in ALL_OPS.items():
 print(f'Install Ops={install_ops}')
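The pre-compilation loop above leans on `op_enabled` and `op_envvar`, both elided from the hunk. Below is a hedged sketch of the behavior they appear to implement, namely per-op `DS_BUILD_*` environment variables with a global default; the exact variable names and defaults are assumptions here.

```python
import os

# Assumed behavior of the elided helpers: each op is toggled by a
# DS_BUILD_<OP_NAME> variable, falling back to a global opt-in flag.
def op_envvar(op_name):
    return f"DS_BUILD_{op_name.upper()}"

def op_enabled(op_name):
    default = os.environ.get("DS_BUILD_OPS", "0")
    return int(os.environ.get(op_envvar(op_name), default)) == 1

# Hypothetical usage: DS_BUILD_OPS=1 DS_BUILD_SPARSE_ATTN=0 python setup.py bdist_wheel
# would pre-compile every compatible op except sparse_attn.
```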
-# Write out version/git info
+# Write out version/git info.
 git_hash_cmd = "git rev-parse --short HEAD"
 git_branch_cmd = "git rev-parse --abbrev-ref HEAD"
 if command_exists('git') and 'DS_BUILD_STRING' not in os.environ:
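The two command strings above are presumably executed with `subprocess`; the fallback used when git is unavailable (guarded by `command_exists`) lies outside the shown lines. A minimal sketch of that assumption:

```python
import subprocess

git_hash_cmd = "git rev-parse --short HEAD"         # as defined above
git_branch_cmd = "git rev-parse --abbrev-ref HEAD"  # as defined above

def run_cmd(cmd):
    # Run a git command and return its trimmed stdout.
    return subprocess.check_output(cmd.split(), universal_newlines=True).strip()

git_hash = run_cmd(git_hash_cmd)       # e.g., "adc15e1"
git_branch = run_cmd(git_branch_cmd)   # e.g., "master"
```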
@@ -200,38 +201,38 @@ if sys.platform == "win32":
     create_dir_symlink('..\\accelerator', '.\\deepspeed\\accelerator')
     egg_info.manifest_maker.template = 'MANIFEST_win.in'
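`create_dir_symlink` is defined outside this hunk. The sketch below shows the assumed shape of such a helper; creating directory symlinks on Windows normally requires an elevated console, which is why the module docstring asks for an Administrator cmd prompt.

```python
import os

# Assumed shape of the create_dir_symlink helper used above: point `dest` at
# `src` unless the link already exists (not the actual implementation).
def create_dir_symlink(src, dest):
    if not os.path.islink(dest):
        os.symlink(src, dest, target_is_directory=True)
```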
-# Parse the DeepSpeed version string from version.txt
+# Parse the DeepSpeed version string from version.txt.
 version_str = open('version.txt', 'r').read().strip()
 # Build specifiers like .devX can be added at install time. Otherwise, add the git hash.
-# example: DS_BUILD_STRING=".dev20201022" python setup.py sdist bdist_wheel
-# Building wheel for distribution, update version file
+# Example: DS_BUILD_STRING=".dev20201022" python setup.py sdist bdist_wheel.
+# Building wheel for distribution, update version file.
 if 'DS_BUILD_STRING' in os.environ:
-    # Build string env specified, probably building for distribution
+    # Build string env specified, probably building for distribution.
     with open('build.txt', 'w') as fd:
         fd.write(os.environ.get('DS_BUILD_STRING'))
     version_str += os.environ.get('DS_BUILD_STRING')
 elif os.path.isfile('build.txt'):
-    # build.txt exists, probably installing from distribution
+    # build.txt exists, probably installing from distribution.
     with open('build.txt', 'r') as fd:
         version_str += fd.read().strip()
 else:
-    # None of the above, probably installing from source
+    # None of the above, probably installing from source.
     version_str += f'+{git_hash}'
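To make the three branches above concrete, here is an illustrative walk-through with a made-up base version; the `.dev20201022` suffix is the example from the comment above, and the short hash reuses this commit's `adc15e1`.

```python
# Made-up values for illustration; the real ones come from version.txt and git.
base_version = "0.9.0"

# DS_BUILD_STRING set (building a distribution): the suffix is appended and also
# written to build.txt so installs from the sdist/wheel reproduce it.
print(base_version + ".dev20201022")   # -> 0.9.0.dev20201022

# build.txt present (installing from a distribution): same suffix, read back.
print(base_version + ".dev20201022")   # -> 0.9.0.dev20201022

# Neither present (installing from source): append the short git hash.
print(base_version + "+adc15e1")       # -> 0.9.0+adc15e1
```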
torch_version = ".".join([TORCH_MAJOR, TORCH_MINOR]) torch_version = ".".join([TORCH_MAJOR, TORCH_MINOR])
bf16_support = False bf16_support = False
# Set cuda_version to 0.0 if cpu-only # Set cuda_version to 0.0 if cpu-only.
cuda_version = "0.0" cuda_version = "0.0"
nccl_version = "0.0" nccl_version = "0.0"
# Set hip_version to 0.0 if cpu-only # Set hip_version to 0.0 if cpu-only.
hip_version = "0.0" hip_version = "0.0"
if torch_available and torch.version.cuda is not None: if torch_available and torch.version.cuda is not None:
cuda_version = ".".join(torch.version.cuda.split('.')[:2]) cuda_version = ".".join(torch.version.cuda.split('.')[:2])
if sys.platform != "win32": if sys.platform != "win32":
if isinstance(torch.cuda.nccl.version(), int): if isinstance(torch.cuda.nccl.version(), int):
# This will break if minor version > 9 # This will break if minor version > 9.
nccl_version = ".".join(str(torch.cuda.nccl.version())[:2]) nccl_version = ".".join(str(torch.cuda.nccl.version())[:2])
else: else:
nccl_version = ".".join(map(str, torch.cuda.nccl.version()[:2])) nccl_version = ".".join(map(str, torch.cuda.nccl.version()[:2]))
...