diff --git a/deepspeed/autotuning/autotuner.py b/deepspeed/autotuning/autotuner.py
index 73825a69f582c1e5db7b3dff0abfb1ac6130a007..4ea71d317f2b18afad83cf1dccfaff650b718268 100755
--- a/deepspeed/autotuning/autotuner.py
+++ b/deepspeed/autotuning/autotuner.py
@@ -637,7 +637,7 @@ class Autotuner:
         logger.info(f"End tuning for space: {tuning_space_name}")
         return max_micro_batch_size, best_mbs, best_metric_val

-    def get_plauteu_mbs(self, tuning_space_name):
+    def get_plateau_mbs(self, tuning_space_name):
         if tuning_space_name not in self.records:
             return 0
         space_records = self.records[tuning_space_name]
diff --git a/deepspeed/compression/compress.py b/deepspeed/compression/compress.py
index 951ef1bb2da1047419c5ded25ed11dca8528bc29..2f0e88beee21c040708f6fc7f279826f6c7d5c60 100644
--- a/deepspeed/compression/compress.py
+++ b/deepspeed/compression/compress.py
@@ -213,14 +213,14 @@ def student_initialization(student_model, teacher_model, deepspeed_config):
             Example 1: bert.encoder.layer, for BERT_base model's prefix name
             Example 2: transformer.h, for GPT-2 hugging face prefix name
         teacher_layer (`list of integers`)
-            The layer of teacher will be used for student's reinitializedion
+            The layer of teacher will be used for student's reinitialization
             Example 1: [1,3,5,7,9], means we want to matches the 2nd/4th/6th/8th/10th layer of teacher to the first 5 layers of student
         student_layer (`list` or None)
             The layer of student need to be re-initialized
             Example 1: None, means we want to reinitialize all the layers
             Example 1: [0,1,2,3,4], means we want to reinitialize the first 5 layers
         other_module_name (`list of string`)
-            The modules will be used for student's reinitializedion
+            The modules will be used for student's reinitialization
             Example 1: ['bert.pooler', 'bert.embeddings', 'classifier'], means we want to apply the weight in teacher's embedding/pooler/classier module to the student
             Example 2: ['transformer.w', 'transformer.ln_f', 'lm_head'], means we want to apply the weight in teacher's embedding layers module to the student
         Note that teacher_layer should matches student layer
diff --git a/deepspeed/nebula/constants.py b/deepspeed/nebula/constants.py
index 2bfcef7751456bce5350ac299669420b193c55c7..9fa5769b55979e4dcd5c80ead06d2117dcc2ec40 100644
--- a/deepspeed/nebula/constants.py
+++ b/deepspeed/nebula/constants.py
@@ -29,8 +29,8 @@ NEBULA_ENABLED_DEFAULT = False
 # There is a case where customer want to load the checkpoint saved
 # by raw torch. Because nebula cannot load torch checkpoint directly
 # as they have different folder structures to bring the gap for
-# loading(the data are totally same in bytes for torch and nebula s
-# aving).
+# loading(the data are totally same in bytes for torch and nebula
+# saving).
 # In this case, we must disable nebula load to use raw torch load.
 # Customer can just set NEBULA_ENABLE_NEBULA_LOAD to False. Then use
 # original way of deepspeed to load, i.e. set the value of "--load".
diff --git a/deepspeed/runtime/checkpoint_engine/README.md b/deepspeed/runtime/checkpoint_engine/README.md
index a19f54889802fa77fbf521456c8b4ac1f2ba8287..c2b7940a414a7fb5df5dd9b3758a2228e7855925 100644
--- a/deepspeed/runtime/checkpoint_engine/README.md
+++ b/deepspeed/runtime/checkpoint_engine/README.md
@@ -31,7 +31,7 @@ class CheckpointEngine(object):
         pass

     def commit(self, tag):
-        # to tell checkpoint services if all files are readys.
+        # to tell checkpoint services if all files are ready.
         pass
 ```
diff --git a/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py b/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py
index 3f8978df031678d10414d0d6bfd9028e3904aad6..a341dffdf692cab4b49a0b7a5e6c7bb8c2658672 100644
--- a/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py
+++ b/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py
@@ -26,5 +26,5 @@ class CheckpointEngine(object):
         pass

     def commit(self, tag):
-        # to tell checkpoint services if all files are readys.
+        # to tell checkpoint services if all files are ready.
         pass
diff --git a/deepspeed/runtime/engine.py b/deepspeed/runtime/engine.py
index 93ab0bdefc91dc8efb545ae788cbc72874edaaf6..b638969755df9a6b7b717a3865c6d412f570a903 100644
--- a/deepspeed/runtime/engine.py
+++ b/deepspeed/runtime/engine.py
@@ -1916,7 +1916,7 @@ class DeepSpeedEngine(Module):
         """
         Manually overrides the DeepSpeed engine's gradient accumulation boundary state, this is an optional
         feature and should be used with care. The state should be set before to the intended
-        value before each forward/backward. The final fordward/backward should have the
+        value before each forward/backward. The final forward/backward should have the
         boundary state set to True. This style allows client code to only call engine.step() once after all
         the gradient accumulation passes are complete. See example below:
         .. code-block:: python
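For context on the `student_initialization` docstring fixed above: the `teacher_layer`/`student_layer` lists describe a positional mapping from teacher layers to student layers. A minimal sketch of that mapping, using toy `nn.Linear` stacks in place of real transformer blocks; the models and index lists are illustrative only, not the DeepSpeed implementation:

```python
import copy

import torch.nn as nn

# Toy stand-ins for a 12-layer teacher and a 5-layer student; real models
# would be full transformer stacks (e.g. under the bert.encoder.layer prefix).
teacher_layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(12)])
student_layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(5)])

teacher_layer = [1, 3, 5, 7, 9]  # 2nd/4th/6th/8th/10th teacher layers (0-based)
student_layer = None             # None: reinitialize every student layer

# With student_layer=None, student layer i is initialized from teacher_layer[i].
indices = range(len(student_layers)) if student_layer is None else student_layer
for s_idx, t_idx in zip(indices, teacher_layer):
    student_layers[s_idx] = copy.deepcopy(teacher_layers[t_idx])
```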
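The `commit(tag)` contract clarified in the two checkpoint-engine files is the hook a custom engine uses to signal that every file for a tag is ready. A minimal sketch of a custom engine built on plain `torch.save`/`torch.load`; the marker-file commit scheme is an assumption made for this sketch, not something DeepSpeed prescribes:

```python
import torch

from deepspeed.runtime.checkpoint_engine.checkpoint_engine import CheckpointEngine

class TorchFileCheckpointEngine(CheckpointEngine):
    """Sketch of a file-based engine; the method set mirrors the README interface."""

    def create(self, tag):
        # Nothing to prepare for plain file-based checkpoints.
        pass

    def save(self, state_dict, path: str):
        torch.save(state_dict, path)

    def load(self, path: str, map_location=None):
        return torch.load(path, map_location=map_location)

    def commit(self, tag):
        # Tell checkpoint services all files for this tag are ready, here via
        # a marker file (an assumed convention for this sketch).
        open(f"{tag}.ready", "w").close()
        return True
```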
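The engine.py hunk ends just before the docstring's `.. code-block:: python`, so the example it refers to is outside this diff. A usage sketch consistent with the contract the docstring describes; `engine`, `data_iter`, and `grad_accum_steps` are placeholder names, not taken from the patch:

```python
# `engine` is an initialized DeepSpeedEngine, `data_iter` yields micro-batches,
# and `grad_accum_steps` is the number of accumulation passes.
engine.set_gradient_accumulation_boundary(False)
for _ in range(grad_accum_steps - 1):
    loss = engine(next(data_iter))
    engine.backward(loss)

# The final forward/backward runs with the boundary state set to True.
engine.set_gradient_accumulation_boundary(True)
loss = engine(next(data_iter))
engine.backward(loss)

# With manual boundary control, step() is called once per effective batch.
engine.step()
```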