diff --git a/deepspeed/autotuning/autotuner.py b/deepspeed/autotuning/autotuner.py
index 73825a69f582c1e5db7b3dff0abfb1ac6130a007..4ea71d317f2b18afad83cf1dccfaff650b718268 100755
--- a/deepspeed/autotuning/autotuner.py
+++ b/deepspeed/autotuning/autotuner.py
@@ -637,7 +637,7 @@ class Autotuner:
         logger.info(f"End tuning for space: {tuning_space_name}")
         return max_micro_batch_size, best_mbs, best_metric_val

-    def get_plauteu_mbs(self, tuning_space_name):
+    def get_plateau_mbs(self, tuning_space_name):
         if tuning_space_name not in self.records:
             return 0
         space_records = self.records[tuning_space_name]
diff --git a/deepspeed/compression/compress.py b/deepspeed/compression/compress.py
index 951ef1bb2da1047419c5ded25ed11dca8528bc29..2f0e88beee21c040708f6fc7f279826f6c7d5c60 100644
--- a/deepspeed/compression/compress.py
+++ b/deepspeed/compression/compress.py
@@ -213,14 +213,14 @@ def student_initialization(student_model, teacher_model, deepspeed_config):
             Example 1: bert.encoder.layer, for BERT_base model's prefix name
             Example 2: transformer.h, for GPT-2 hugging face prefix name
         teacher_layer (`list of integers`)
-            The layer of teacher will be used for student's reinitializedion
+            The layer of teacher will be used for student's reinitialization
             Example 1: [1,3,5,7,9], means we want to matches the 2nd/4th/6th/8th/10th layer of teacher to the first 5 layers of student
         student_layer (`list` or None)
             The layer of student need to be re-initialized
             Example 1: None, means we want to reinitialize all the layers
             Example 1: [0,1,2,3,4], means we want to reinitialize the first 5 layers
         other_module_name (`list of string`)
-            The modules will be used for student's reinitializedion
+            The modules will be used for student's reinitialization
             Example 1: ['bert.pooler', 'bert.embeddings', 'classifier'], means we want to apply the weight in teacher's embedding/pooler/classier module to the student
             Example 2: ['transformer.w', 'transformer.ln_f', 'lm_head'], means we want to apply the weight in teacher's embedding layers module to the student
         Note that teacher_layer should matches student layer
diff --git a/deepspeed/nebula/constants.py b/deepspeed/nebula/constants.py
index 2bfcef7751456bce5350ac299669420b193c55c7..9fa5769b55979e4dcd5c80ead06d2117dcc2ec40 100644
--- a/deepspeed/nebula/constants.py
+++ b/deepspeed/nebula/constants.py
@@ -29,8 +29,8 @@ NEBULA_ENABLED_DEFAULT = False
 # There is a case where customer want to load the checkpoint saved
 # by raw torch. Because nebula cannot load torch checkpoint directly
 # as they have different folder structures to bring the gap for
-# loading(the data are totally same in bytes for torch and nebula s
-# aving).
+# loading(the data are totally same in bytes for torch and nebula
+# saving).
 # In this case, we must disable nebula load to use raw torch load.
 # Customer can just set NEBULA_ENABLE_NEBULA_LOAD to False. Then use
 # original way of deepspeed to load, i.e. set the value of "--load".
diff --git a/deepspeed/runtime/checkpoint_engine/README.md b/deepspeed/runtime/checkpoint_engine/README.md
index a19f54889802fa77fbf521456c8b4ac1f2ba8287..c2b7940a414a7fb5df5dd9b3758a2228e7855925 100644
--- a/deepspeed/runtime/checkpoint_engine/README.md
+++ b/deepspeed/runtime/checkpoint_engine/README.md
@@ -31,7 +31,7 @@ class CheckpointEngine(object):
         pass

     def commit(self, tag):
-        # to tell checkpoint services if all files are readys.
+        # to tell checkpoint services if all files are ready.
         pass
 ```
diff --git a/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py b/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py
index 3f8978df031678d10414d0d6bfd9028e3904aad6..a341dffdf692cab4b49a0b7a5e6c7bb8c2658672 100644
--- a/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py
+++ b/deepspeed/runtime/checkpoint_engine/checkpoint_engine.py
@@ -26,5 +26,5 @@ class CheckpointEngine(object):
         pass

     def commit(self, tag):
-        # to tell checkpoint services if all files are readys.
+        # to tell checkpoint services if all files are ready.
         pass
diff --git a/deepspeed/runtime/engine.py b/deepspeed/runtime/engine.py
index 93ab0bdefc91dc8efb545ae788cbc72874edaaf6..b638969755df9a6b7b717a3865c6d412f570a903 100644
--- a/deepspeed/runtime/engine.py
+++ b/deepspeed/runtime/engine.py
@@ -1916,7 +1916,7 @@ class DeepSpeedEngine(Module):
         """
         Manually overrides the DeepSpeed engine's gradient accumulation boundary state, this is an optional
         feature and should be used with care. The state should be set before to the intended
-        value before each forward/backward. The final fordward/backward should have the
+        value before each forward/backward. The final forward/backward should have the
         boundary state set to True. This style allows client code to only call engine.step() once after all
         the gradient accumulation passes are complete. See example below:
         .. code-block:: python
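For context on the `student_initialization` docstring fixed above: the `teacher_layer`/`student_layer` lists describe a positional mapping from teacher layers to student layers. A minimal sketch of that mapping, using toy `nn.Linear` stacks in place of real transformer blocks; the models and index lists are illustrative only, not the DeepSpeed implementation:

```python
import copy

import torch.nn as nn

# Toy stand-ins for a 12-layer teacher and a 5-layer student; real models
# would be full transformer stacks (e.g. under the bert.encoder.layer prefix).
teacher_layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(12)])
student_layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(5)])

teacher_layer = [1, 3, 5, 7, 9]  # 2nd/4th/6th/8th/10th teacher layers (0-based)
student_layer = None             # None: reinitialize every student layer

# With student_layer=None, student layer i is initialized from teacher_layer[i].
indices = range(len(student_layers)) if student_layer is None else student_layer
for s_idx, t_idx in zip(indices, teacher_layer):
    student_layers[s_idx] = copy.deepcopy(teacher_layers[t_idx])
```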
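The `commit(tag)` contract clarified in the two checkpoint-engine files is the hook a custom engine uses to signal that every file for a tag is ready. A minimal sketch of a custom engine built on plain `torch.save`/`torch.load`; the marker-file commit scheme is an assumption made for this sketch, not something DeepSpeed prescribes:

```python
import torch

from deepspeed.runtime.checkpoint_engine.checkpoint_engine import CheckpointEngine

class TorchFileCheckpointEngine(CheckpointEngine):
    """Sketch of a file-based engine; the method set mirrors the README interface."""

    def create(self, tag):
        # Nothing to prepare for plain file-based checkpoints.
        pass

    def save(self, state_dict, path: str):
        torch.save(state_dict, path)

    def load(self, path: str, map_location=None):
        return torch.load(path, map_location=map_location)

    def commit(self, tag):
        # Tell checkpoint services all files for this tag are ready, here via
        # a marker file (an assumed convention for this sketch).
        open(f"{tag}.ready", "w").close()
        return True
```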
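The engine.py hunk ends just before the docstring's `.. code-block:: python`, so the example it refers to is outside this diff. A usage sketch consistent with the contract the docstring describes; `engine`, `data_iter`, and `grad_accum_steps` are placeholder names, not taken from the patch:

```python
# `engine` is an initialized DeepSpeedEngine, `data_iter` yields micro-batches,
# and `grad_accum_steps` is the number of accumulation passes.
engine.set_gradient_accumulation_boundary(False)
for _ in range(grad_accum_steps - 1):
    loss = engine(next(data_iter))
    engine.backward(loss)

# The final forward/backward runs with the boundary state set to True.
engine.set_gradient_accumulation_boundary(True)
loss = engine(next(data_iter))
engine.backward(loss)

# With manual boundary control, step() is called once per effective batch.
engine.step()
```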