From 5d14afd26ce89be4d4e6ebdf8fb6a943aebd4aa3 Mon Sep 17 00:00:00 2001
From: digger yu
Date: Sat, 3 Jun 2023 01:51:04 +0800
Subject: [PATCH] fix typo deepspeed/runtime (#3663)

Co-authored-by: Olatunji Ruwase
---
 deepspeed/runtime/zero/partition_parameters.py | 8 ++++----
 deepspeed/runtime/zero/stage3.py               | 4 ++--
 deepspeed/runtime/zero/stage_1_and_2.py        | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index d4df1c46..db986c74 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -800,9 +800,9 @@ class Init(InsertPostInitMethodToModuleSubClasses):
             f'"nvme_path" in DeepSpeed Config cannot be None if remote device is {OffloadDeviceEnum.nvme}'
 
     def _post_init_method(self, module):
-        #see_memory_usage(f"Before converting parmas in {module.__class__.__name__}", force=False)
+        #see_memory_usage(f"Before converting params in {module.__class__.__name__}", force=False)
         print_rank_0(f'Converting Params in {module.__class__.__name__}', force=False)
-        see_memory_usage(f"Before converting and partitioning parmas in {module.__class__.__name__}", force=False)
+        see_memory_usage(f"Before converting and partitioning params in {module.__class__.__name__}", force=False)
 
         global param_count
         for name, param in module.named_parameters(recurse=False):
@@ -825,7 +825,7 @@ class Init(InsertPostInitMethodToModuleSubClasses):
                 param.partition()
 
         see_memory_usage(
-            f"Param count {param_count}. After converting and partitioning parmas in {module.__class__.__name__}",
+            f"Param count {param_count}. After converting and partitioning params in {module.__class__.__name__}",
             force=False)
 
     def _convert_to_deepspeed_param(self, param):
@@ -1404,7 +1404,7 @@ class Init(InsertPostInitMethodToModuleSubClasses):
             partition_size = param.ds_tensor.ds_numel
             start = self.get_partition_rank() * partition_size
             end = start + partition_size
-            #print_rank_0("REduce scatter was executed for praam {param.ds_id}")
+            #print_rank_0("REduce scatter was executed for param {param.ds_id}")
             if start < param.ds_numel and end > param.ds_numel:
                 elements = param.ds_numel - start
                 param.grad.view(-1).narrow(0, start, elements).copy_(reduced_partition.narrow(0, 0, elements))
diff --git a/deepspeed/runtime/zero/stage3.py b/deepspeed/runtime/zero/stage3.py
index 5918fe25..c5359a82 100644
--- a/deepspeed/runtime/zero/stage3.py
+++ b/deepspeed/runtime/zero/stage3.py
@@ -892,7 +892,7 @@ class DeepSpeedZeroOptimizer_Stage3(ZeROOptimizer):
             else:
                 self.fp32_partitioned_groups_flat[i].grad = gradient_buffer.narrow(0, 0, num_elements)
 
-            # Initialize the optimizer states with the flattended fp32 partition.
+            # Initialize the optimizer states with the flattened fp32 partition.
             if not is_adagrad:
                 self._optimizer_step(i)
 
@@ -906,7 +906,7 @@ class DeepSpeedZeroOptimizer_Stage3(ZeROOptimizer):
             f'[End] Initialize optimizer states {i} / {num_subgroups} subgroups, num_elems: {num_elements}, swappable opt/param:{swappable_optimizer_subgroup}/{swappable_param_subgroup}',
             force=False)
 
-        # Initialize the optimizer states with the flattended fp32 partition.
+        # Initialize the optimizer states with the flattened fp32 partition.
         if is_adagrad:
             self.optimizer = torch.optim.Adagrad(self.fp32_partitioned_groups_flat, **self.optimizer.defaults)
 
diff --git a/deepspeed/runtime/zero/stage_1_and_2.py b/deepspeed/runtime/zero/stage_1_and_2.py
index 67fdcba6..8f15b87a 100755
--- a/deepspeed/runtime/zero/stage_1_and_2.py
+++ b/deepspeed/runtime/zero/stage_1_and_2.py
@@ -611,7 +611,7 @@ class DeepSpeedZeroOptimizer(ZeROOptimizer):
             self.single_partition_of_fp32_groups[i].grad = get_accelerator().pin_memory(
                 single_grad_partition) if self.cpu_offload else single_grad_partition
 
-        # Initialize the optimizer states with the flattended fp32 partition.
+        # Initialize the optimizer states with the flattened fp32 partition.
         # State initialization for the Adagrad optimizer occurs at construction as opposed to other optimizers
         # which do lazy initialization of the state at the first call to step.
         if isinstance(self.optimizer, torch.optim.Adagrad):
--
GitLab
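
The comments touched by this patch note that torch.optim.Adagrad initializes its per-parameter state at construction, whereas most other optimizers (e.g. Adam) create state lazily on the first step(). Below is a minimal standalone sketch of that behavior, separate from the patch itself; the tensor shape and learning rate are arbitrary placeholders, not values from DeepSpeed.

# Minimal sketch (not part of the patch above): Adagrad fills in its
# per-parameter state ('sum', 'step') inside __init__, while Adam leaves
# optimizer.state empty until the first call to step().
import torch

params = [torch.nn.Parameter(torch.randn(4, 4))]

adagrad = torch.optim.Adagrad(params, lr=0.01)
adam = torch.optim.Adam(params, lr=0.01)

print(len(adagrad.state))  # 1 -- state already populated at construction
print(len(adam.state))     # 0 -- no state yet

params[0].grad = torch.randn(4, 4)
adam.step()
print(len(adam.state))     # 1 -- state created lazily on the first step()

This is why the ZeRO code paths above special-case Adagrad: its states must be (re)created against the flattened fp32 partitions, rather than relying on lazy initialization at the first optimizer step.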