diff --git a/deepspeed/runtime/zero/stage_1_and_2.py b/deepspeed/runtime/zero/stage_1_and_2.py index 81ca62af83b5ce259580247cbfb5ba3204a9eb7b..59020684842bdee0cbd3dd754b927542d7b0efb0 100755 --- a/deepspeed/runtime/zero/stage_1_and_2.py +++ b/deepspeed/runtime/zero/stage_1_and_2.py @@ -1721,10 +1721,9 @@ class DeepSpeedZeroOptimizer(object): if self.deepspeed_adam_offload: from deepspeed.ops.adam import DeepSpeedCPUAdam if type(self.optimizer) == DeepSpeedCPUAdam and self.dtype == torch.half: - bit16_param_groups = [ + bit16_param_groups = [[ bit16_partitions[partition_id] - for bit16_partitions in self.parallel_partitioned_bit16_groups - ] + ] for bit16_partitions in self.parallel_partitioned_bit16_groups] self.optimizer.step(fp16_param_groups=bit16_param_groups) else: self.optimizer.step()