fix double linear override; spelling (#954)

adac058a · Stas Bekman · GitHub · e6999ebd · adac058a
隐藏空白更改
内联并排

Showing 1 changed file with 1 addition and 8 deletions

deepspeed/runtime/zero/partition_parameters.py deepspeed/runtime/zero/partition_parameters.py +1 -8

未找到文件。
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -191,7 +191,7 @@ class InsertPostInitMethodToModuleSubClasses(object):

        if self.mem_efficient_linear:
            print_rank_0(
-                f"Your linear layers are being patched with more memory efficient version. This will persit unless manually reset.",
+                "nn.functional.linear has been overridden with a more memory efficient version. This will persist unless manually reset.",
                force=True)
            self.linear_bk = torch.nn.functional.linear
            torch.nn.functional.linear = LinearFunctionForZeroStage3.apply
@@ -361,13 +361,6 @@ class Init(InsertPostInitMethodToModuleSubClasses):
                self._convert_to_deepspeed_param(param)
                param.partition()

-        if mem_efficient_linear:
-            print_rank_0(
-                f"Your linear layers are being patched with more memory efficient version. This will persit unless manually turned reset.",
-                force=True)
-            self.linear_bk = torch.nn.functional.linear
-            torch.nn.functional.linear = LinearFunctionForZeroStage3.apply
-
    def _post_init_method(self, module):
        #see_memory_usage(f"Before converting parmas in {module.__class__.__name__}", force=False)
        print_rank_0(f'Converting Params in {module.__class__.__name__}', force=False)