opt ppo model

54b452e9 · u010280923 · 7928c117 · 54b452e9 · 54b452e9
显示空白变更内容
内联并排

Showing 2 changed files with 2 additions and 2 deletions

train_ppo.py train_ppo.py +1 -1

train_rm.py train_rm.py +1 -1

未找到文件。
--- a/train_ppo.py
+++ b/train_ppo.py
@@ -299,8 +299,8 @@ if __name__ == "__main__":
    if trainer.global_rank == 0:
        for n in rlhf_model.state_dict():
            shape = rlhf_model.state_dict()[n].shape
+            shape = [i for i in shape]
            if len(shape) > 1:
-                shape = [i for i in shape if i != 1]
                print(f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {n}")
            else:
                print(f"{str(shape[0]).ljust(5)}       {n}")

--- a/train_rm.py
+++ b/train_rm.py
@@ -267,8 +267,8 @@ if __name__ == "__main__":
    if trainer.global_rank == 0:
        for n in rm_model.state_dict():
            shape = rm_model.state_dict()[n].shape
-            if len(shape) > 1:
            shape = [i for i in shape if i != 1]
+            if len(shape) > 1:
                print(f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {n}")
            else:
                print(f"{str(shape[0]).ljust(5)}       {n}")