提交 65604ada 编写于 作者: U u010280923

opt reward model

上级 887ca941
...@@ -248,7 +248,7 @@ class rm_train_callback(pl.Callback): ...@@ -248,7 +248,7 @@ class rm_train_callback(pl.Callback):
to_save_dict = pl_module.state_dict() to_save_dict = pl_module.state_dict()
my_save( my_save(
to_save_dict, to_save_dict,
f"{args.proj_dir}/rwkv-final.pth", f"{args.proj_dir}/rm-final.pth",
) )
...@@ -276,7 +276,7 @@ class rm_train_callback(pl.Callback): ...@@ -276,7 +276,7 @@ class rm_train_callback(pl.Callback):
try: try:
my_save( my_save(
to_save_dict, to_save_dict,
f"{args.proj_dir}/rwkv-{args.epoch_begin + trainer.current_epoch}.pth", f"{args.proj_dir}/rm-{args.epoch_begin + trainer.current_epoch}.pth",
) )
except Exception as e: except Exception as e:
print('Error\n\n', e, '\n\n') print('Error\n\n', e, '\n\n')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册