提交 e1170613 编写于 作者: U u010280923

Optimize reward model: fix optimizer construction (use the paper's hyperparameters lr=1e-5, betas=(0.9, 0.95) and remove the invalid param-group setup)

上级 a1fe3755
......@@ -85,12 +85,9 @@ class RewardModel(pl.LightningModule):
]
def configure_optimizers(self):
    """Build the optimizer for training the reward model.

    Uses the hyperparameters reported in the paper:
    lr=1e-5, betas=(0.9, 0.95).

    Returns:
        torch.optim.Adam: optimizer over all parameters of this module
        (which already include the wrapped ``self.rwkv`` parameters).
    """
    # NOTE(review): the previous version first built
    #   Adam([{"rwkv_params": ...}, {"rm_params": ...}], ...)
    # which raises, because torch param-group dicts must use the key
    # "params"; it would also have double-counted self.rwkv's parameters,
    # since self.parameters() already yields them. That dead, broken
    # construction is removed here.
    optimizer = torch.optim.Adam(self.parameters(), lr=1e-5, betas=(0.9, 0.95))
    return optimizer
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册