From fa78eedaf1c929eca34f88412e481ea17c032981 Mon Sep 17 00:00:00 2001
From: u010280923
Date: Mon, 13 Mar 2023 10:41:42 +0800
Subject: [PATCH] opt reward model

---
 src/rlhf/reward.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/rlhf/reward.py b/src/rlhf/reward.py
index cd03bef..fd12d9d 100644
--- a/src/rlhf/reward.py
+++ b/src/rlhf/reward.py
@@ -181,6 +181,10 @@ class RewardModel(pl.LightningModule):
         return reward
 
     def forward(self, x_p, x_a, m_p, m_a):
+        # The forward pass runs the model twice, so the parameters of one of the
+        # two passes must be frozen during backpropagation. Otherwise the gradient
+        # would be computed twice, which raises an error under the DeepSpeed framework:
+        # "Gradient computed twice for this partition."
         with torch.enable_grad():
             prefer_reward = self.single_forward(x_p, prompt_mask=m_p)
         with torch.no_grad():
-- 
GitLab
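
Editor's note: below is a minimal, self-contained sketch of the pattern the patched forward() relies on, shown outside the patch itself. It keeps the names x_p, x_a, m_p, m_a, and single_forward from the diff, but the PairwiseRewardModel class, its embedding-based scorer, and the log-sigmoid ranking loss are stand-ins assumed for illustration; the real RewardModel in src/rlhf/reward.py wraps a language-model backbone under PyTorch Lightning and DeepSpeed. The point being demonstrated is that only the preferred-sequence pass builds a graph, so each parameter partition receives exactly one gradient per step.

import torch
import torch.nn as nn

class PairwiseRewardModel(nn.Module):
    """Sketch: score a preferred and an alternative sequence with the same
    weights, keeping gradients for only one of the two passes."""

    def __init__(self, vocab_size: int = 100, hidden_dim: int = 16):
        super().__init__()
        # Stand-in scorer; the actual RewardModel uses a transformer backbone.
        self.embed = nn.Embedding(vocab_size, hidden_dim)
        self.score = nn.Linear(hidden_dim, 1)

    def single_forward(self, x, prompt_mask=None):
        # Mean-pool token embeddings (mask handling omitted for brevity) and score.
        h = self.embed(x).mean(dim=1)
        return self.score(h).squeeze(-1)

    def forward(self, x_p, x_a, m_p, m_a):
        # Gradients flow only through the preferred-sequence pass ...
        with torch.enable_grad():
            prefer_reward = self.single_forward(x_p, prompt_mask=m_p)
        # ... while the alternative pass is detached from the graph, so the
        # shared parameters are not given a second gradient by the same step.
        with torch.no_grad():
            alternate_reward = self.single_forward(x_a, prompt_mask=m_a)
        return prefer_reward, alternate_reward


if __name__ == "__main__":
    model = PairwiseRewardModel()
    x_p = torch.randint(0, 100, (2, 8))   # preferred token ids
    x_a = torch.randint(0, 100, (2, 8))   # alternative token ids
    r_p, r_a = model(x_p, x_a, None, None)
    # Pairwise ranking loss; only the enable_grad() branch contributes gradients.
    loss = -torch.nn.functional.logsigmoid(r_p - r_a).mean()
    loss.backward()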