From fba22c902dc61ef4673cf7dd1d0c2fbed6db58a0 Mon Sep 17 00:00:00 2001 From: u010280923 Date: Fri, 10 Mar 2023 17:52:11 +0800 Subject: [PATCH] bug fixed --- src/dataset.py | 2 +- src/rlhf/reward.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dataset.py b/src/dataset.py index e65c312..326a6e7 100644 --- a/src/dataset.py +++ b/src/dataset.py @@ -283,7 +283,7 @@ class RMDataset(Dataset): preferred_idx = self.tokenizer.tokenizer.encode(preferred) alternate_idx = self.tokenizer.tokenizer.encode(alternate) - prompt_mask = [self.padding_mask_id] * len(prompt_idx) + prompt_mask = [self.prompt_mask_id] * len(prompt_idx) preferred_mask = [self.response_mask_id] * len(preferred_idx) alternate_mask = [self.response_mask_id] * len(alternate_idx) diff --git a/src/rlhf/reward.py b/src/rlhf/reward.py index b9797d1..b0a3824 100644 --- a/src/rlhf/reward.py +++ b/src/rlhf/reward.py @@ -66,9 +66,9 @@ class RewardModel(pl.LightningModule): dim = self.args.n_embd # 用于区分输入中的 prompt 和 response,当作模型参数进行训练,初始化为全0 - self.prompt_embed = nn.Parameter(torch.zeros(1, 1, dim)).to() - self.response_embed = nn.Parameter(torch.zeros(1, 1, dim)) - self.padding_embed = nn.Parameter(torch.zeros(1, 1, dim), requires_grad=False) + self.prompt_embed = nn.Parameter(torch.zeros(dim)) + self.response_embed = nn.Parameter(torch.zeros(dim)) + self.padding_embed = nn.Parameter(torch.zeros(dim), requires_grad=False) # reward 得分计算 self.pred_reward = nn.Sequential( -- GitLab