From efd856e2d3a18913f58deeb765ee450590960ff6 Mon Sep 17 00:00:00 2001
From: u010280923
Date: Mon, 13 Mar 2023 14:48:12 +0800
Subject: [PATCH] reward model finished

---
 src/rlhf/reward.py | 15 ---------------
 train_rm.py        |  4 ++--
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/src/rlhf/reward.py b/src/rlhf/reward.py
index 5eb8363..580ac19 100644
--- a/src/rlhf/reward.py
+++ b/src/rlhf/reward.py
@@ -73,11 +73,6 @@ class RewardModel(pl.LightningModule):
         # reward score computation
         self.pred_reward = nn.Linear(dim, 1, bias=False)
 
-        # self.pred_reward = nn.Sequential(
-        #     nn.Linear(dim, 1, bias=False),
-        #     Rearrange('... 1 -> ...')  # dimension reduction
-        # )
-
     def load(self, path):
         path = Path(path)
         assert path.exists()
@@ -124,20 +119,10 @@ class RewardModel(pl.LightningModule):
                 {"params": [param_dict[n] for n in lr_3x], "weight_decay": 0.0, "my_lr_scale": 3.0},
             ]
 
-            optim_names = [
-                {"params": lr_1x},
-                {"params": lr_2x},
-                {"params": lr_3x},
-            ]
-
         else:
             optim_groups = [
                 {"params": [p for n, p in self.named_parameters()], "weight_decay": 0.0},
             ]
-
-            optim_names = [
-                {"params": [n for n, p in self.named_parameters()]},
-            ]
 
         if self.deepspeed_offload:
             return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
diff --git a/train_rm.py b/train_rm.py
index 84f6143..bff30bd 100644
--- a/train_rm.py
+++ b/train_rm.py
@@ -256,7 +256,7 @@ if __name__ == "__main__":
         trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = args.ds_bucket_mb * 1000 * 1000
 
-    # must set shuffle=True, persistent_workers=False (because worker is in another thread)
-    data_loader = DataLoader(train_data, shuffle=True, pin_memory=True, batch_size=args.micro_bsz, num_workers=1, persistent_workers=False, drop_last=True)
+    # must set persistent_workers=False (because worker is in another thread)
+    data_loader = DataLoader(train_data, shuffle=False, pin_memory=True, batch_size=args.micro_bsz, num_workers=1, persistent_workers=False, drop_last=True)
 
     trainer.fit(rm_model, data_loader)
 
--
GitLab