提交 efd856e2 编写于 作者: U u010280923

reward model finished

上级 77e4f4cb
@@ -73,11 +73,6 @@ class RewardModel(pl.LightningModule):
# reward 得分计算
self.pred_reward = nn.Linear(dim, 1, bias=False)
# self.pred_reward = nn.Sequential(
# nn.Linear(dim, 1, bias=False),
# Rearrange('... 1 -> ...') # 降维
# )
def load(self, path):
path = Path(path)
assert path.exists()
@@ -124,20 +119,10 @@ class RewardModel(pl.LightningModule):
{"params": [param_dict[n] for n in lr_3x], "weight_decay": 0.0, "my_lr_scale": 3.0},
]
optim_names = [
{"params": lr_1x},
{"params": lr_2x},
{"params": lr_3x},
]
else:
optim_groups = [
{"params": [p for n, p in self.named_parameters()], "weight_decay": 0.0},
]
optim_names = [
{"params": [n for n, p in self.named_parameters()]},
]
if self.deepspeed_offload:
return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
......
@@ -256,7 +256,7 @@ if __name__ == "__main__":
trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = args.ds_bucket_mb * 1000 * 1000
# must set shuffle=False, persistent_workers=False (because worker is in another thread)
data_loader = DataLoader(train_data, shuffle=True, pin_memory=True, batch_size=args.micro_bsz, num_workers=1, persistent_workers=False, drop_last=True)
data_loader = DataLoader(train_data, shuffle=False, pin_memory=True, batch_size=args.micro_bsz, num_workers=1, persistent_workers=False, drop_last=True)
trainer.fit(rm_model, data_loader)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册