提交 aaa0d0a2 编写于 作者: U u010280923

Bug fix: switch RewardModel optimizer from DeepSpeedCPUAdam/FusedAdam to ZeroOneAdam (add missing `zoadam` import)

上级 5bac191b
......@@ -12,6 +12,7 @@ import pytorch_lightning as pl
from pytorch_lightning.utilities import rank_zero_info
import deepspeed
from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam
from deepspeed.runtime.fp16.onebit.zoadam import ZeroOneAdam
from pytorch_lightning.strategies import DeepSpeedStrategy
from einops import rearrange, repeat, reduce, pack, unpack
......@@ -125,10 +126,10 @@ class RewardModel(pl.LightningModule):
{"params": [p for n, p in self.named_parameters()], "weight_decay": 0.0},
]
if self.deepspeed_offload:
return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
# return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False)
# if self.deepspeed_offload:
# return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
# return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False)
@property
def deepspeed_offload(self) -> bool:
......@@ -174,7 +175,7 @@ class RewardModel(pl.LightningModule):
rm_train=True
)[:, -1, :]
# 所有的 token 向量求平均,并输入到打分模块进行打分
# 计算奖励
reward = self.pred_reward(last_token_embeds)
return reward
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册