提交 aaa0d0a2 编写于 作者: U u010280923

bug fixed

上级 5bac191b
@@ -12,6 +12,7 @@ import pytorch_lightning as pl
 from pytorch_lightning.utilities import rank_zero_info
 import deepspeed
 from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam
+from deepspeed.runtime.fp16.onebit.zoadam import ZeroOneAdam
 from pytorch_lightning.strategies import DeepSpeedStrategy
 from einops import rearrange, repeat, reduce, pack, unpack
@@ -125,10 +126,10 @@ class RewardModel(pl.LightningModule):
             {"params": [p for n, p in self.named_parameters()], "weight_decay": 0.0},
         ]
-        if self.deepspeed_offload:
-            return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
-        return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
-        # return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False)
+        # if self.deepspeed_offload:
+        #     return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False)
+        # return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False)
+        return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False)
     @property
     def deepspeed_offload(self) -> bool:
@@ -174,7 +175,7 @@ class RewardModel(pl.LightningModule):
             rm_train=True
         )[:, -1, :]
-        # 所有的 token 向量求平均,并输入到打分模块进行打分
+        # 计算奖励
         reward = self.pred_reward(last_token_embeds)
         return reward
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册