提交 6e8dbe4b 编写于 作者: U u010280923

Optimize reward model: make the DeepSpeed offload check return a strict bool

上级 60942751
......@@ -136,7 +136,7 @@ class RewardModel(pl.LightningModule):
strategy = self.trainer.strategy
if isinstance(strategy, DeepSpeedStrategy):
cfg = strategy.config["zero_optimization"]
return cfg.get("offload_optimizer") or cfg.get("offload_param")
return bool(cfg.get("offload_optimizer") or cfg.get("offload_param"))
return False
def single_forward(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册