提交 6e8dbe4b 编写于 作者: U u010280923

opt reward model

上级 60942751
@@ -136,7 +136,7 @@ class RewardModel(pl.LightningModule):
         strategy = self.trainer.strategy
         if isinstance(strategy, DeepSpeedStrategy):
             cfg = strategy.config["zero_optimization"]
-            return cfg.get("offload_optimizer") or cfg.get("offload_param")
+            return bool(cfg.get("offload_optimizer") or cfg.get("offload_param"))
         return False
 
     def single_forward(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册