未验证 提交 a30ac325 编写于 作者: C ceci3 提交者: GitHub

fix lstm loss (#237)

上级 a5c2b5df
@@ -193,8 +193,7 @@ class LSTM(RLBaseController):
 fluid.layers.assign(self.baseline - (1.0 - self.decay) *
 (self.baseline - self.rewards), self.baseline)
-self.loss = -1.0 * self.sample_log_probs * (
-self.rewards - self.baseline)
+self.loss = self.sample_log_probs * (self.rewards - self.baseline)
 fluid.clip.set_gradient_clip(
 clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
 optimizer = fluid.optimizer.Adam(learning_rate=1e-3)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册