From aede5aee1854164a1c5caa5ef7f32c582338424f Mon Sep 17 00:00:00 2001
From: Bo Zhou <2466956298@qq.com>
Date: Mon, 23 Mar 2020 21:47:27 +0800
Subject: [PATCH] resolve the compatibility issue (#226)

* fix compatibility issue with the newest paddle

* remove logging lines

* resolve the compatibility issue with the newest paddle

* yapf

Co-authored-by: robot
---
 examples/DQN/atari_agent.py            | 2 +-
 examples/IMPALA/train.py               | 4 +++-
 parl/algorithms/fluid/impala/impala.py | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/DQN/atari_agent.py b/examples/DQN/atari_agent.py
index 4af4478..8a33ac4 100644
--- a/examples/DQN/atari_agent.py
+++ b/examples/DQN/atari_agent.py
@@ -106,7 +106,7 @@ class AtariAgent(parl.Agent):
             'reward': reward,
             'next_obs': next_obs.astype('float32'),
             'terminal': terminal,
-            'lr': lr
+            'lr': np.float32(lr)
         }
         cost = self.fluid_executor.run(
             self.learn_program, feed=feed, fetch_list=[self.cost])[0]
diff --git a/examples/IMPALA/train.py b/examples/IMPALA/train.py
index 8440ee7..bfb4ad5 100755
--- a/examples/IMPALA/train.py
+++ b/examples/IMPALA/train.py
@@ -121,7 +121,9 @@ class Learner(object):
 
                 yield [
                     obs_np, actions_np, behaviour_logits_np, rewards_np,
-                    dones_np, self.lr, self.entropy_coeff
+                    dones_np,
+                    np.float32(self.lr),
+                    np.float32(self.entropy_coeff)
                 ]
 
     def run_learn(self):
diff --git a/parl/algorithms/fluid/impala/impala.py b/parl/algorithms/fluid/impala/impala.py
index 025f96f..bdfe53a 100644
--- a/parl/algorithms/fluid/impala/impala.py
+++ b/parl/algorithms/fluid/impala/impala.py
@@ -78,6 +78,7 @@ class VTraceLoss(object):
         self.entropy = layers.reduce_sum(policy_entropy)
 
         # The summed weighted loss
+        entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
         self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff +
                            self.entropy * entropy_coeff)
 
-- 
GitLab
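
Note on the fix: newer PaddlePaddle fluid releases validate the dtype of
every value passed through feed, and a bare Python float arrives as
float64, mismatching variables declared as 'float32'. Below is a minimal
sketch of the issue, assuming the PaddlePaddle 1.x fluid API this patch
targets; the toy program and names are illustrative, not part of the patch.

    import numpy as np
    import paddle.fluid as fluid

    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        # A scalar input declared as float32, analogous to 'lr' in the
        # learn program of atari_agent.py.
        lr = fluid.layers.data(name='lr', shape=[1], dtype='float32')
        out = fluid.layers.scale(lr, scale=2.0)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)

    # Feeding the plain float 0.001 would default to float64 and trip the
    # dtype check in recent fluid versions; wrapping it in np.float32
    # matches the declared dtype, which is exactly what the patch does
    # for 'lr' and 'entropy_coeff'.
    result = exe.run(main_program,
                     feed={'lr': np.float32(0.001)},
                     fetch_list=[out])

The reshape of entropy_coeff in impala.py serves the same compatibility
goal: the coefficient now arrives as a fed scalar rather than a Python
constant, so it is reshaped to a rank-1 tensor before being multiplied
into the total loss.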