From aede5aee1854164a1c5caa5ef7f32c582338424f Mon Sep 17 00:00:00 2001
From: Bo Zhou <2466956298@qq.com>
Date: Mon, 23 Mar 2020 21:47:27 +0800
Subject: [PATCH] resolve the compatibility issue (#226)

* fix compatibility issue with the newest paddle

* remove logging lines

* resolve the compatibility issue with the newest paddle

* yapf

Co-authored-by: robot
---
 examples/DQN/atari_agent.py            | 2 +-
 examples/IMPALA/train.py               | 4 +++-
 parl/algorithms/fluid/impala/impala.py | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/DQN/atari_agent.py b/examples/DQN/atari_agent.py
index 4af4478..8a33ac4 100644
--- a/examples/DQN/atari_agent.py
+++ b/examples/DQN/atari_agent.py
@@ -106,7 +106,7 @@ class AtariAgent(parl.Agent):
             'reward': reward,
             'next_obs': next_obs.astype('float32'),
             'terminal': terminal,
-            'lr': lr
+            'lr': np.float32(lr)
         }
         cost = self.fluid_executor.run(
             self.learn_program, feed=feed, fetch_list=[self.cost])[0]
diff --git a/examples/IMPALA/train.py b/examples/IMPALA/train.py
index 8440ee7..bfb4ad5 100755
--- a/examples/IMPALA/train.py
+++ b/examples/IMPALA/train.py
@@ -121,7 +121,9 @@ class Learner(object):
 
                 yield [
                     obs_np, actions_np, behaviour_logits_np, rewards_np,
-                    dones_np, self.lr, self.entropy_coeff
+                    dones_np,
+                    np.float32(self.lr),
+                    np.float32(self.entropy_coeff)
                 ]
 
     def run_learn(self):
diff --git a/parl/algorithms/fluid/impala/impala.py b/parl/algorithms/fluid/impala/impala.py
index 025f96f..bdfe53a 100644
--- a/parl/algorithms/fluid/impala/impala.py
+++ b/parl/algorithms/fluid/impala/impala.py
@@ -78,6 +78,7 @@ class VTraceLoss(object):
         self.entropy = layers.reduce_sum(policy_entropy)
 
         # The summed weighted loss
+        entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
         self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff +
                            self.entropy * entropy_coeff)
 
-- 
GitLab
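
Note on the fix: newer PaddlePaddle fluid releases validate the dtype of
every value passed through feed, and a bare Python float arrives as
float64, mismatching variables declared as 'float32'. Below is a minimal
sketch of the issue, assuming the PaddlePaddle 1.x fluid API this patch
targets; the toy program and names are illustrative, not part of the patch.

    import numpy as np
    import paddle.fluid as fluid

    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        # A scalar input declared as float32, analogous to 'lr' in the
        # learn program of atari_agent.py.
        lr = fluid.layers.data(name='lr', shape=[1], dtype='float32')
        out = fluid.layers.scale(lr, scale=2.0)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)

    # Feeding the plain float 0.001 would default to float64 and trip the
    # dtype check in recent fluid versions; wrapping it in np.float32
    # matches the declared dtype, which is exactly what the patch does
    # for 'lr' and 'entropy_coeff'.
    result = exe.run(main_program,
                     feed={'lr': np.float32(0.001)},
                     fetch_list=[out])

The reshape of entropy_coeff in impala.py serves the same compatibility
goal: the coefficient now arrives as a fed scalar rather than a Python
constant, so it is reshaped to a rank-1 tensor before being multiplied
into the total loss.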