From f46ad361bab2ebce22214752299702c3fbd6b6c9 Mon Sep 17 00:00:00 2001
From: Hongsheng Zeng
Date: Wed, 25 Mar 2020 20:35:53 +0800
Subject: [PATCH] fix a2c cannot run in paddle 1.6.0 (#232)

* fix a2c cannot run in paddle 1.6.0

* fix impala compatibility

* yapf
---
 examples/A2C/atari_agent.py                 | 5 ++++-
 examples/IMPALA/atari_agent.py              | 5 ++++-
 examples/IMPALA/train.py                    | 2 +-
 examples/LiftSim_baseline/A2C/lift_agent.py | 5 ++++-
 parl/algorithms/fluid/a3c.py                | 1 -
 parl/algorithms/fluid/impala/impala.py      | 1 -
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/examples/A2C/atari_agent.py b/examples/A2C/atari_agent.py
index 5604f71..94d2125 100755
--- a/examples/A2C/atari_agent.py
+++ b/examples/A2C/atari_agent.py
@@ -71,7 +71,10 @@ class AtariAgent(parl.Agent):
             lr = layers.data(
                 name='lr', shape=[1], dtype='float32', append_batch_size=False)
             entropy_coeff = layers.data(
-                name='entropy_coeff', shape=[], dtype='float32')
+                name='entropy_coeff',
+                shape=[1],
+                dtype='float32',
+                append_batch_size=False)
 
             total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                 obs, actions, advantages, target_values, lr, entropy_coeff)
diff --git a/examples/IMPALA/atari_agent.py b/examples/IMPALA/atari_agent.py
index 98d4a4c..0746f95 100755
--- a/examples/IMPALA/atari_agent.py
+++ b/examples/IMPALA/atari_agent.py
@@ -58,7 +58,10 @@ class AtariAgent(parl.Agent):
             lr = layers.data(
                 name='lr', shape=[1], dtype='float32', append_batch_size=False)
             entropy_coeff = layers.data(
-                name='entropy_coeff', shape=[], dtype='float32')
+                name='entropy_coeff',
+                shape=[1],
+                dtype='float32',
+                append_batch_size=False)
 
             self.learn_reader = fluid.layers.create_py_reader_by_data(
                 capacity=32,
diff --git a/examples/IMPALA/train.py b/examples/IMPALA/train.py
index bfb4ad5..cf9e55c 100755
--- a/examples/IMPALA/train.py
+++ b/examples/IMPALA/train.py
@@ -123,7 +123,7 @@ class Learner(object):
                     obs_np, actions_np, behaviour_logits_np, rewards_np,
                     dones_np,
                     np.float32(self.lr),
-                    np.float32(self.entropy_coeff)
+                    np.array([self.entropy_coeff], dtype='float32')
                 ]
 
     def run_learn(self):
diff --git a/examples/LiftSim_baseline/A2C/lift_agent.py b/examples/LiftSim_baseline/A2C/lift_agent.py
index 9c2d64d..1dd35e5 100644
--- a/examples/LiftSim_baseline/A2C/lift_agent.py
+++ b/examples/LiftSim_baseline/A2C/lift_agent.py
@@ -67,7 +67,10 @@ class LiftAgent(parl.Agent):
             lr = layers.data(
                 name='lr', shape=[1], dtype='float32', append_batch_size=False)
             entropy_coeff = layers.data(
-                name='entropy_coeff', shape=[], dtype='float32')
+                name='entropy_coeff',
+                shape=[1],
+                dtype='float32',
+                append_batch_size=False)
 
             total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                 obs, actions, advantages, target_values, lr, entropy_coeff)
diff --git a/parl/algorithms/fluid/a3c.py b/parl/algorithms/fluid/a3c.py
index 27aa1dc..9b9f57e 100644
--- a/parl/algorithms/fluid/a3c.py
+++ b/parl/algorithms/fluid/a3c.py
@@ -72,7 +72,6 @@ class A3C(Algorithm):
         policy_entropy = policy_distribution.entropy()
         entropy = layers.reduce_sum(policy_entropy)
 
-        entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
         total_loss = (
             pi_loss + vf_loss * self.vf_loss_coeff + entropy * entropy_coeff)
 
diff --git a/parl/algorithms/fluid/impala/impala.py b/parl/algorithms/fluid/impala/impala.py
index bdfe53a..025f96f 100644
--- a/parl/algorithms/fluid/impala/impala.py
+++ b/parl/algorithms/fluid/impala/impala.py
@@ -78,7 +78,6 @@ class VTraceLoss(object):
         self.entropy = layers.reduce_sum(policy_entropy)
 
         # The summed weighted loss
-        entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
         self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff +
                            self.entropy * entropy_coeff)
 
--
GitLab
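The common thread in this patch is how a scalar hyperparameter (`entropy_coeff`) is passed into a fluid program: the diff suggests that `layers.data(..., shape=[], ...)` plus a bare `np.float32` feed no longer works under PaddlePaddle 1.6.0, so each call site declares a shape `[1]` tensor with `append_batch_size=False`, feeds a one-element float32 array, and the now-redundant `layers.reshape(entropy_coeff, shape=[1])` in the algorithms is dropped. Below is a minimal, self-contained sketch of that pattern, not part of the patch itself; it assumes a pre-2.0 PaddlePaddle release that still ships the fluid API, and names such as `main_program` and `weighted_entropy` are illustrative only.

```python
# Sketch of the "scalar coefficient as shape-[1] data" pattern used by this patch.
# Assumes paddlepaddle with the fluid API (e.g. 1.6.x); names other than
# 'entropy_coeff' are made up for illustration.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

main_program = fluid.Program()
startup_program = fluid.Program()

with fluid.program_guard(main_program, startup_program):
    # Declare the scalar coefficient as a length-1 tensor and disable the
    # implicit batch dimension (instead of the old shape=[] declaration).
    entropy_coeff = layers.data(
        name='entropy_coeff',
        shape=[1],
        dtype='float32',
        append_batch_size=False)
    # Stand-in for the summed policy entropy computed by the algorithm.
    entropy = layers.fill_constant(shape=[1], dtype='float32', value=3.0)
    # No layers.reshape is needed: entropy_coeff already has shape [1].
    weighted_entropy = entropy * entropy_coeff

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)

# Feed a 1-element float32 array rather than a bare np.float32 scalar,
# mirroring the change in examples/IMPALA/train.py.
result = exe.run(
    main_program,
    feed={'entropy_coeff': np.array([0.01], dtype='float32')},
    fetch_list=[weighted_entropy])
print(result[0])  # -> [0.03]
```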