From 7a16adc0c49b3aaaa85bc5821286af422909e700 Mon Sep 17 00:00:00 2001 From: Bo Zhou <2466956298@qq.com> Date: Sun, 22 Mar 2020 23:15:44 +0800 Subject: [PATCH] fix compatibility issue with the newest paddle (#218) * fix compatibility issue with the newest paddle * remove logging lines Co-authored-by: robot --- parl/algorithms/fluid/a3c.py | 1 + parl/algorithms/fluid/maddpg.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/parl/algorithms/fluid/a3c.py b/parl/algorithms/fluid/a3c.py index 9b9f57e..27aa1dc 100644 --- a/parl/algorithms/fluid/a3c.py +++ b/parl/algorithms/fluid/a3c.py @@ -72,6 +72,7 @@ class A3C(Algorithm): policy_entropy = policy_distribution.entropy() entropy = layers.reduce_sum(policy_entropy) + entropy_coeff = layers.reshape(entropy_coeff, shape=[1]) total_loss = ( pi_loss + vf_loss * self.vf_loss_coeff + entropy * entropy_coeff) diff --git a/parl/algorithms/fluid/maddpg.py b/parl/algorithms/fluid/maddpg.py index 4bf7994..36b1470 100644 --- a/parl/algorithms/fluid/maddpg.py +++ b/parl/algorithms/fluid/maddpg.py @@ -27,10 +27,11 @@ from parl.core.fluid.policy_distribution import SoftMultiCategoricalDistribution def SoftPDistribution(logits, act_space): - """input: + """Args: logits: the output of policy model act_space: action space, must be gym.spaces.Discrete or multiagent.multi_discrete.MultiDiscrete - output: + + Return: instance of SoftCategoricalDistribution or SoftMultiCategoricalDistribution """ # is instance of gym.spaces.Discrete -- GitLab