diff --git a/parl/algorithms/fluid/a3c.py b/parl/algorithms/fluid/a3c.py index 9b9f57e8eb5bfd59e3f79c1fc42e4d1374618f23..27aa1dc8785315e4347cebcc3b13f8d80659e0a9 100644 --- a/parl/algorithms/fluid/a3c.py +++ b/parl/algorithms/fluid/a3c.py @@ -72,6 +72,7 @@ class A3C(Algorithm): policy_entropy = policy_distribution.entropy() entropy = layers.reduce_sum(policy_entropy) + entropy_coeff = layers.reshape(entropy_coeff, shape=[1]) total_loss = ( pi_loss + vf_loss * self.vf_loss_coeff + entropy * entropy_coeff) diff --git a/parl/algorithms/fluid/maddpg.py b/parl/algorithms/fluid/maddpg.py index 4bf799413165d81d00238a7c156511a03619ba5d..36b14709aaf5e5e0a2cacc97bd94b1097caf2404 100644 --- a/parl/algorithms/fluid/maddpg.py +++ b/parl/algorithms/fluid/maddpg.py @@ -27,10 +27,11 @@ from parl.core.fluid.policy_distribution import SoftMultiCategoricalDistribution def SoftPDistribution(logits, act_space): - """input: + """Args: logits: the output of policy model act_space: action space, must be gym.spaces.Discrete or multiagent.multi_discrete.MultiDiscrete - output: + + Return: instance of SoftCategoricalDistribution or SoftMultiCategoricalDistribution """ # is instance of gym.spaces.Discrete