未验证 提交 7a16adc0 编写于 作者: B Bo Zhou 提交者: GitHub

fix compatibility issue with the newest paddle (#218)

* fix compatibility issue with the newest paddle

* remove logging lines
Co-authored-by: Nrobot <zenghongsheng@baidu.com>
上级 d96dba18
@@ -72,6 +72,7 @@ class A3C(Algorithm):
  policy_entropy = policy_distribution.entropy()
  entropy = layers.reduce_sum(policy_entropy)
+ entropy_coeff = layers.reshape(entropy_coeff, shape=[1])
  total_loss = (
  pi_loss + vf_loss * self.vf_loss_coeff + entropy * entropy_coeff)
......
@@ -27,10 +27,11 @@ from parl.core.fluid.policy_distribution import SoftMultiCategoricalDistribution
  def SoftPDistribution(logits, act_space):
- """input:
+ """Args:
  logits: the output of policy model
  act_space: action space, must be gym.spaces.Discrete or multiagent.multi_discrete.MultiDiscrete
- output:
+ Return:
  instance of SoftCategoricalDistribution or SoftMultiCategoricalDistribution
  """
  # is instance of gym.spaces.Discrete
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册