diff --git a/fluid/policy_gradient/brain.py b/fluid/policy_gradient/brain.py
index 8387833065d89e0a61b90734771a8d9db5ac1eb4..9c7041e698b0cdc03d614ffdef7d779f4f632327 100644
--- a/fluid/policy_gradient/brain.py
+++ b/fluid/policy_gradient/brain.py
@@ -45,7 +45,7 @@ class PolicyGradient:
             label=acts) # this is negative log of chosen action
         neg_log_prob_weight = fluid.layers.elementwise_mul(x=neg_log_prob, y=vt)
         loss = fluid.layers.reduce_mean(
-            x=neg_log_prob_weight) # reward guided loss
+            neg_log_prob_weight) # reward guided loss
         sgd_optimizer = fluid.optimizer.SGD(self.lr)
         sgd_optimizer.minimize(loss)
 