From ab673e3f0630b98546c701e1aba4fea3ff6bb48f Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Mon, 9 Apr 2018 20:14:04 -0700
Subject: [PATCH] "add inference"

---
 fluid/policy_gradient/brain.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fluid/policy_gradient/brain.py b/fluid/policy_gradient/brain.py
index 9c7041e6..272d0f81 100644
--- a/fluid/policy_gradient/brain.py
+++ b/fluid/policy_gradient/brain.py
@@ -36,9 +36,11 @@ class PolicyGradient:
             act="tanh"  # tanh activation
         )
         # fc2
         self.all_act_prob = fluid.layers.fc(input=fc1,
                                             size=self.n_actions,
                                             act="softmax")
+        # clone the program while it holds only forward ops, before loss/optimizer are appended
+        self.inference_program = fluid.default_main_program().clone()
         # to maximize total reward (log_p * R) is to minimize -(log_p * R)
         neg_log_prob = fluid.layers.cross_entropy(
             input=self.all_act_prob,
@@ -53,7 +55,7 @@ class PolicyGradient:
 
     def choose_action(self, observation):
         prob_weights = self.exe.run(
-            fluid.default_main_program().prune(self.all_act_prob),
+            self.inference_program,
             feed={"obs": observation[np.newaxis, :]},
             fetch_list=[self.all_act_prob])
        prob_weights = np.array(prob_weights[0])
--
GitLab
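
Note on the pattern this patch introduces: cloning fluid.default_main_program() before the loss and
optimizer ops are appended yields a forward-only program, so choose_action no longer has to call
prune() on the full training program at every action selection. Below is a minimal standalone sketch
of that clone-before-loss pattern, assuming the old paddle.fluid API and hypothetical sizes
n_features=4, n_actions=2; it is illustrative only and not part of the patch.

    import numpy as np
    import paddle.fluid as fluid

    n_features, n_actions = 4, 2  # hypothetical sizes, not from the patch

    # forward pass only
    obs = fluid.layers.data(name='obs', shape=[n_features], dtype='float32')
    fc1 = fluid.layers.fc(input=obs, size=10, act="tanh")
    all_act_prob = fluid.layers.fc(input=fc1, size=n_actions, act="softmax")

    # snapshot the program while it contains only forward ops;
    # this clone is what inference runs later
    inference_program = fluid.default_main_program().clone()

    # training-only pieces are added after the clone, so they never
    # appear in (or slow down) the inference program
    acts = fluid.layers.data(name='acts', shape=[1], dtype='int64')
    neg_log_prob = fluid.layers.cross_entropy(input=all_act_prob, label=acts)
    loss = fluid.layers.reduce_mean(neg_log_prob)
    fluid.optimizer.Adam(learning_rate=0.01).minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # inference: feed one observation, fetch action probabilities
    observation = np.random.rand(n_features).astype('float32')
    prob = exe.run(inference_program,
                   feed={"obs": observation[np.newaxis, :]},
                   fetch_list=[all_act_prob])
    print(prob[0])  # shape (1, n_actions)

The design choice mirrors the common Fluid convention of capturing the test/inference program
before backward ops exist, rather than pruning the training program per call as the removed
line did.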