From 9e0ab0b2d487e93b6a4fe5994371daf52dad22f6 Mon Sep 17 00:00:00 2001
From: songyouwei
Date: Tue, 24 Dec 2019 20:47:06 +0800
Subject: [PATCH] update build_once for RL (#4112)

test=develop
---
 dygraph/reinforcement_learning/actor_critic.py     | 14 +++++++-------
 dygraph/reinforcement_learning/reinforce.py        | 12 ++++++------
 .../test_actor_critic_load.py                      | 14 +++++++-------
 .../reinforcement_learning/test_reinforce_load.py  | 12 ++++++------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/dygraph/reinforcement_learning/actor_critic.py b/dygraph/reinforcement_learning/actor_critic.py
index f68a53f8..7fce62ab 100644
--- a/dygraph/reinforcement_learning/actor_critic.py
+++ b/dygraph/reinforcement_learning/actor_critic.py
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@ SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
 
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ with fluid.dygraph.guard():
     fluid.default_startup_program().random_seed = args.seed
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
 
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
diff --git a/dygraph/reinforcement_learning/reinforce.py b/dygraph/reinforcement_learning/reinforce.py
index 2a23b345..d9d6a15e 100644
--- a/dygraph/reinforcement_learning/reinforce.py
+++ b/dygraph/reinforcement_learning/reinforce.py
@@ -8,7 +8,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@ env.seed(args.seed)
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
         self.dropout_ratio = 0.6
 
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ with fluid.dygraph.guard():
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
 
-    policy = Policy("PolicyModel")
+    policy = Policy()
 
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
diff --git a/dygraph/reinforcement_learning/test_actor_critic_load.py b/dygraph/reinforcement_learning/test_actor_critic_load.py
index 2ddbfd8c..c58b9495 100644
--- a/dygraph/reinforcement_learning/test_actor_critic_load.py
+++ b/dygraph/reinforcement_learning/test_actor_critic_load.py
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@ SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
 
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ with fluid.dygraph.guard():
     fluid.default_startup_program().random_seed = args.seed
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
 
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
diff --git a/dygraph/reinforcement_learning/test_reinforce_load.py b/dygraph/reinforcement_learning/test_reinforce_load.py
index db7245d1..9d8aa5dc 100644
--- a/dygraph/reinforcement_learning/test_reinforce_load.py
+++ b/dygraph/reinforcement_learning/test_reinforce_load.py
@@ -8,7 +8,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@ env.seed(args.seed)
 
 
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
         self.dropout_ratio = 0.6
 
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ with fluid.dygraph.guard():
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
 
-    policy = Policy("PolicyModel")
+    policy = Policy()
 
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
--
GitLab
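For reference, a minimal sketch of the API this patch migrates to: a Policy layer built with nn.Linear and a no-argument constructor (instead of nn.FC plus a name_scope), exercised with one forward pass under dygraph. The forward body, the tanh/softmax activations, and the random CartPole-style input are illustrative assumptions; the patch itself only rewrites __init__ and the Policy(...) call sites.

# Sketch only, not part of the patch. Assumes Paddle with the dygraph Linear API
# (the same paddle.fluid.dygraph.nn.Linear used in the + lines above).
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph.nn as nn


class Policy(fluid.dygraph.Layer):
    def __init__(self):
        # No name_scope argument after the migration.
        super(Policy, self).__init__()
        # Explicit input/output sizes replace nn.FC(self.full_name(), size=...).
        self.affine1 = nn.Linear(4, 128)
        self.action_head = nn.Linear(128, 2)
        self.value_head = nn.Linear(128, 1)

    def forward(self, x):
        # Assumed activations for illustration; the patch does not touch forward().
        x = fluid.layers.tanh(self.affine1(x))
        action_scores = fluid.layers.softmax(self.action_head(x))
        state_value = self.value_head(x)
        return action_scores, state_value


with fluid.dygraph.guard():
    policy = Policy()
    # Hypothetical 4-dimensional CartPole-style observation.
    obs = fluid.dygraph.to_variable(np.random.rand(1, 4).astype('float32'))
    probs, value = policy(obs)
    print(probs.numpy(), value.numpy())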