Commit 9e0ab0b2 authored by songyouwei, committed by hong

update build_once for RL (#4112)

test=develop
Parent 894429c9
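
The diffs below migrate the dygraph RL examples from name-scope based `nn.FC` layers, whose parameters were created lazily on the first forward pass via `build_once`, to `nn.Linear` layers with explicit input and output sizes. A minimal sketch of the pattern, assuming a PaddlePaddle 1.6-era `paddle.fluid` dygraph API (illustrative only, not part of the commit):

import paddle.fluid as fluid
import paddle.fluid.dygraph.nn as nn

# Before: FC took a name scope and only an output size; the input
# dimension was inferred on the first forward call (build_once), e.g.
#     self.affine1 = nn.FC(self.full_name(), size=128)
#
# After: no name scope, and Linear takes explicit (input_dim, output_dim),
# so weights are created in __init__. The 4 appears to match the CartPole
# observation size these examples use.
class Policy(fluid.dygraph.Layer):
    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)

    def forward(self, x):
        return fluid.layers.relu(self.affine1(x))
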
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@ SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+    def __init__(self):
+        super(Policy, self).__init__()
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ with fluid.dygraph.guard():
     fluid.default_startup_program().random_seed = args.seed
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
......
@@ -8,7 +8,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@ env.seed(args.seed)
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+    def __init__(self):
+        super(Policy, self).__init__()
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
         self.dropout_ratio = 0.6
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ with fluid.dygraph.guard():
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
......
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@ SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+    def __init__(self):
+        super(Policy, self).__init__()
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ with fluid.dygraph.guard():
     fluid.default_startup_program().random_seed = args.seed
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
......
@@ -8,7 +8,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@ env.seed(args.seed)
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+    def __init__(self):
+        super(Policy, self).__init__()
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
         self.dropout_ratio = 0.6
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ with fluid.dygraph.guard():
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
    eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
......
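
As a hedged aside (not part of the commit), the practical effect of dropping build_once is that parameters exist as soon as the layer object is constructed, which is why the examples can build the optimizer right after `Policy()` without a warm-up forward pass:

import paddle.fluid as fluid
import paddle.fluid.dygraph.nn as nn

class TinyPolicy(fluid.dygraph.Layer):
    def __init__(self):
        super(TinyPolicy, self).__init__()
        self.affine1 = nn.Linear(4, 128)

with fluid.dygraph.guard():
    policy = TinyPolicy()
    # With Linear, the weight and bias are created in __init__, so
    # parameters() is already populated before any forward pass; the old
    # FC-based layers only created parameters on the first call (build_once).
    print(len(policy.parameters()))  # 2: affine1 weight and bias
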