Commit 9e0ab0b2 authored by songyouwei, committed by hong

update build_once for RL (#4112)

test=develop
Parent 894429c9
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -40,12 +40,12 @@ SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.action_head = nn.FC(self.full_name(), size=2)
-        self.value_head = nn.FC(self.full_name(), size=1)
+        self.affine1 = nn.Linear(4, 128)
+        self.action_head = nn.Linear(128, 2)
+        self.value_head = nn.Linear(128, 1)
 
         self.saved_actions = []
         self.rewards = []
@@ -65,7 +65,7 @@ with fluid.dygraph.guard():
     fluid.default_startup_program().random_seed = args.seed
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
 
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=3e-2)
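For reference, a minimal sketch of the migrated actor-critic Policy in the new dygraph style: nn.Linear takes explicit input and output dimensions, so the layer no longer depends on build_once shape inference or a name_scope. Only __init__ is confirmed by this diff; the forward method below is an assumption inferred from the layer names (a shared affine1 trunk feeding separate action and value heads), and the input width of 4 matches the 4-dimensional CartPole observation these examples use.

import paddle.fluid as fluid
import paddle.fluid.dygraph.nn as nn


class Policy(fluid.dygraph.Layer):
    def __init__(self):
        super(Policy, self).__init__()
        # Explicit (input_dim, output_dim) replaces nn.FC's deferred
        # build_once shape inference, so no name_scope is needed.
        self.affine1 = nn.Linear(4, 128)
        self.action_head = nn.Linear(128, 2)
        self.value_head = nn.Linear(128, 1)
        self.saved_actions = []
        self.rewards = []

    def forward(self, x):
        # Assumed forward pass (not shown in the diff): shared trunk, two heads.
        x = fluid.layers.relu(self.affine1(x))
        action_scores = self.action_head(x)
        state_values = self.value_head(x)
        return fluid.layers.softmax(action_scores), state_values

As at the call site above, the model is now instantiated without a name scope, inside a dygraph guard: with fluid.dygraph.guard(): policy = Policy().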
@@ -8,7 +8,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph.nn as nn
 import paddle.fluid.framework as framework
 
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser()
 parser.add_argument(
     '--gamma',
     type=float,
@@ -37,11 +37,11 @@ env.seed(args.seed)
 class Policy(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(Policy, self).__init__(name_scope)
+    def __init__(self):
+        super(Policy, self).__init__()
 
-        self.affine1 = nn.FC(self.full_name(), size=128)
-        self.affine2 = nn.FC(self.full_name(), size=2)
+        self.affine1 = nn.Linear(4, 128)
+        self.affine2 = nn.Linear(128, 2)
 
         self.dropout_ratio = 0.6
         self.saved_log_probs = []
@@ -64,7 +64,7 @@ with fluid.dygraph.guard():
     fluid.default_main_program().random_seed = args.seed
     np.random.seed(args.seed)
-    policy = Policy("PolicyModel")
+    policy = Policy()
 
     eps = np.finfo(np.float32).eps.item()
     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-2)
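The REINFORCE policy follows the same migration pattern. Again, only __init__ is covered by the diff; placing dropout between the two affine layers in forward is an assumption suggested by the retained dropout_ratio field, not something the commit shows.

class Policy(fluid.dygraph.Layer):
    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)
        self.affine2 = nn.Linear(128, 2)
        self.dropout_ratio = 0.6
        self.saved_log_probs = []

    def forward(self, x):
        x = fluid.layers.relu(self.affine1(x))
        # Assumed: dropout between the affine layers, using dropout_ratio.
        x = fluid.layers.dropout(x, self.dropout_ratio)
        action_scores = self.affine2(x)
        return fluid.layers.softmax(action_scores)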