Commit 47315983 authored by N niuyazhe

hotfix(nyz): fix lunarlander dqn config and get formatted config

Parent fd908cdc
...
@@ -349,12 +349,59 @@ def save_config_formatted(config_: dict, path: str = 'formatted_total_config.py'
                         f.write(" replay_buffer=dict(\n")
                         for k4, v4 in v3.items():
                             if (k4 != 'monitor' and k4 != 'thruput_controller'):
-                                if (isinstance(v4, str)):
-                                    f.write(" {}='{}',\n".format(k4, v4))
-                                elif v4 == float('inf'):
-                                    f.write(" {}=float('{}'),\n".format(k4, v4))
+                                if (isinstance(v4, dict)):
+                                    f.write(" {}=dict(\n".format(k4))
+                                    for k5, v5 in v4.items():
+                                        if (isinstance(v5, str)):
+                                            f.write(" {}='{}',\n".format(k5, v5))
+                                        elif v5 == float('inf'):
+                                            f.write(" {}=float('{}'),\n".format(k5, v5))
+                                        elif (isinstance(v5, dict)):
+                                            f.write(" {}=dict(\n".format(k5))
+                                            for k6, v6 in v5.items():
+                                                if (isinstance(v6, str)):
+                                                    f.write(" {}='{}',\n".format(k6, v6))
+                                                elif v6 == float('inf'):
+                                                    f.write(
+                                                        " {}=float('{}'),\n".format(
+                                                            k6, v6
+                                                        )
+                                                    )
+                                                elif (isinstance(v6, dict)):
+                                                    f.write(" {}=dict(\n".format(k6))
+                                                    for k7, v7 in v6.items():
+                                                        if (isinstance(v7, str)):
+                                                            f.write(
+                                                                " {}='{}',\n".format(
+                                                                    k7, v7
+                                                                )
+                                                            )
+                                                        elif v7 == float('inf'):
+                                                            f.write(
+                                                                " {}=float('{}'),\n".
+                                                                format(k7, v7)
+                                                            )
+                                                        else:
+                                                            f.write(
+                                                                " {}={},\n".format(
+                                                                    k7, v7
+                                                                )
+                                                            )
+                                                    f.write(" ),\n")
+                                                else:
+                                                    f.write(" {}={},\n".format(k6, v6))
+                                            f.write(" ),\n")
+                                        else:
+                                            f.write(" {}={},\n".format(k5, v5))
+                                    f.write(" ),\n")
                                 else:
-                                    f.write(" {}={},\n".format(k4, v4))
+                                    if (isinstance(v4, str)):
+                                        f.write(" {}='{}',\n".format(k4, v4))
+                                    elif v4 == float('inf'):
+                                        f.write(" {}=float('{}'),\n".format(k4, v4))
+                                    else:
+                                        f.write(" {}={},\n".format(k4, v4))
                             else:
                                 if (k4 == 'monitor'):
                                     f.write(" monitor=dict(\n")
...
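
The new branch in save_config_formatted above handles dict-valued entries up to four levels deep (k4 through k7), writing a `key=dict(` / `),` pair at each level and quoting strings and float('inf') values. The same traversal can be expressed recursively; the following is only a standalone sketch of that idea, not code from this commit, and both the helper name `_format_dict` and the example config are made up for illustration:

    # Standalone sketch only: a recursive equivalent of the fixed-depth loops above.
    # `_format_dict` is a hypothetical helper, not part of the repository.
    def _format_dict(d: dict, indent: int = 4) -> str:
        pad = ' ' * indent
        lines = []
        for k, v in d.items():
            if isinstance(v, dict):
                # Open a nested dict, recurse one level deeper, then close it.
                lines.append("{}{}=dict(".format(pad, k))
                lines.append(_format_dict(v, indent + 4))
                lines.append("{}),".format(pad))
            elif isinstance(v, str):
                lines.append("{}{}='{}',".format(pad, k, v))
            elif v == float('inf'):
                lines.append("{}{}=float('{}'),".format(pad, k, v))
            else:
                lines.append("{}{}={},".format(pad, k, v))
        return '\n'.join(lines)


    if __name__ == '__main__':
        cfg = dict(
            eps=dict(type='exp', start=0.95, end=0.1, decay=50000),
            replay_buffer=dict(replay_buffer_size=100000),
        )
        print(_format_dict(cfg))

Unlike the explicit k4..k7 loops in the diff, such a recursion has no fixed depth limit, but it prints the same nested `key=dict(...)` layout.
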
...
@@ -17,11 +17,10 @@ lunarlander_dqn_default_config = dict(
         cuda=False,
         # Whether the RL algorithm is on-policy or off-policy.
         on_policy=False,
-        # Model config used for model creating. Remember to change this, especially "obs_dim" and "action_dim" according to specific env.
         model=dict(
-            obs_dim=8,
-            action_dim=4,
-            encoder_hidden_dim_list=[512, 64],
+            obs_shape=8,
+            action_shape=4,
+            encoder_hidden_size_list=[512, 64],
             # Whether to use dueling head.
             dueling=True,
         ),
@@ -31,8 +30,6 @@ lunarlander_dqn_default_config = dict(
         nstep=nstep,
         # learn_mode config
         learn=dict(
-            # How many steps to train after collector's one collection. Bigger "train_iteration" means bigger off-policy.
-            # collect data -> train fixed steps -> collect data -> ...
             update_per_collect=10,
             batch_size=64,
             learning_rate=0.001,
@@ -55,7 +52,7 @@ lunarlander_dqn_default_config = dict(
             type='exp',
             start=0.95,
             end=0.1,
-            decay=50_000,
+            decay=50000,
         ),
         replay_buffer=dict(replay_buffer_size=100000, )
     ),
...
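
The config hunks above rename the DQN model fields (obs_dim, action_dim and encoder_hidden_dim_list become obs_shape, action_shape and encoder_hidden_size_list) and replace the underscored literal 50_000 with 50000 for the epsilon decay. For the "get formatted config" part of the commit title, a rough usage sketch of save_config_formatted follows; the import path and the nesting of the example dict are assumptions made here for illustration, only the function name and its signature come from the diff above:

    # Sketch only: the module path is an assumption; only the signature
    # save_config_formatted(config_: dict, path: str = 'formatted_total_config.py')
    # appears in the diff above.
    # from <project>.config import save_config_formatted

    total_config = dict(
        main_config=dict(
            policy=dict(
                other=dict(
                    eps=dict(type='exp', start=0.95, end=0.1, decay=50000),
                    replay_buffer=dict(replay_buffer_size=100000),
                ),
            ),
        ),
    )

    # Writes a readable Python file mirroring the nested dict structure:
    # save_config_formatted(total_config, path='formatted_total_config.py')
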