提交 d5a8d268 作者: LI Yunxiang 提交者: Bo Zhou

Update DQN lr_scheduler: advance the scheduler by update_freq instead of obs.shape[0] (#164)

上级 bb9b78b4
......@@ -24,13 +24,14 @@ CONTEXT_LEN = 4
class AtariAgent(parl.Agent):
def __init__(self, algorithm, act_dim, start_lr, total_step):
def __init__(self, algorithm, act_dim, start_lr, total_step, update_freq):
super(AtariAgent, self).__init__(algorithm)
assert isinstance(act_dim, int)
self.act_dim = act_dim
self.exploration = 1.1
self.global_step = 0
self.update_target_steps = 10000 // 4
self.update_freq = update_freq
self.lr_scheduler = LinearDecayScheduler(start_lr, total_step)
......@@ -95,7 +96,7 @@ class AtariAgent(parl.Agent):
self.alg.sync_target()
self.global_step += 1
lr = self.lr_scheduler.step(step_num=obs.shape[0])
lr = self.lr_scheduler.step(step_num=self.update_freq)
act = np.expand_dims(act, -1)
reward = np.clip(reward, -1, 1)
......
......@@ -101,7 +101,8 @@ def main():
algorithm,
act_dim=act_dim,
start_lr=LEARNING_RATE,
total_step=args.train_total_steps)
total_step=args.train_total_steps,
update_freq=UPDATE_FREQ)
with tqdm(
total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册