update dqn lr_scheduler (#164)

d5a8d268 · LI Yunxiang · Bo Zhou · bb9b78b4 · d5a8d268 · d5a8d268
隐藏空白更改
内联并排

Showing 2 changed files with 5 additions and 3 deletions

examples/DQN/atari_agent.py examples/DQN/atari_agent.py +3 -2

examples/DQN/train.py examples/DQN/train.py +2 -1

未找到文件。
--- a/examples/DQN/atari_agent.py
+++ b/examples/DQN/atari_agent.py
@@ -24,13 +24,14 @@ CONTEXT_LEN = 4
 class AtariAgent(parl.Agent):
-    def __init__(self, algorithm, act_dim, start_lr, total_step):
+    def __init__(self, algorithm, act_dim, start_lr, total_step, update_freq):
        super(AtariAgent, self).__init__(algorithm)
        assert isinstance(act_dim, int)
        self.act_dim = act_dim
        self.exploration = 1.1
        self.global_step = 0
        self.update_target_steps = 10000 // 4
+        self.update_freq = update_freq
        self.lr_scheduler = LinearDecayScheduler(start_lr, total_step)
@@ -95,7 +96,7 @@ class AtariAgent(parl.Agent):
            self.alg.sync_target()
        self.global_step += 1
-        lr = self.lr_scheduler.step(step_num=obs.shape[0])
+        lr = self.lr_scheduler.step(step_num=self.update_freq)
        act = np.expand_dims(act, -1)
        reward = np.clip(reward, -1, 1)

--- a/examples/DQN/train.py
+++ b/examples/DQN/train.py
@@ -101,7 +101,8 @@ def main():
        algorithm,
        act_dim=act_dim,
        start_lr=LEARNING_RATE,
-        total_step=args.train_total_steps)
+        total_step=args.train_total_steps,
+        update_freq=UPDATE_FREQ)
    with tqdm(
            total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar: