diff --git a/fluid/DeepQNetwork/DQN_agent.py b/fluid/DeepQNetwork/DQN_agent.py
index 3e377c9d0eb34f4bef844427da6d77b3fe19e331..67eb3ce6a29bb723b481d6b1c2f517f037d52942 100644
--- a/fluid/DeepQNetwork/DQN_agent.py
+++ b/fluid/DeepQNetwork/DQN_agent.py
@@ -135,10 +135,10 @@ class DQNModel(object):
 
     def _build_sync_target_network(self):
         vars = list(fluid.default_main_program().list_vars())
-        policy_vars = filter(
-            lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars)
-        target_vars = filter(
-            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars)
+        policy_vars = list(filter(
+            lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars))
+        target_vars = list(filter(
+            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
         policy_vars.sort(key=lambda x: x.name)
         target_vars.sort(key=lambda x: x.name)
 
diff --git a/fluid/DeepQNetwork/DoubleDQN_agent.py b/fluid/DeepQNetwork/DoubleDQN_agent.py
index a46b2bd62e5c989b14858ed893f7fb132a0c5767..09b4b2119bab3fbdfa9bb9cfb8fae40fa34f87e1 100644
--- a/fluid/DeepQNetwork/DoubleDQN_agent.py
+++ b/fluid/DeepQNetwork/DoubleDQN_agent.py
@@ -142,10 +142,10 @@ class DoubleDQNModel(object):
 
     def _build_sync_target_network(self):
         vars = list(fluid.default_main_program().list_vars())
-        policy_vars = filter(
-            lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars)
-        target_vars = filter(
-            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars)
+        policy_vars = list(filter(
+            lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars))
+        target_vars = list(filter(
+            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
         policy_vars.sort(key=lambda x: x.name)
         target_vars.sort(key=lambda x: x.name)
 
diff --git a/fluid/DeepQNetwork/DuelingDQN_agent.py b/fluid/DeepQNetwork/DuelingDQN_agent.py
index 582111730da408b741ad6b6eeded34685d026f40..d6224ef34a2cb1ec0a09d9ed2e87a2f89ab82142 100644
--- a/fluid/DeepQNetwork/DuelingDQN_agent.py
+++ b/fluid/DeepQNetwork/DuelingDQN_agent.py
@@ -145,10 +145,10 @@ class DuelingDQNModel(object):
 
     def _build_sync_target_network(self):
        vars = list(fluid.default_main_program().list_vars())
-        policy_vars = filter(
-            lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars)
-        target_vars = filter(
-            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars)
+        policy_vars = list(filter(
+            lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars))
+        target_vars = list(filter(
+            lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
         policy_vars.sort(key=lambda x: x.name)
         target_vars.sort(key=lambda x: x.name)
 
diff --git a/fluid/DeepQNetwork/atari.py b/fluid/DeepQNetwork/atari.py
index 5006de4d4e6b57110ca7301395e170666d24e8b4..46b7542019121b36e3e8923dba350e1d8a71fa34 100644
--- a/fluid/DeepQNetwork/atari.py
+++ b/fluid/DeepQNetwork/atari.py
@@ -55,7 +55,7 @@ class AtariPlayer(gym.Env):
         try:
             ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
         except AttributeError:
-            print "You're not using latest ALE"
+            print("You're not using latest ALE")
 
         # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
         with _ALE_LOCK:
diff --git a/fluid/DeepQNetwork/train.py b/fluid/DeepQNetwork/train.py
index 63439be7c8da481c946b0cb0bd571637bd875105..614823c52d7e8c29b8e4565fc58f52cfa11b9640 100644
--- a/fluid/DeepQNetwork/train.py
+++ b/fluid/DeepQNetwork/train.py
@@ -74,7 +74,7 @@ def get_player(rom, viz=False, train=False):
 
 def eval_agent(agent, env):
     episode_reward = []
-    for _ in tqdm(xrange(30), desc='eval agent'):
+    for _ in tqdm(range(30), desc='eval agent'):
         state = env.reset()
         total_reward = 0
         step = 0
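
For context (not part of the patch): these hunks are Python 3 compatibility fixes. In Python 3, filter() returns a lazy iterator instead of a list, so the in-place .sort() calls that follow would raise AttributeError without the list() wrapper; likewise, the print statement became a function and xrange() was removed in favor of range(). A minimal sketch, using made-up variable names:

# Sketch only -- illustrates the filter() change; names here are hypothetical.
names = ['policy_fc_0.w_0', 'target_fc_0.w_0', 'policy_fc_0.w_0@GRAD']

policy = filter(lambda x: 'GRAD' not in x and 'policy' in x, names)
# policy.sort()  # Python 3: AttributeError -- 'filter' object has no attribute 'sort'

policy = list(filter(lambda x: 'GRAD' not in x and 'policy' in x, names))
policy.sort()    # works on both Python 2 and Python 3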