提交 ef687124 编写于 作者: R robot

Add Python 3 compatibility

上级 40b3e11d
......@@ -135,10 +135,10 @@ class DQNModel(object):
def _build_sync_target_network(self):
vars = list(fluid.default_main_program().list_vars())
policy_vars = filter(
lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars)
target_vars = filter(
lambda x: 'GRAD' not in x.name and 'target' in x.name, vars)
policy_vars = list(filter(
lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars))
target_vars = list(filter(
lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
policy_vars.sort(key=lambda x: x.name)
target_vars.sort(key=lambda x: x.name)
......
......@@ -142,10 +142,10 @@ class DoubleDQNModel(object):
def _build_sync_target_network(self):
vars = list(fluid.default_main_program().list_vars())
policy_vars = filter(
lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars)
target_vars = filter(
lambda x: 'GRAD' not in x.name and 'target' in x.name, vars)
policy_vars = list(filter(
lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars))
target_vars = list(filter(
lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
policy_vars.sort(key=lambda x: x.name)
target_vars.sort(key=lambda x: x.name)
......
......@@ -145,10 +145,10 @@ class DuelingDQNModel(object):
def _build_sync_target_network(self):
vars = list(fluid.default_main_program().list_vars())
policy_vars = filter(
lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars)
target_vars = filter(
lambda x: 'GRAD' not in x.name and 'target' in x.name, vars)
policy_vars = list(filter(
lambda x: 'GRAD' not in x.name and 'policy' in x.name, vars))
target_vars = list(filter(
lambda x: 'GRAD' not in x.name and 'target' in x.name, vars))
policy_vars.sort(key=lambda x: x.name)
target_vars.sort(key=lambda x: x.name)
......
......@@ -55,7 +55,7 @@ class AtariPlayer(gym.Env):
try:
ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
except AttributeError:
print "You're not using latest ALE"
print("You're not using latest ALE")
# avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
with _ALE_LOCK:
......
......@@ -74,7 +74,7 @@ def get_player(rom, viz=False, train=False):
def eval_agent(agent, env):
episode_reward = []
for _ in tqdm(xrange(30), desc='eval agent'):
for _ in tqdm(range(30), desc='eval agent'):
state = env.reset()
total_reward = 0
step = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册