From 2448dd5e4c42f21dd47b7ada085411fadbb3cfda Mon Sep 17 00:00:00 2001 From: zenghsh3 Date: Tue, 26 Jun 2018 11:09:13 +0800 Subject: [PATCH] Fix with pre-commit --- fluid/DeepQNetwork/README.md | 2 +- fluid/DeepQNetwork/README_cn.md | 12 ++++++------ fluid/DeepQNetwork/train.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fluid/DeepQNetwork/README.md b/fluid/DeepQNetwork/README.md index 83ce1368..2a2db7cb 100644 --- a/fluid/DeepQNetwork/README.md +++ b/fluid/DeepQNetwork/README.md @@ -29,7 +29,7 @@ Based on PaddlePaddle's next-generation API Fluid, the DQN model of deep reinfor recommended to compile and install PaddlePaddle from source code + Install other dependencies: ``` - pip install -r requirement.txt + pip install -r requirement.txt pip install gym[atari] ``` Install ale_python_interface, can reference:https://github.com/mgbellemare/Arcade-Learning-Environment diff --git a/fluid/DeepQNetwork/README_cn.md b/fluid/DeepQNetwork/README_cn.md index 130144fa..87eb6123 100644 --- a/fluid/DeepQNetwork/README_cn.md +++ b/fluid/DeepQNetwork/README_cn.md @@ -1,6 +1,6 @@ # 基于PaddlePaddle的Fluid版本复现DQN, DoubleDQN, DuelingDQN三个模型 基于PaddlePaddle下一代API Fluid复现了深度强化学习领域的DQN模型,在经典的Atari 游戏上复现了论文同等水平的指标,模型接收游戏的图像作为输入,采用端到端的模型直接预测下一步要执行的控制信号,本仓库一共包含以下3类模型。 -+ DQN模型: ++ DQN模型: [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) + DoubleDQN模型: [Deep Reinforcement Learning with Double Q-Learning](https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/viewPaper/12389) @@ -19,15 +19,15 @@ + gym + tqdm + opencv-python - + paddlepaddle-gpu>=0.12.0 + + paddlepaddle-gpu>=0.12.0 + ale_python_interface + 下载依赖: + 安装PaddlePaddle: - 建议通过PaddlePaddle源码进行编译安装 + 建议通过PaddlePaddle源码进行编译安装 + 下载其它依赖: ``` - pip install -r requirement.txt + pip install -r requirement.txt pip install gym[atari] ``` 安装ale_python_interface可以参考:https://github.com/mgbellemare/Arcade-Learning-Environment @@ -46,10 +46,10 @@ 训练更多游戏,可以下载游戏rom从[这里](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms) -+ 测试模型: ++ 测试模型: ``` # Play the game with saved model and calculate the average rewards - # 使用训练过程中保存的最好模型玩游戏,以及计算平均奖励(rewards) + # 使用训练过程中保存的最好模型玩游戏,以及计算平均奖励(rewards) python play.py --rom ./rom_files/pong.bin --use_cuda --model_path ./saved_model/DQN-pong # 以可视化的形式来玩游戏 diff --git a/fluid/DeepQNetwork/train.py b/fluid/DeepQNetwork/train.py index ffbe099c..63439be7 100644 --- a/fluid/DeepQNetwork/train.py +++ b/fluid/DeepQNetwork/train.py @@ -122,7 +122,7 @@ def train_agent(): total_step = 0 max_reward = None save_path = os.path.join(args.model_dirname, '{}-{}'.format( - args.alg, os.path.basename(args.rom).split('.')[0])) + args.alg, os.path.basename(args.rom).split('.')[0])) while True: # start epoch total_reward, step = run_train_episode(agent, env, exp) @@ -136,7 +136,7 @@ def train_agent(): test_flag += 1 print("eval_agent done, (steps, eval_reward): ({}, {})".format( total_step, eval_reward)) - + if max_reward is None or eval_reward > max_reward: max_reward = eval_reward fluid.io.save_inference_model(save_path, ['state'], -- GitLab