diff --git a/fluid/DeepQNetwork/README.md b/fluid/DeepQNetwork/README.md
index 83ce1368f483df5fe8a3e057991815e003abb8fe..2a2db7cb76c26227d640cfc4f5972771f8c53ac0 100644
--- a/fluid/DeepQNetwork/README.md
+++ b/fluid/DeepQNetwork/README.md
@@ -29,7 +29,7 @@ Based on PaddlePaddle's next-generation API Fluid, the DQN model of deep reinfor
     recommended to compile and install PaddlePaddle from source code
 + Install other dependencies:
     ```
-    pip install -r requirement.txt 
+    pip install -r requirement.txt
     pip install gym[atari]
     ```
 Install ale_python_interface, can reference:https://github.com/mgbellemare/Arcade-Learning-Environment
diff --git a/fluid/DeepQNetwork/README_cn.md b/fluid/DeepQNetwork/README_cn.md
index 130144fae2500185540628e47e519d3ff7699c73..87eb61238aa6b81d83b23cb6ea3a18bccff9618e 100644
--- a/fluid/DeepQNetwork/README_cn.md
+++ b/fluid/DeepQNetwork/README_cn.md
@@ -1,6 +1,6 @@
 # 基于PaddlePaddle的Fluid版本复现DQN, DoubleDQN, DuelingDQN三个模型
 基于PaddlePaddle下一代API Fluid复现了深度强化学习领域的DQN模型,在经典的Atari 游戏上复现了论文同等水平的指标,模型接收游戏的图像作为输入,采用端到端的模型直接预测下一步要执行的控制信号,本仓库一共包含以下3类模型。
-+ DQN模型: 
++ DQN模型:
 [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
 + DoubleDQN模型:
 [Deep Reinforcement Learning with Double Q-Learning](https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/viewPaper/12389)
@@ -19,15 +19,15 @@
     + gym
     + tqdm
     + opencv-python
-    + paddlepaddle-gpu>=0.12.0 
+    + paddlepaddle-gpu>=0.12.0
     + ale_python_interface
 
 + 下载依赖:
     + 安装PaddlePaddle:
-        建议通过PaddlePaddle源码进行编译安装 
+        建议通过PaddlePaddle源码进行编译安装
     + 下载其它依赖:
         ```
-        pip install -r requirement.txt 
+        pip install -r requirement.txt
         pip install gym[atari]
         ```
 安装ale_python_interface可以参考:https://github.com/mgbellemare/Arcade-Learning-Environment
@@ -46,10 +46,10 @@
 训练更多游戏,可以下载游戏rom从[这里](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms)
 
-+ 测试模型: 
++ 测试模型:
     ```
     # Play the game with saved model and calculate the average rewards
-    # 使用训练过程中保存的最好模型玩游戏,以及计算平均奖励(rewards) 
+    # 使用训练过程中保存的最好模型玩游戏,以及计算平均奖励(rewards)
     python play.py --rom ./rom_files/pong.bin --use_cuda --model_path ./saved_model/DQN-pong
 
     # 以可视化的形式来玩游戏
diff --git a/fluid/DeepQNetwork/train.py b/fluid/DeepQNetwork/train.py
index ffbe099c13c959d53eddab8963a3240717c6f5d1..63439be7c8da481c946b0cb0bd571637bd875105 100644
--- a/fluid/DeepQNetwork/train.py
+++ b/fluid/DeepQNetwork/train.py
@@ -122,7 +122,7 @@ def train_agent():
     total_step = 0
     max_reward = None
     save_path = os.path.join(args.model_dirname, '{}-{}'.format(
-        args.alg, os.path.basename(args.rom).split('.')[0])) 
+        args.alg, os.path.basename(args.rom).split('.')[0]))
     while True:
         # start epoch
         total_reward, step = run_train_episode(agent, env, exp)
@@ -136,7 +136,7 @@
             test_flag += 1
             print("eval_agent done, (steps, eval_reward): ({}, {})".format(
                 total_step, eval_reward))
-            
+
             if max_reward is None or eval_reward > max_reward:
                 max_reward = eval_reward
                 fluid.io.save_inference_model(save_path, ['state'],