Commit 7cd8ee2d, author: robot

add saved model of pong and breakout, update README.MD

Parent 843a00f7
# Reproducing DQN, DoubleDQN, and DuelingDQN with the Fluid version of PaddlePaddle

+ DQN: [Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
+ DoubleDQN: [Deep Reinforcement Learning with Double Q-Learning](https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/viewPaper/12389)
+ DuelingDQN: [Dueling Network Architectures for Deep Reinforcement Learning](http://proceedings.mlr.press/v48/wangf16.html)
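As a quick orientation: DQN and DoubleDQN share the same network but compute the bootstrap target differently, while DuelingDQN keeps the DQN target and changes the network head (it predicts a state value V and per-action advantages A and combines them as Q = V + A - mean(A)). The sketch below contrasts the two targets in plain NumPy; the array names are illustrative and this is not the code from this repository:

```python
import numpy as np

# q_next and q_next_target are (batch, num_actions) Q-values for the next states,
# produced by the online network and the target network respectively.
def dqn_target(rewards, dones, q_next_target, gamma=0.99):
    # Standard DQN: the target network both picks and scores the best next action.
    return rewards + gamma * (1.0 - dones) * q_next_target.max(axis=1)

def double_dqn_target(rewards, dones, q_next, q_next_target, gamma=0.99):
    # Double DQN: the online network picks the action, the target network scores it.
    best_actions = q_next.argmax(axis=1)
    chosen = q_next_target[np.arange(len(best_actions)), best_actions]
    return rewards + gamma * (1.0 - dones) * chosen
```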
# Atari benchmark & performance
## [Atari games introduction](https://gym.openai.com/envs/#atari)
+ Training result on Pong
![DQN result](assets/dqn.png)
# How to use
+ Dependencies:
  + python2.7
  + gym
  + tqdm
  + opencv-python
  + paddlepaddle-gpu==0.12.0

The dependencies can be installed with:
```
pip install -r requirement.txt
```
+ Training:
```
# Train on Pong with a GPU (the DQN model is used by default)
python train.py --rom ./rom_files/pong.bin --use_cuda
# Train with the DoubleDQN model
python train.py --rom ./rom_files/pong.bin --use_cuda --alg DoubleDQN
# Train with the DuelingDQN model
python train.py --rom ./rom_files/pong.bin --use_cuda --alg DuelingDQN
```
To train other games, download additional rom files from [here](https://github.com/openai/atari-py/tree/master/atari_py/atari_roms).
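For example, the commit message mentions a saved Breakout model; assuming breakout.bin has been downloaded into ./rom_files/ as described above, the corresponding command would look like this (the choice of DuelingDQN here is only illustrative):
```
python train.py --rom ./rom_files/breakout.bin --use_cuda --alg DuelingDQN
```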
+ Testing:
```
# Play the game with the best model saved during training and compute the average reward
python play.py --rom ./rom_files/pong.bin --use_cuda --model_path ./saved_model/DQN-pong
# Play the game with visualization
python play.py --rom ./rom_files/pong.bin --use_cuda --model_path ./saved_model/DQN-pong --viz 0.01
```
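For reference, a saved inference model such as ./saved_model/DQN-pong is loaded back through fluid.io.load_inference_model. The sketch below shows the typical pattern; it is only an assumption about what play.py does, and the input shape/dtype are illustrative (four stacked 84x84 frames is the usual Atari preprocessing):

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CUDAPlace(0)  # use fluid.CPUPlace() when no GPU is available
exe = fluid.Executor(place)

# Returns the inference program plus the names of its feed variables and its fetch targets.
[infer_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
    dirname='./saved_model/DQN-pong', executor=exe)

# 'state' is the single feed variable saved by train.py; the shape here is an assumption.
state = np.zeros([1, 4, 84, 84], dtype='float32')
q_values = exe.run(infer_program,
                   feed={feed_names[0]: state},
                   fetch_list=fetch_targets)[0]
action = int(np.argmax(q_values))
print('greedy action:', action)
```

The rest of the commit changes train.py, shown as diff hunks below.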
@@ -11,7 +11,7 @@ from tqdm import tqdm
 def predict_action(exe, state, predict_program, feed_names, fetch_targets,
                    action_dim):
-    if np.random.randint(100) == 0:
+    if np.random.random() < 0.001:
         act = np.random.randint(action_dim)
     else:
         state = np.expand_dims(state, axis=0)
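The change above lowers the probability of taking a random action inside train.py's predict_action from 1 in 100 (np.random.randint(100) == 0) to 1 in 1000 (np.random.random() < 0.001). The same ε-greedy pattern, written as a standalone helper purely for illustration (epsilon_greedy is not a function in this repository), looks like:

```python
import numpy as np

def epsilon_greedy(q_values, epsilon=0.001):
    """Return a random action with probability epsilon, otherwise the greedy action."""
    if np.random.random() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))
```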
@@ -120,6 +120,9 @@ def train_agent():
     pbar = tqdm(total=1e8)
     recent_100_reward = []
     total_step = 0
+    max_reward = None
+    save_path = os.path.join(args.model_dirname, '{}-{}'.format(
+        args.alg, os.path.basename(args.rom).split('.')[0]))
     while True:
         # start epoch
         total_reward, step = run_train_episode(agent, env, exp)
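The new save_path is computed once per run from the algorithm name and the rom file name, and (together with the best-reward check in the next hunk) replaces the old per-step directories, which is why the README now points --model_path at ./saved_model/DQN-pong without a stepXXXXX suffix. A quick check of how the path resolves for the README's default Pong command, with the values filled in by hand for illustration:

```python
import os

alg = 'DQN'                    # default algorithm per the README
rom = './rom_files/pong.bin'   # rom passed on the command line
model_dirname = 'saved_model'  # default value of --model_dirname

save_path = os.path.join(model_dirname, '{}-{}'.format(
    alg, os.path.basename(rom).split('.')[0]))
print(save_path)  # -> saved_model/DQN-pong
```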
@@ -133,15 +136,12 @@ def train_agent():
             test_flag += 1
             print("eval_agent done, (steps, eval_reward): ({}, {})".format(
                 total_step, eval_reward))
-            if total_step // args.save_every_steps == save_flag:
-                save_flag += 1
-                save_path = os.path.join(args.model_dirname, '{}-{}'.format(
-                    args.alg, os.path.basename(args.rom).split('.')[0]),
-                    'step{}'.format(total_step))
-                fluid.io.save_inference_model(save_path, ['state'],
-                                              agent.pred_value, agent.exe,
-                                              agent.predict_program)
+            if max_reward is None or eval_reward > max_reward:
+                max_reward = eval_reward
+                fluid.io.save_inference_model(save_path, ['state'],
+                                              agent.pred_value, agent.exe,
+                                              agent.predict_program)
     pbar.close()
@@ -173,11 +173,6 @@ if __name__ == '__main__':
         type=str,
         default='saved_model',
         help='dirname to save model')
-    parser.add_argument(
-        '--save_every_steps',
-        type=int,
-        default=100000,
-        help='every steps number to save model')
     parser.add_argument(
         '--test_every_steps',
         type=int,