run.py 694 字节
Newer Older
W
wanghaoshuang 已提交
1
from brain import PolicyGradient
W
Add env  
wanghaoshuang 已提交
2 3
from env import Env
import numpy as np
W
wanghaoshuang 已提交
4

W
Add env  
wanghaoshuang 已提交
5 6 7 8
# Passed as the first argument to PolicyGradient; presumably the size of the
# discrete action space -- confirm against brain.py.
n_actions = 2
# Passed as the second argument to Env; presumably a time/step interval used
# by the environment -- confirm against env.py.
interval = 0.01
# Shared by both PolicyGradient and Env (second and first argument
# respectively); meaning is defined by those classes -- TODO confirm.
stage_len = 10
# Number of iterations of the outer training loop (one learn() call each).
epoches = 10000
W
wanghaoshuang 已提交
9 10 11

if __name__ == "__main__":
    # Training driver: build the policy-gradient agent and the environment,
    # then run `epoches` episodes, calling brain.learn() after each one.
    brain = PolicyGradient(n_actions, stage_len)
    e = Env(stage_len, interval)
    brain.build_net()

    for epoch in range(epoches):
        # Enable rendering for the first few epochs, for one epoch out of
        # every 500 (epoch % 500 == 1), and for every epoch after 3000.
        e.render = (epoch % 500 == 1) or epoch < 5 or epoch > 3000
        e.reset()

        # Roll out one episode, recording each (state, action, reward)
        # transition until the environment reports completion.
        done = False
        while not done:
            s = e.status()
            action = brain.choose_action(s)
            r, done, _ = e.move(action)
            brain.store_transition(s, action, r)

        # Update the policy from the transitions stored during this episode.
        brain.learn()