run.py 237 字节
Newer Older
W
wanghaoshuang 已提交
1 2 3 4 5 6 7 8 9 10 11
from brain import PolicyGradient

# Sizes handed to PolicyGradient below; n_features also sets the length of
# the hand-made input list stored as the single transition.
n_features = 10
n_actions = 4


def main():
    """Run one stored transition through PolicyGradient and call learn().

    Builds a PolicyGradient from the module-level ``n_actions`` and
    ``n_features``, stores exactly one hand-made transition, then triggers
    a single ``learn()`` call. Nothing is returned or printed here.
    """
    agent = PolicyGradient(n_actions, n_features)

    # One all-ones list of length n_features plus two scalar values.
    # NOTE(review): presumably (observation, action, reward) -- confirm
    # against PolicyGradient.store_transition in brain.py.
    dummy_features = [1] * n_features
    dummy_action = 1
    dummy_reward = 1.0
    agent.store_transition(dummy_features, dummy_action, dummy_reward)

    # NOTE(review): an explicit build_net() call used to sit here but was
    # commented out; presumably the network is built elsewhere -- confirm.
    agent.learn()


if __name__ == "__main__":
    main()