文件 · github/fork/Banmahhhh/develop · PaddlePaddle / PARL · GitCode

import parl
from parl.algorithms import DQN, DDQN

class AtariModel(parl.Model):
    """Forward network used by an algorithm to score Atari observations.

    The model only describes the forward computation: it takes the state
    observed from the environment (an image) and produces Q values.
    """

    def __init__(self, img_shape, action_dim):
        # Layer definitions. Layers omitted from this example are marked `...`.
        # NOTE(review): `layers` is not imported in the visible snippet;
        # presumably it comes from `parl` -- confirm.
        # `img_shape` is accepted but not used by the layers shown here.
        self.cnn1 = layers.conv_2d(
            num_filters=32, filter_size=5, stride=1, padding=2, act='relu')
        ...
        # Final fully connected layer, constructed with `action_dim`.
        self.fc1 = layers.fc(action_dim)

    def value(self, img):
        """Estimate the Q value from an image of an Atari game.

        The image is divided by 255.0 before being passed through the layers.
        """
        scaled = img / 255.0
        hidden = self.cnn1(scaled)
        ...
        q_values = self.fc1(hidden)
        return q_values
"""
Three steps to build an agent:
   1.  Define a forward model (`model` in this example).
   2.  a. To build a DQN algorithm, just pass the model to `DQN`.
       b. To build a DDQN algorithm, just replace `DQN` in the line below with `DDQN`.
   3.  Define the I/O part in AtariAgent so that it can update the algorithm based on the interaction data.
"""

# Build the agent: create the forward model, wrap it in an algorithm, then wrap the algorithm in an agent.
model = AtariModel(img_shape=(32, 32), action_dim=4)
# Swap `DQN` for `DDQN` here to use DDQN instead (both are imported from parl.algorithms).
algorithm = DQN(model)
# NOTE(review): `AtariAgent` is not defined in the visible snippet; presumably declared elsewhere -- confirm.
agent = AtariAgent(algorithm)
#============Agent.py=================
@parl.remote_class
class Agent(object):
    """Example class decorated with `parl.remote_class`."""

    def say_hello(self):
        """Print a fixed greeting."""
        greeting = "Hello World!"
        print(greeting)

    def sum(self, a, b):
        """Return the result of `a + b`."""
        total = a + b
        return total

# Launch `Agent.py` on any computation platform, such as a CPU cluster.
# NOTE(review): `server_address` is not defined in this snippet; it presumably
# holds the address of the machine running `Server.py` -- confirm before use.
if __name__ == '__main__':
    # Compare `__name__` (not the undefined name `__main__`) so this block runs
    # only when the file is executed as a script, not when it is imported.
    agent = Agent()
    agent.as_remote(server_address)


#============Server.py=================
# Create a remote manager and ask it for a remote object.
# NOTE(review): presumably this returns a handle to an `Agent` that was
# launched with `as_remote` -- confirm against the PARL documentation.
remote_manager = parl.RemoteManager()
agent = remote_manager.get_remote()
# Calls on the returned object are written like ordinary method calls.
agent.say_hello()
ans = agent.sum(1,5) # runs remotely and does not consume any local computation resources
pip install parl