diff --git a/.github/decorator.png b/.github/decorator.png
new file mode 100644
index 0000000000000000000000000000000000000000..82a11269002c97d7e608c1ae568996047ea24ed5
Binary files /dev/null and b/.github/decorator.png differ
diff --git a/README.md b/README.md
index fbeff6689561a48012f9ed17ccd70e5b903af888..0a8f55c2148ba27637474254942ef293543b093e 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,9 @@
 > PARL is a flexible and high-efficient reinforcement learning framework based on [PaddlePaddle](https://github.com/PaddlePaddle/Paddle).
 
 # Features
-**Reproducible**. We provide algorithms that stably reproduce the result of many influential reinforcement learning algorithms
+**Reproducible**. We provide algorithms that stably reproduce the results of many influential reinforcement learning algorithms.
 
-**Large Scale**. Ability to support high performance parallelization of training with thousands of CPUs and multi-GPUs
+**Large Scale**. Ability to support high-performance parallelization of training with thousands of CPUs and multiple GPUs.
 
 **Reusable**. Algorithms provided in repository could be directly adapted to a new task by defining a forward network and training mechanism will be built automatically.
@@ -44,6 +44,7 @@ class AtariModel(parl.Model):
             stride=1, padding=2, act='relu')
         ...
         self.fc1 = layers.fc(action_dim)
+
     def value(self, img):
         # define how to estimate the Q value based on the image of atari games.
         img = img / 255.0
@@ -64,6 +65,42 @@ algorithm = DQN(model)
 agent = AtariAgent(algorithm)
 ```
 
+# Parallelization
+PARL provides a compact API for distributed training, allowing users to turn their code into a parallelized version by simply adding a decorator.
+Here is a `Hello World!` example demonstrating how easy it is to leverage external computation resources.
+```python
+#============Agent.py=================
+@parl.remote_class
+class Agent(object):
+
+    def say_hello(self):
+        print("Hello World!")
+
+    def sum(self, a, b):
+        return a + b
+
+# launch `Agent.py` on any computation platform, such as a CPU cluster.
+if __name__ == '__main__':
+    agent = Agent()
+    agent.as_remote(server_address)
+
+
+#============Server.py=================
+remote_manager = parl.RemoteManager()
+agent = remote_manager.get_remote()
+agent.say_hello()
+ans = agent.sum(1, 5)  # runs remotely without consuming any local computation resources
+```
+Two steps to use external computation resources:
+1. Use `parl.remote_class` to decorate a class; it is then transformed into a new class that can run on other CPUs or machines.
+2. Get remote objects from the `RemoteManager`; these objects have the same interface as the original ones, but calling any of their methods **does not** consume local computation resources, since they are executed elsewhere.
+
+![PARL](.github/decorator.png)
+As shown in the above figure, the real actors (orange circles) run on a CPU cluster, while the learner (blue circle) runs on the local GPU along with several remote actors (yellow circles with dotted edges).
+
+Users can write code in a simple way, just as they would write multi-threaded code, but with the actors consuming remote resources. We also provide examples of parallelized algorithms such as IMPALA, A2C, and GA3C. For more details on usage, please refer to these examples.
+
+
 # Install:
 ### Dependencies
 - Python 2.7 or 3.5+.
diff --git a/parl/remote/remote_decorator.py b/parl/remote/remote_decorator.py
index a81b45bc5d4aababddbd9aebd496730f428267b5..41a9e2bf969c53aec2e2b2d6d52bd9dd3df94d20 100644
--- a/parl/remote/remote_decorator.py
+++ b/parl/remote/remote_decorator.py
@@ -32,7 +32,7 @@
 Class Simulator(object):
     ...
 
 sim = Simulator()
-sim.as_remote(server_ip='172.18.202.45', port=8001)
+sim.as_remote(server_ip='172.18.202.45', server_port=8001)
 """
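To make the proxy mechanism described in the README concrete, here is a minimal, self-contained sketch of how a remote-class decorator of this kind can work. This is **not** PARL's actual implementation (which serializes calls over the network to other machines via `as_remote` and `RemoteManager`); `toy_remote_class` and the thread-based worker are hypothetical stand-ins that illustrate why calling the proxy's methods costs the caller almost nothing locally: each call is serialized, executed by the worker, and only the result is sent back.

```python
import pickle
import queue
import threading


def toy_remote_class(cls):
    """Illustrative stand-in for a remote-class decorator: method calls on
    the proxy are pickled, executed by a separate worker (a thread here,
    standing in for a remote machine), and the result is shipped back."""

    class Proxy(object):
        def __init__(self, *args, **kwargs):
            self._requests = queue.Queue()
            self._replies = queue.Queue()
            # The "remote" side: in a real system the instance would be
            # constructed and served on another machine, not in a thread.
            worker = threading.Thread(
                target=self._serve, args=(cls(*args, **kwargs),), daemon=True)
            worker.start()

        def _serve(self, instance):
            # Worker loop: deserialize each request, run the real method,
            # and send the serialized result back to the caller.
            while True:
                name, args, kwargs = pickle.loads(self._requests.get())
                result = getattr(instance, name)(*args, **kwargs)
                self._replies.put(pickle.dumps(result))

        def __getattr__(self, name):
            # Intercept method access: instead of running locally,
            # forward the call to the worker and wait for the reply.
            def call(*args, **kwargs):
                self._requests.put(pickle.dumps((name, args, kwargs)))
                return pickle.loads(self._replies.get())
            return call

    return Proxy


@toy_remote_class
class Agent(object):
    def sum(self, a, b):
        return a + b


agent = Agent()
print(agent.sum(1, 5))  # 6, computed by the worker, not by the caller
```

The same proxy pattern generalizes to genuine remote execution by replacing the in-process queues with network communication, which is presumably what the `as_remote`/`RemoteManager` pair coordinates in PARL.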