Unverified commit d33f3002, authored by Bo Zhou, committed by GitHub

Replace ParallelExecutor (PE) with compiler (new feature in Paddle 1.5.1). (#99)

* fix the compatibility issue

* fix the comment issue

* support paddle 1.5.1 and replace PE with compiler

* yapf & copyright

* yapf

* fix the teamcity problem

* fix the teamcity problem

* fix comment

* only support paddle 1.5.1

* Cmake

* fix comment
Parent 33516338
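The gist of the change, distilled from the diff below: each agent used to build a dedicated `fluid.ParallelExecutor` for its learn program; now the program is compiled once with `parl.compile` (a thin wrapper over `fluid.compiler.CompiledProgram`) and run through the agent's ordinary executor. A minimal before/after sketch, assuming paddle 1.5.1 on CPU (the toy program, `x`, `y`, and the SGD settings are placeholders, not code from this PR):

```python
import numpy as np
import paddle.fluid as fluid
import parl

# A toy trainable program standing in for an agent's learn_program.
program, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(program, startup):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.reduce_mean(fluid.layers.square(pred - y))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)

# Before (paddle <= 1.4): a dedicated ParallelExecutor, fetching by name.
#   learn_exe = fluid.ParallelExecutor(use_cuda=False, main_program=program,
#                                      loss_name=loss.name)
#   loss_np, = learn_exe.run(fetch_list=[loss.name], feed={...})

# After (paddle 1.5.1): compile once, keep using the plain executor.
program = parl.compile(program, loss)
loss_np, = exe.run(program,
                   feed={'x': np.random.rand(32, 4).astype('float32'),
                         'y': np.random.rand(32, 1).astype('float32')},
                   fetch_list=[loss])
```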
@@ -132,7 +132,6 @@ function main() {
     pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     pip3.6 install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     /root/miniconda3/envs/empty_env/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
-    /root/miniconda3/envs/paddle1.4.0/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     run_test_with_gpu
     run_test_with_cpu
     run_import_test
 # requirements for unittest
-paddlepaddle-gpu==1.3.0.post97
+paddlepaddle-gpu==1.5.1.post97
 gym
 details
 parameterized
@@ -48,11 +48,6 @@ function(import_test TARGET_NAME)
             /root/miniconda3/envs/empty_env/bin/python -u ${py_test_SRCS} ${py_test_ARGS}
         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-    add_test(NAME ${TARGET_NAME}_with_paddle1.4.0
-        COMMAND env PYTHONPATH=.:${py_test_ENVS}
-            /root/miniconda3/envs/paddle1.4.0/bin/python -u ${py_test_SRCS} ${py_test_ARGS}
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 endfunction()

 function(docs_test)
@@ -106,7 +106,7 @@ For users, they can write code in a simple way, just like writing multi-thread code
 # Install:
 ### Dependencies
 - Python 2.7 or 3.5+.
-- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) >=1.2.1 (**Optional**, if you only want to use APIs related to parallelization alone)
+- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) >=1.5.1 (**Optional**, if you only want to use APIs related to parallelization alone)
 ```
@@ -16,7 +16,7 @@ Mean episode reward in training process after 10 million sample steps.
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -38,19 +38,6 @@ class AtariAgent(parl.Agent):
         self.entropy_coeff_scheduler = PiecewiseScheduler(
             config['entropy_coeff_scheduler'])

-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.use_experimental_executor = True
-        exec_strategy.num_threads = 4
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.remove_unnecessary_lock = True
-
-        # Use ParallelExecutor to make learn program run faster
-        self.learn_exe = fluid.ParallelExecutor(
-            use_cuda=machine_info.is_gpu_available(),
-            main_program=self.learn_program,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)
-
     def build_program(self):
         self.sample_program = fluid.Program()
         self.predict_program = fluid.Program()
@@ -88,9 +75,8 @@ class AtariAgent(parl.Agent):
             total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                 obs, actions, advantages, target_values, lr, entropy_coeff)
-            self.learn_outputs = [
-                total_loss.name, pi_loss.name, vf_loss.name, entropy.name
-            ]
+            self.learn_outputs = [total_loss, pi_loss, vf_loss, entropy]
+        self.learn_program = parl.compile(self.learn_program, total_loss)

     def sample(self, obs_np):
         """
@@ -161,7 +147,8 @@ class AtariAgent(parl.Agent):
         lr = self.lr_scheduler.step(step_num=obs_np.shape[0])
         entropy_coeff = self.entropy_coeff_scheduler.step()
-        total_loss, pi_loss, vf_loss, entropy = self.learn_exe.run(
+        total_loss, pi_loss, vf_loss, entropy = self.fluid_executor.run(
+            self.learn_program,
             feed={
                 'obs': obs_np,
                 'actions': actions_np,
@@ -15,7 +15,7 @@ Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco
 ## How to use
 ### Dependencies:
 + python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -15,8 +15,7 @@ Please see [here](https://gym.openai.com/envs/#atari) to know more about Atari games
 ## How to use
 ### Dependencies:
 + python2.7 or python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -16,7 +16,7 @@ Results with one learner (in a P40 GPU) and 24 simulators (in 12 CPU) in 10 million sample steps
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -20,7 +20,7 @@ Result with one learner (in a P40 GPU) and 32 actors (in 32 CPUs).
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -28,19 +28,6 @@ class AtariAgent(parl.Agent):
         self.act_dim = act_dim
         super(AtariAgent, self).__init__(algorithm)

-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.use_experimental_executor = True
-        exec_strategy.num_threads = 4
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.remove_unnecessary_lock = True
-
-        # Use ParallelExecutor to make learn program run faster
-        self.learn_exe = fluid.ParallelExecutor(
-            use_cuda=machine_info.is_gpu_available(),
-            main_program=self.learn_program,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)
-
         if learn_data_provider:
             self.learn_reader.decorate_tensor_provider(learn_data_provider)
             self.learn_reader.start()
@@ -86,9 +73,10 @@ class AtariAgent(parl.Agent):
             vtrace_loss, kl = self.alg.learn(obs, actions, behaviour_logits,
                                              rewards, dones, lr, entropy_coeff)
             self.learn_outputs = [
-                vtrace_loss.total_loss.name, vtrace_loss.pi_loss.name,
-                vtrace_loss.vf_loss.name, vtrace_loss.entropy.name, kl.name
+                vtrace_loss.total_loss, vtrace_loss.pi_loss,
+                vtrace_loss.vf_loss, vtrace_loss.entropy, kl
             ]
+        self.learn_program = parl.compile(self.learn_program, vtrace_loss.total_loss)

     def sample(self, obs_np):
         """
@@ -125,6 +113,6 @@ class AtariAgent(parl.Agent):
         return predict_actions

     def learn(self):
-        total_loss, pi_loss, vf_loss, entropy, kl = self.learn_exe.run(
-            fetch_list=self.learn_outputs)
+        total_loss, pi_loss, vf_loss, entropy, kl = self.fluid_executor.run(
+            self.learn_program, fetch_list=self.learn_outputs)
         return total_loss, pi_loss, vf_loss, entropy, kl
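Worth noting from the two hunks above: once the ParallelExecutor is gone, `learn_outputs` holds the `Variable` objects themselves instead of their `.name` strings, since `Executor.run` accepts either form in `fetch_list`. A hedged sketch of the pattern both agents now share (the class and argument names here are illustrative, not from the PR):

```python
import parl


class CompiledAgentSketch(object):
    """Illustrative distillation of the post-PR learn() pattern."""

    def __init__(self, learn_program, loss, outputs, executor):
        # Compile once; the result is passed to Executor.run like a Program.
        self.learn_program = parl.compile(learn_program, loss)
        self.learn_outputs = outputs  # fluid Variables, no .name needed
        self.fluid_executor = executor

    def learn(self, feed):
        return self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=self.learn_outputs)
```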
@@ -19,7 +19,7 @@ For more technical details about our solution, we provide:
 ## Dependencies
 - python3.6
-- [paddlepaddle>=1.2.1](https://github.com/PaddlePaddle/Paddle)
+- [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 - [osim-rl](https://github.com/stanfordnmbl/osim-rl)
 - [grpcio==1.12.1](https://grpc.io/docs/quickstart/python.html)
 - tqdm
@@ -18,7 +18,7 @@ Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco
 ## How to use
 ### Dependencies:
 + python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -4,7 +4,7 @@ Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL
 ## How to use
 ### Dependencies:
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
@@ -24,6 +24,7 @@ from parl.utils.utils import _HAS_FLUID
 if _HAS_FLUID:
     from parl.core.fluid import *
+    from parl.core.fluid.plutils.compiler import compile
 else:
     print(
         "WARNING:PARL: Failed to import paddle. Only APIs for parallelization are available."
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle.fluid as fluid


def compile(program, loss=None):
    """Transform a program into a new program that runs on multiple CPUs or GPUs.

    This function uses `fluid.compiler.CompiledProgram` to transform the program.
    For more details on speeding up a program, please visit
    https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/user_guides/howto/training/single_node.html#id7

    Args:
        program(fluid.Program): a normal fluid program.
        loss(fluid.framework.Variable): Optional. The loss tensor of a trainable
            program. Set it to None if you are transforming a prediction or
            evaluation program.
    """
    if loss is not None:
        assert isinstance(
            loss, fluid.framework.Variable
        ), 'type of loss is expected to be a fluid tensor'

    # TODO: after solving the learning rate issue that occurs in training
    # the A2C algorithm, set CPU_NUM to 3.
    os.environ['CPU_NUM'] = '1'

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 3 * 4
    build_strategy = fluid.BuildStrategy()
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    # Guard against loss=None (prediction/evaluation programs have no loss).
    return fluid.compiler.CompiledProgram(program).with_data_parallel(
        loss_name=loss.name if loss is not None else None,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)
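A brief usage sketch of the prediction path (loss omitted), assuming the `None` guard above; the toy program is a placeholder, not code from this PR:

```python
# Sketch: compiling an inference-only program (no loss, no backward pass).
predict_program = fluid.Program()
with fluid.program_guard(predict_program):
    obs = fluid.layers.data(name='obs', shape=[4], dtype='float32')
    act_prob = fluid.layers.fc(input=obs, size=2, act='softmax')

predict_program = compile(predict_program)  # loss=None: data-parallel inference
```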
@@ -71,10 +71,18 @@ def is_PY3():
     return sys.version_info[0] == 3

+def get_fluid_version():
+    import paddle
+    fluid_version = int(paddle.__version__.replace('.', ''))
+    return fluid_version
+
 MAX_INT32 = 0x7fffffff

 try:
     from paddle import fluid
+    fluid_version = get_fluid_version()
+    assert fluid_version >= 151, "PARL requires paddle>=1.5.1"
     _HAS_FLUID = True
 except ImportError:
     _HAS_FLUID = False
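For reference, how this string-to-int version check behaves (a caveat, not part of the diff): it only orders versions correctly while every version component is a single digit.

```python
# '1.5.1' -> '151' -> 151: passes the >= 151 gate.
assert int('1.5.1'.replace('.', '')) == 151
# '1.4.0' -> 140: correctly rejected.
assert int('1.4.0'.replace('.', '')) == 140
# Caveat: a hypothetical '1.10.0' would parse to 1100 and compare as newer
# than '2.0.0' (-> 200), so the scheme assumes single-digit components.
```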