Unverified commit d33f3002, authored by Bo Zhou, committed by GitHub

Replace PE (ParallelExecutor) with compiler (new feature in paddle 1.5.1). (#99)

* fix the compatibility issue

* fix the comment issue

* support paddle 1.5.1 and replace PE with compiler

* yapf & copyright

* yapf

* fix the teamcity problem

* fix the teamcity problem

* fix comment

* only support paddle 1.5.1

* CMake

* fix comment
Parent 33516338
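At its core, the commit removes the hand-rolled `fluid.ParallelExecutor` setup in each agent and instead compiles the learn program once with the new `parl.compile` helper, then runs it through the ordinary executor. Below is a minimal sketch of the new pattern on a toy linear-regression program (illustrative names and shapes only, not code from this repository):

```python
import numpy as np
import paddle.fluid as fluid
import parl  # requires paddle>=1.5.1, per this commit

# Hypothetical toy training program: one fully-connected layer with MSE loss.
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(pred, y))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

executor = fluid.Executor(fluid.CPUPlace())
executor.run(startup_program)

# New style introduced by this commit: compile the trainable program once,
# then pass the compiled program to run() explicitly instead of building a
# fluid.ParallelExecutor around it.
train_program = parl.compile(train_program, loss)

x_np = np.random.rand(32, 13).astype('float32')
y_np = np.random.rand(32, 1).astype('float32')
loss_np, = executor.run(
    train_program,
    feed={'x': x_np, 'y': y_np},
    fetch_list=[loss])
print(loss_np)
```

Note that `fetch_list` takes the loss Variable itself rather than its `.name`; the agent changes below switch `learn_outputs` to Variables for the same reason.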
@@ -132,7 +132,6 @@ function main() {
     pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     pip3.6 install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     /root/miniconda3/envs/empty_env/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
-    /root/miniconda3/envs/paddle1.4.0/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     run_test_with_gpu
     run_test_with_cpu
     run_import_test
 # requirements for unittest
-paddlepaddle-gpu==1.3.0.post97
+paddlepaddle-gpu==1.5.1.post97
 gym
 details
 parameterized
@@ -48,11 +48,6 @@ function(import_test TARGET_NAME)
              /root/miniconda3/envs/empty_env/bin/python -u ${py_test_SRCS} ${py_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-    add_test(NAME ${TARGET_NAME}_with_paddle1.4.0
-             COMMAND env PYTHONPATH=.:${py_test_ENVS}
-             /root/miniconda3/envs/paddle1.4.0/bin/python -u ${py_test_SRCS} ${py_test_ARGS}
-             WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 endfunction()

 function(docs_test)
@@ -106,7 +106,7 @@ For users, they can write code in a simple way, just like writing multi-thread c
 # Install:
 ### Dependencies
 - Python 2.7 or 3.5+.
-- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) >=1.2.1 (**Optional**, if you only want to use APIs related to parallelization alone)
+- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) >=1.5.1 (**Optional**, if you only want to use APIs related to parallelization alone)
 ```
@@ -16,7 +16,7 @@ Mean episode reward in training process after 10 million sample steps.
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -38,19 +38,6 @@ class AtariAgent(parl.Agent):
         self.entropy_coeff_scheduler = PiecewiseScheduler(
             config['entropy_coeff_scheduler'])
-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.use_experimental_executor = True
-        exec_strategy.num_threads = 4
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.remove_unnecessary_lock = True
-        # Use ParallelExecutor to make learn program run faster
-        self.learn_exe = fluid.ParallelExecutor(
-            use_cuda=machine_info.is_gpu_available(),
-            main_program=self.learn_program,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)

     def build_program(self):
         self.sample_program = fluid.Program()
         self.predict_program = fluid.Program()
@@ -88,9 +75,8 @@ class AtariAgent(parl.Agent):
             total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                 obs, actions, advantages, target_values, lr, entropy_coeff)
-            self.learn_outputs = [
-                total_loss.name, pi_loss.name, vf_loss.name, entropy.name
-            ]
+            self.learn_outputs = [total_loss, pi_loss, vf_loss, entropy]
+        self.learn_program = parl.compile(self.learn_program, total_loss)

     def sample(self, obs_np):
         """
@@ -161,7 +147,8 @@ class AtariAgent(parl.Agent):
         lr = self.lr_scheduler.step(step_num=obs_np.shape[0])
         entropy_coeff = self.entropy_coeff_scheduler.step()
-        total_loss, pi_loss, vf_loss, entropy = self.learn_exe.run(
+        total_loss, pi_loss, vf_loss, entropy = self.fluid_executor.run(
+            self.learn_program,
             feed={
                 'obs': obs_np,
                 'actions': actions_np,
@@ -15,7 +15,7 @@ Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco
 ## How to use
 ### Dependencies:
 + python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -15,8 +15,7 @@ Please see [here](https://gym.openai.com/envs/#atari) to know more about Atari g
 ## How to use
 ### Dependencies:
-+ python2.7 or python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -16,7 +16,7 @@ Results with one learner (in a P40 GPU) and 24 simulators (in 12 CPU) in 10 mill
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -20,7 +20,7 @@ Result with one learner (in a P40 GPU) and 32 actors (in 32 CPUs).
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -28,19 +28,6 @@ class AtariAgent(parl.Agent):
         self.act_dim = act_dim
         super(AtariAgent, self).__init__(algorithm)
-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.use_experimental_executor = True
-        exec_strategy.num_threads = 4
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.remove_unnecessary_lock = True
-        # Use ParallelExecutor to make learn program run faster
-        self.learn_exe = fluid.ParallelExecutor(
-            use_cuda=machine_info.is_gpu_available(),
-            main_program=self.learn_program,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)

         if learn_data_provider:
             self.learn_reader.decorate_tensor_provider(learn_data_provider)
             self.learn_reader.start()
@@ -86,9 +73,10 @@ class AtariAgent(parl.Agent):
             vtrace_loss, kl = self.alg.learn(obs, actions, behaviour_logits,
                                              rewards, dones, lr, entropy_coeff)
             self.learn_outputs = [
-                vtrace_loss.total_loss.name, vtrace_loss.pi_loss.name,
-                vtrace_loss.vf_loss.name, vtrace_loss.entropy.name, kl.name
+                vtrace_loss.total_loss, vtrace_loss.pi_loss,
+                vtrace_loss.vf_loss, vtrace_loss.entropy, kl
             ]
+        self.learn_program = parl.compile(self.learn_program, vtrace_loss.total_loss)

     def sample(self, obs_np):
         """
@@ -125,6 +113,6 @@ class AtariAgent(parl.Agent):
         return predict_actions

     def learn(self):
-        total_loss, pi_loss, vf_loss, entropy, kl = self.learn_exe.run(
-            fetch_list=self.learn_outputs)
+        total_loss, pi_loss, vf_loss, entropy, kl = self.fluid_executor.run(
+            self.learn_program, fetch_list=self.learn_outputs)
         return total_loss, pi_loss, vf_loss, entropy, kl
@@ -19,7 +19,7 @@ For more technical details about our solution, we provide:
 ## Dependencies
 - python3.6
-- [paddlepaddle>=1.2.1](https://github.com/PaddlePaddle/Paddle)
+- [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 - [osim-rl](https://github.com/stanfordnmbl/osim-rl)
 - [grpcio==1.12.1](https://grpc.io/docs/quickstart/python.html)
 - tqdm
@@ -18,7 +18,7 @@ Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco
 ## How to use
 ### Dependencies:
 + python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -4,7 +4,7 @@ Train an agent with PARL to solve the CartPole problem, a classical benchmark in
 ## How to use
 ### Dependencies:
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
@@ -24,6 +24,7 @@ from parl.utils.utils import _HAS_FLUID
 if _HAS_FLUID:
     from parl.core.fluid import *
+    from parl.core.fluid.plutils.compiler import compile
 else:
     print(
         "WARNING:PARL: Failed to import paddle. Only APIs for parallelization are available."
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle.fluid as fluid


def compile(program, loss=None):
    """Transfer the program into a new program that runs on multiple CPUs or GPUs.

    This function uses `fluid.compiler.CompiledProgram` to transfer the program.
    For more detail about speeding up the program, please visit
    https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/user_guides/howto/training/single_node.html#id7

    Args:
        program(fluid.Program): a normal fluid program.
        loss(fluid.framework.Variable): Optional. The loss tensor of a trainable program.
            Set it to None if you are transferring a prediction or evaluation program.
    """
    if loss is not None:
        assert isinstance(
            loss, fluid.framework.Variable
        ), 'type of loss is expected to be a fluid tensor'

    # TODO: after solving the learning rate issue that occurs in training the A2C algorithm, set CPU_NUM to 3.
    os.environ['CPU_NUM'] = '1'

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 3 * 4
    build_strategy = fluid.BuildStrategy()
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    return fluid.compiler.CompiledProgram(program).with_data_parallel(
        loss_name=loss.name if loss is not None else None,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)
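A short, self-contained usage sketch for the helper above, exercising the `loss=None` branch on a hypothetical prediction-only program (illustrative names, not repository code):

```python
import numpy as np
import paddle.fluid as fluid
from parl.core.fluid.plutils.compiler import compile  # also exported as parl.compile

# Hypothetical inference program: a softmax head over a 4-dim observation.
infer_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(infer_program, startup_program):
    obs = fluid.layers.data(name='obs', shape=[4], dtype='float32')
    probs = fluid.layers.fc(input=obs, size=2, act='softmax')

executor = fluid.Executor(fluid.CPUPlace())
executor.run(startup_program)

# No loss tensor here, so compile() is called without one (loss_name stays None).
infer_program = compile(infer_program)

obs_np = np.random.rand(8, 4).astype('float32')
probs_np, = executor.run(
    infer_program, feed={'obs': obs_np}, fetch_list=[probs])
print(probs_np.shape)  # (8, 2)
```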
@@ -71,10 +71,18 @@ def is_PY3():
     return sys.version_info[0] == 3


+def get_fluid_version():
+    import paddle
+    fluid_version = int(paddle.__version__.replace('.', ''))
+    return fluid_version
+
+
 MAX_INT32 = 0x7fffffff

 try:
     from paddle import fluid
+    fluid_version = get_fluid_version()
+    assert fluid_version >= 151, "PARL requires paddle>=1.5.1"
     _HAS_FLUID = True
 except ImportError:
     _HAS_FLUID = False
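For reference, a quick illustration of how the version string is flattened into the integer compared against 151 by the check above (values shown are examples only):

```python
# '1.5.1' -> 151 (passes), '1.4.1' -> 141 (fails the assert), '1.6.0' -> 160 (passes)
for version in ['1.5.1', '1.4.1', '1.6.0']:
    flattened = int(version.replace('.', ''))
    print(version, '->', flattened, 'ok' if flattened >= 151 else 'too old')
```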