Unverified commit d33f3002, authored by Bo Zhou, committed by GitHub

Replace ParallelExecutor (PE) with compiler (new feature in Paddle 1.5.1). (#99)

* fix the compatibility issue

* fix the comment issue

* support paddle 1.5.1 and replace PE with compiler

* yapf & copyright

* yapf

* fix the teamcity problem

* fix the teamcity problem

* fix comment

* only support paddle 1.5.1

* Cmake

* fix comment
Parent 33516338
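The gist of the change, distilled from the diff below: each agent used to build a dedicated `fluid.ParallelExecutor` for its learn program; now the program is compiled once with `parl.compile` (a thin wrapper over `fluid.compiler.CompiledProgram`) and run through the agent's ordinary executor. A minimal before/after sketch, assuming paddle 1.5.1 on CPU (the toy program, `x`, `y`, and the SGD settings are placeholders, not code from this PR):

```python
import numpy as np
import paddle.fluid as fluid
import parl

# A toy trainable program standing in for an agent's learn_program.
program, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(program, startup):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.reduce_mean(fluid.layers.square(pred - y))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)

# Before (paddle <= 1.4): a dedicated ParallelExecutor, fetching by name.
#   learn_exe = fluid.ParallelExecutor(use_cuda=False, main_program=program,
#                                      loss_name=loss.name)
#   loss_np, = learn_exe.run(fetch_list=[loss.name], feed={...})

# After (paddle 1.5.1): compile once, keep using the plain executor.
program = parl.compile(program, loss)
loss_np, = exe.run(program,
                   feed={'x': np.random.rand(32, 4).astype('float32'),
                         'y': np.random.rand(32, 1).astype('float32')},
                   fetch_list=[loss])
```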
@@ -132,7 +132,6 @@ function main() {
     pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     pip3.6 install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     /root/miniconda3/envs/empty_env/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
-    /root/miniconda3/envs/paddle1.4.0/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple .
     run_test_with_gpu
     run_test_with_cpu
     run_import_test
 # requirements for unittest
-paddlepaddle-gpu==1.3.0.post97
+paddlepaddle-gpu==1.5.1.post97
 gym
 details
 parameterized
@@ -48,11 +48,6 @@ function(import_test TARGET_NAME)
             /root/miniconda3/envs/empty_env/bin/python -u ${py_test_SRCS} ${py_test_ARGS}
         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-    add_test(NAME ${TARGET_NAME}_with_paddle1.4.0
-        COMMAND env PYTHONPATH=.:${py_test_ENVS}
-            /root/miniconda3/envs/paddle1.4.0/bin/python -u ${py_test_SRCS} ${py_test_ARGS}
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 endfunction()

 function(docs_test)
@@ -106,7 +106,7 @@ For users, they can write code in a simple way, just like writing multi-thread code
 # Install:
 ### Dependencies
 - Python 2.7 or 3.5+.
-- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) >=1.2.1 (**Optional**, if you only want to use APIs related to parallelization alone)
+- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) >=1.5.1 (**Optional**, if you only want to use APIs related to parallelization alone)
 ```
@@ -16,7 +16,7 @@ Mean episode reward in training process after 10 million sample steps.
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -38,19 +38,6 @@ class AtariAgent(parl.Agent):
         self.entropy_coeff_scheduler = PiecewiseScheduler(
             config['entropy_coeff_scheduler'])

-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.use_experimental_executor = True
-        exec_strategy.num_threads = 4
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.remove_unnecessary_lock = True
-
-        # Use ParallelExecutor to make learn program run faster
-        self.learn_exe = fluid.ParallelExecutor(
-            use_cuda=machine_info.is_gpu_available(),
-            main_program=self.learn_program,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)
-
     def build_program(self):
         self.sample_program = fluid.Program()
         self.predict_program = fluid.Program()
@@ -88,9 +75,8 @@ class AtariAgent(parl.Agent):
             total_loss, pi_loss, vf_loss, entropy = self.alg.learn(
                 obs, actions, advantages, target_values, lr, entropy_coeff)
-            self.learn_outputs = [
-                total_loss.name, pi_loss.name, vf_loss.name, entropy.name
-            ]
+            self.learn_outputs = [total_loss, pi_loss, vf_loss, entropy]
+        self.learn_program = parl.compile(self.learn_program, total_loss)

     def sample(self, obs_np):
         """
@@ -161,7 +147,8 @@ class AtariAgent(parl.Agent):
         lr = self.lr_scheduler.step(step_num=obs_np.shape[0])
         entropy_coeff = self.entropy_coeff_scheduler.step()
-        total_loss, pi_loss, vf_loss, entropy = self.learn_exe.run(
+        total_loss, pi_loss, vf_loss, entropy = self.fluid_executor.run(
+            self.learn_program,
             feed={
                 'obs': obs_np,
                 'actions': actions_np,
@@ -15,7 +15,7 @@ Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco
 ## How to use
 ### Dependencies:
 + python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -15,8 +15,7 @@ Please see [here](https://gym.openai.com/envs/#atari) to know more about Atari games
 ## How to use
 ### Dependencies:
 + python2.7 or python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -16,7 +16,7 @@ Results with one learner (in a P40 GPU) and 24 simulators (in 12 CPU) in 10 million sample steps
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -20,7 +20,7 @@ Result with one learner (in a P40 GPU) and 32 actors (in 32 CPUs).
 ## How to use
 ### Dependencies
-+ [paddlepaddle>=1.3.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + atari-py
@@ -28,19 +28,6 @@ class AtariAgent(parl.Agent):
         self.act_dim = act_dim
         super(AtariAgent, self).__init__(algorithm)

-        exec_strategy = fluid.ExecutionStrategy()
-        exec_strategy.use_experimental_executor = True
-        exec_strategy.num_threads = 4
-        build_strategy = fluid.BuildStrategy()
-        build_strategy.remove_unnecessary_lock = True
-
-        # Use ParallelExecutor to make learn program run faster
-        self.learn_exe = fluid.ParallelExecutor(
-            use_cuda=machine_info.is_gpu_available(),
-            main_program=self.learn_program,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)
-
         if learn_data_provider:
             self.learn_reader.decorate_tensor_provider(learn_data_provider)
             self.learn_reader.start()
@@ -86,9 +73,10 @@ class AtariAgent(parl.Agent):
             vtrace_loss, kl = self.alg.learn(obs, actions, behaviour_logits,
                                              rewards, dones, lr, entropy_coeff)
             self.learn_outputs = [
-                vtrace_loss.total_loss.name, vtrace_loss.pi_loss.name,
-                vtrace_loss.vf_loss.name, vtrace_loss.entropy.name, kl.name
+                vtrace_loss.total_loss, vtrace_loss.pi_loss,
+                vtrace_loss.vf_loss, vtrace_loss.entropy, kl
             ]
+        self.learn_program = parl.compile(self.learn_program, vtrace_loss.total_loss)

     def sample(self, obs_np):
         """
@@ -125,6 +113,6 @@ class AtariAgent(parl.Agent):
         return predict_actions

     def learn(self):
-        total_loss, pi_loss, vf_loss, entropy, kl = self.learn_exe.run(
-            fetch_list=self.learn_outputs)
+        total_loss, pi_loss, vf_loss, entropy, kl = self.fluid_executor.run(
+            self.learn_program, fetch_list=self.learn_outputs)
         return total_loss, pi_loss, vf_loss, entropy, kl
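Worth noting from the two hunks above: once the ParallelExecutor is gone, `learn_outputs` holds the `Variable` objects themselves instead of their `.name` strings, since `Executor.run` accepts either form in `fetch_list`. A hedged sketch of the pattern both agents now share (the class and argument names here are illustrative, not from the PR):

```python
import parl


class CompiledAgentSketch(object):
    """Illustrative distillation of the post-PR learn() pattern."""

    def __init__(self, learn_program, loss, outputs, executor):
        # Compile once; the result is passed to Executor.run like a Program.
        self.learn_program = parl.compile(learn_program, loss)
        self.learn_outputs = outputs  # fluid Variables, no .name needed
        self.fluid_executor = executor

    def learn(self, feed):
        return self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=self.learn_outputs)
```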
@@ -19,7 +19,7 @@ For more technical details about our solution, we provide:
 ## Dependencies
 - python3.6
-- [paddlepaddle>=1.2.1](https://github.com/PaddlePaddle/Paddle)
+- [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 - [osim-rl](https://github.com/stanfordnmbl/osim-rl)
 - [grpcio==1.12.1](https://grpc.io/docs/quickstart/python.html)
 - tqdm
@@ -18,7 +18,7 @@ Please see [here](https://github.com/openai/mujoco-py) to know more about Mujoco
 ## How to use
 ### Dependencies:
 + python3.5+
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
 + tqdm
@@ -4,7 +4,7 @@ Train an agent with PARL to solve the CartPole problem, a classical benchmark in RL
 ## How to use
 ### Dependencies:
-+ [paddlepaddle>=1.0.0](https://github.com/PaddlePaddle/Paddle)
++ [paddlepaddle>=1.5.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + gym
@@ -24,6 +24,7 @@ from parl.utils.utils import _HAS_FLUID
 if _HAS_FLUID:
     from parl.core.fluid import *
+    from parl.core.fluid.plutils.compiler import compile
 else:
     print(
         "WARNING:PARL: Failed to import paddle. Only APIs for parallelization are available."
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle.fluid as fluid


def compile(program, loss=None):
    """Transform a program into a new program that runs on multiple CPUs or GPUs.

    This function uses `fluid.compiler.CompiledProgram` to transform the program.
    For more details on speeding up a program, please visit
    https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/user_guides/howto/training/single_node.html#id7

    Args:
        program(fluid.Program): a normal fluid program.
        loss(fluid.framework.Variable): Optional. The loss tensor of a trainable
            program. Set it to None if you are transforming a prediction or
            evaluation program.
    """
    if loss is not None:
        assert isinstance(
            loss, fluid.framework.Variable
        ), 'type of loss is expected to be a fluid tensor'

    # TODO: after solving the learning rate issue that occurs in training
    # the A2C algorithm, set CPU_NUM to 3.
    os.environ['CPU_NUM'] = '1'

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 3 * 4
    build_strategy = fluid.BuildStrategy()
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    # Guard against loss=None (prediction/evaluation programs have no loss).
    return fluid.compiler.CompiledProgram(program).with_data_parallel(
        loss_name=loss.name if loss is not None else None,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)
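A brief usage sketch of the prediction path (loss omitted), assuming the `None` guard above; the toy program is a placeholder, not code from this PR:

```python
# Sketch: compiling an inference-only program (no loss, no backward pass).
predict_program = fluid.Program()
with fluid.program_guard(predict_program):
    obs = fluid.layers.data(name='obs', shape=[4], dtype='float32')
    act_prob = fluid.layers.fc(input=obs, size=2, act='softmax')

predict_program = compile(predict_program)  # loss=None: data-parallel inference
```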
@@ -71,10 +71,18 @@ def is_PY3():
     return sys.version_info[0] == 3

+def get_fluid_version():
+    import paddle
+    fluid_version = int(paddle.__version__.replace('.', ''))
+    return fluid_version
+
 MAX_INT32 = 0x7fffffff

 try:
     from paddle import fluid
+    fluid_version = get_fluid_version()
+    assert fluid_version >= 151, "PARL requires paddle>=1.5.1"
     _HAS_FLUID = True
 except ImportError:
     _HAS_FLUID = False
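For reference, how this string-to-int version check behaves (a caveat, not part of the diff): it only orders versions correctly while every version component is a single digit.

```python
# '1.5.1' -> '151' -> 151: passes the >= 151 gate.
assert int('1.5.1'.replace('.', '')) == 151
# '1.4.0' -> 140: correctly rejected.
assert int('1.4.0'.replace('.', '')) == 140
# Caveat: a hypothetical '1.10.0' would parse to 1100 and compare as newer
# than '2.0.0' (-> 200), so the scheme assumes single-digit components.
```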