Commit 1a1e1f03 authored by Bo Zhou, committed by Hongsheng Zeng

redesign basic class in PARL (#26)

* redesign basic class in PARL

* code style fixed

* update yaml's version

* update yaml's version & update code to fix style problem

* add debug message for function

* delete test code

* rename function: has_fun -> has_func
Parent 2fc4e8c3
......@@ -4,8 +4,8 @@ repos:
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
- repo: https://github.com/pre-commit/mirrors-yapf.git
sha: v0.24.0
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
......
......@@ -20,20 +20,20 @@ option(WITH_TESTING "Include unit testing" ON)
set(PADDLE_PYTHON_PATH "" CACHE STRING "Python path to PaddlePaddle Fluid")
function(py_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=.:${py_test_ENVS}
python -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=.:${py_test_ENVS}
python -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endfunction()
if (WITH_TESTING)
file(GLOB_RECURSE TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py ENVS ${PADDLE_PYTHON_PATH})
endforeach()
file(GLOB_RECURSE TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_test.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py ENVS ${PADDLE_PYTHON_PATH})
endforeach()
endif()
......@@ -169,5 +169,6 @@ class SimpleQ(Algorithm):
x=(exploration_counter_ > self.total_exploration_batches),
dtype="float32")
## if the counter already hits the limit, we do not change the counter
layers.assign(switch * counter +
(1 - switch) * exploration_counter_, counter)
layers.assign(
switch * counter + (1 - switch) * exploration_counter_,
counter)
......@@ -37,8 +37,8 @@ def check_last_exp_error(is_last_exp, idx, game_status):
def check_type_error(type1, type2):
if type1.__name__ != type2.__name__:
raise TypeError('{} expected, but {} given.'
.format(type1.__name__, type2.__name__))
raise TypeError('{} expected, but {} given.'.format(
type1.__name__, type2.__name__))
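A hedged usage note: the check compares class names only, so it passes silently for identical types and raises otherwise:
```python
check_type_error(int, int)    # passes silently
check_type_error(int, float)  # raises TypeError: 'int expected, but float given.'
```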
def check_eq(v1, v2):
......
......@@ -107,8 +107,8 @@ class ReplayBuffer(object):
for _ in xrange(num_samples):
while True:
idx = random.randint(0, len(self.buffer) - 1)
if not self.buffer_end(idx) and not self.buffer[
idx].game_status:
if not self.buffer_end(
idx) and not self.buffer[idx].game_status:
break
yield Sample(idx, 1)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.layers import Network
import parl.framework.policy_distribution as pd
from abc import ABCMeta, abstractmethod
def check_duplicate_spec_names(model):
"""
Check if there are two specs that have the same name.
"""
specs = model.get_input_specs() \
+ model.get_action_specs() \
+ model.get_state_specs() \
+ model.get_reward_specs()
names = [name for name, _ in specs]
duplicates = set([n for n in names if names.count(n) > 1])
assert not duplicates, \
"duplicate names with different specs: " + " ".join(duplicates)
class Model(Network):
"""
A Model is owned by an Algorithm. It implements the entire network model of
a specific problem.
"""
__metaclass__ = ABCMeta
def __init__(self):
super(Model, self).__init__()
@abstractmethod
def get_input_specs(self):
"""
Output: list of tuples
"""
pass
def get_state_specs(self):
"""
States are optional to a Model.
Output: list of tuples
"""
return []
@abstractmethod
def get_action_specs(self):
"""
Output: list of tuples
"""
pass
def get_reward_specs(self):
"""
By default, a scalar reward.
The user can specify a vector of rewards for some problems.
"""
return [("reward", dict(shape=[1]))]
def policy(self, inputs, states):
"""
Return: action_dists: a dict of action distribution objects
states
An action distribution object can be created with
PolicyDistribution().
Optional: a model might not always have to implement policy()
"""
raise NotImplementedError()
def value(self, inputs, states):
"""
Return: values: a dict of estimated values for the current observations and states
For example, "q_value" and "v_value"
Optional: a model might not always have to implement value()
"""
raise NotImplementedError()
class Algorithm(object):
"""
An Algorithm implements two functions:
1. predict() computes forward
2. learn() computes a cost for optimization
An algorithm should be only part of a network. The user only needs to
implement the rest of the network in the Model class.
"""
def __init__(self, model, hyperparas, gpu_id):
assert isinstance(model, Model)
check_duplicate_spec_names(model)
self.model = model
self.hp = hyperparas
self.gpu_id = gpu_id
def get_input_specs(self):
return self.model.get_input_specs()
def get_state_specs(self):
return self.model.get_state_specs()
def get_action_specs(self):
"""
For non-RL algorithms, this can return []
"""
return self.model.get_action_specs()
def get_reward_specs(self):
"""
For non-RL algorithms, this can return []
"""
return self.model.get_reward_specs()
def before_every_batch(self):
"""
A callback function inserted before every batch of training.
See ComputationTask.learn()
"""
pass
def after_every_batch(self):
"""
A callback function inserted after every batch of training.
See ComputationTask.learn()
"""
pass
def predict(self, inputs, states):
"""
Given the inputs and states, this function does forward prediction and updates states.
Input: inputs(dict), states(dict)
Output: actions(dict), states(dict)
Optional: an algorithm might not implement predict()
"""
pass
def _rl_predict(self, behavior_model, inputs, states):
"""
Given a behavior model (not necessarily equal to self.model), this function
performs a normal RL prediction according to inputs and states.
A behavior model different from self.model indicates off-policy training.
The user can choose to call this function for convenience.
"""
distributions, states = behavior_model.policy(inputs, states)
actions = {}
for key, dist in distributions.iteritems():
actions[key] = dist()
return actions, states
def learn(self, inputs, next_inputs, states, next_states, next_episode_end,
actions, rewards):
"""
This function computes a learning cost to be optimized.
The return should be the cost.
Output: cost(dict)
Optional: an algorithm might not implement learn()
"""
pass
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Network, Model
from abc import ABCMeta, abstractmethod
__all__ = ['Algorithm']
class Algorithm(object):
"""
Algorithm defines how we update the model. For example,
after defining the forward network in the `Network` class, you should define how to update the model here.
Before creating a customized algorithm, please check the algorithms already provided by PARL.
The most commonly used algorithms, such as DQN/DDPG/PPO, are provided in parl.algorithms, so go and have a try.
They are easy to use: just try parl.algorithms.DQN.
An Algorithm implements two functions:
1. define_predict() builds the forward process that was defined in Network
2. define_learn() computes a cost for optimization
An Algorithm should be the updating part of a network; the user only needs to
implement the rest of the network (the forward part) in the Model class.
"""
def __init__(self, model, hyperparas=None):
assert isinstance(model, Model)
self.model = model
self.hp = hyperparas
def define_predict(self, obs):
"""
Describe the process for building the prediction program.
"""
raise NotImplementedError()
def define_learn(self, obs, action, reward, next_obs, terminal):
"""define how to update the model here, you may need to do the following:
1. define a cost for optimization
2. specify your optimizer
3. optimize model defined in Model
"""
raise NotImplementedError()
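To make the new interface concrete, here is a hedged sketch of a subclass (hypothetical MyDQN; the loss is a deliberately simplified stand-in, not the reference DQN implementation):
```python
import paddle.fluid as fluid
from parl.framework.algorithm_base import Algorithm

class MyDQN(Algorithm):
    """A minimal sketch, assuming the Model exposes a value(obs) network
    whose output shape matches the reward (act_dim == 1)."""

    def define_predict(self, obs):
        # build the forward process: just reuse the network in the Model
        self.q_value = self.model.value(obs)

    def define_learn(self, obs, action, reward, next_obs, terminal):
        # 1. define a cost for optimization (an illustrative regression of
        #    Q(obs) towards the observed reward, not a full DQN loss)
        q_value = self.model.value(obs)
        cost = fluid.layers.reduce_mean(
            fluid.layers.square_error_cost(input=q_value, label=reward))
        # 2. specify your optimizer (the learning rate is an assumed hyperpara)
        optimizer = fluid.optimizer.Adam(learning_rate=self.hp['lr'])
        # 3. optimize the model defined in Model
        optimizer.minimize(cost)
        return cost
```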
......@@ -14,25 +14,16 @@
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model, Algorithm
from parl.framework.algorithm_base import Algorithm
from parl.framework.base import Model
def split_list(l, sizes):
"""
Split a list into several chunks, each chunk with a size in sizes
"""
chunks = []
offset = 0
for size in sizes:
chunks.append(l[offset:offset + size])
offset += size
return chunks
__all__ = ['ComputationTask']
class ComputationTask(object):
"""
A ComputationTask is responsible for the general data flow
outside the algorithm
outside the algorithm.
A ComputationTask is created in a bottom-up way:
a. create a Model
......@@ -43,140 +34,40 @@ class ComputationTask(object):
def __init__(self, algorithm):
assert isinstance(algorithm, Algorithm)
self.alg = algorithm
## create a Fluid executor
self._define_program()
place = fluid.CPUPlace() if self.alg.gpu_id < 0 \
else fluid.CUDAPlace(self.alg.gpu_id)
self.build_program()
self.fluid_executor = fluid.Executor(place)
self.fluid_executor.run(fluid.default_startup_program())
def _create_data_layers(self, specs):
data_layers = {}
for name, args in specs:
data_layers[name] = layers.data(name, **args)
return data_layers
def _define_program(self):
self.learn_program = fluid.Program()
self.predict_program = fluid.Program()
def _get_next_specs(specs):
return [("next_" + spec[0], spec[1]) for spec in specs]
def _select_data(data_layer_dict, specs):
return {name: data_layer_dict[name] for name, _ in specs}
input_specs = self.alg.get_input_specs()
state_specs = self.alg.get_state_specs()
next_input_specs = _get_next_specs(input_specs)
next_state_specs = _get_next_specs(state_specs)
action_specs = self.alg.get_action_specs()
reward_specs = self.alg.get_reward_specs()
next_episode_end_specs = [("next_episode_end", dict(shape=[1]))]
self.action_names = sorted([name for name, _ in action_specs])
self.state_names = sorted([name for name, _ in state_specs])
with fluid.program_guard(self.predict_program):
data_layer_dict = self._create_data_layers(input_specs)
data_layer_dict.update(self._create_data_layers(state_specs))
self.predict_feed_names = sorted(data_layer_dict.keys())
inputs = _select_data(data_layer_dict, input_specs)
states = _select_data(data_layer_dict, state_specs)
### call alg predict()
pred_actions, pred_states = self.alg.predict(inputs, states)
self.predict_fetch = [pred_actions, pred_states]
with fluid.program_guard(self.learn_program):
data_layer_dict = self._create_data_layers(input_specs)
data_layer_dict.update(self._create_data_layers(state_specs))
data_layer_dict.update(self._create_data_layers(next_input_specs))
data_layer_dict.update(self._create_data_layers(next_state_specs))
data_layer_dict.update(self._create_data_layers(action_specs))
data_layer_dict.update(self._create_data_layers(reward_specs))
data_layer_dict.update(
self._create_data_layers(next_episode_end_specs))
self.learn_feed_names = sorted(data_layer_dict.keys())
inputs = _select_data(data_layer_dict, input_specs)
states = _select_data(data_layer_dict, state_specs)
next_inputs = _select_data(data_layer_dict, next_input_specs)
next_states = _select_data(data_layer_dict, next_state_specs)
actions = _select_data(data_layer_dict, action_specs)
rewards = _select_data(data_layer_dict, reward_specs)
next_episode_end = _select_data(data_layer_dict,
next_episode_end_specs)
## call alg learn()
### TODO: implement a recurrent layer to strip the sequence information
self.cost = self.alg.learn(inputs, next_inputs, states,
next_states, next_episode_end, actions,
rewards)
def predict(self, inputs, states=dict()):
def build_program(self):
"""build your training program and prediction program here,
using the functions define_learn and define_predict in algorithm.
To build the program, you may need to do the following:
a. create a new program in fluid with program guard
b. define your data layer
c. build your training/prediction program, pass the data variable
defined in step b to `define_training/define_prediction` of algorithm
"""
ComputationTask predict API
This function is responsible for converting Python data to Fluid tensors, and
then converting the computational results back in the reverse direction.
"""
data = {}
data.update(inputs)
data.update(states)
assert sorted(data.keys()) == self.predict_feed_names, \
"field names mismatch: %s %s" % (data.keys(), self.predict_feed_names)
feed = {n: data[n] for n in self.predict_feed_names}
### run the predict_program and fetch the computational results
action_tensors, state_tensors = self.predict_fetch
action_tensors = list(action_tensors.iteritems())
state_tensors = list(state_tensors.iteritems())
result = self.fluid_executor.run(
self.predict_program,
feed=feed,
fetch_list=[t for _, t in action_tensors + state_tensors])
## actions and states are numpy arrays
actions, states = split_list(
result, [len(action_tensors), len(state_tensors)])
raise NotImplementedError
## wrap the results into dictionaries for better access
actions = dict(zip([name for name, _ in action_tensors], actions))
states = dict(zip([name for name, _ in state_tensors], states))
assert sorted(actions.keys()) == self.action_names
assert sorted(states.keys()) == self.state_names
return actions, states
def predict(self, obs):
"""This function will predict the action given current observation of the enviroment.
def learn(self,
inputs,
next_inputs,
next_episode_end,
actions,
rewards,
states=dict(),
next_states=dict()):
Note that this function only does the prediction and does not try any exploration.
To explore in the action space, you should create your own process in the `sample` function below.
Informally, this function is often used in the test process.
"""
ComputationTask learn API
This function is responsible for converting Python data to Fluid tensors, and
then converting the computational results back in the reverse direction.
raise NotImplementedError
def sample(self, obs):
"""This function will predict the action given current observation of the enviroment.
Additionaly, action will be added noise here to explore a new trajectory. In formally,
this function is often used in training process.
"""
data = {}
data.update(inputs)
data.update(next_inputs)
data.update(states)
data.update(next_states)
data.update(next_episode_end)
data.update(actions)
data.update(rewards)
assert sorted(data.keys()) == self.learn_feed_names, \
"field names mismatch: %s %s" % ()
feed = {n: data[n] for n in self.learn_feed_names}
raise NotImplementedError
self.alg.before_every_batch()
## run the learn program and fetch the sole cost output
result = self.fluid_executor.run(self.learn_program,
feed=feed,
fetch_list=[self.cost["cost"]])
self.alg.after_every_batch()
return dict(cost=result[0])
def learn(self, obs, action, reward, next_obs, terminal):
"""pass data to the training program to update model,
this function is the training interface for ComputationTask.
"""
raise NotImplementedError
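A hedged usage sketch of the redesigned task interface (MyConcreteTask, MyDQN and MyModel are hypothetical subclasses that implement the NotImplementedError stubs above):
```python
import numpy as np

# assumed concrete subclasses of Model / Algorithm / ComputationTask
alg = MyDQN(model=MyModel(), hyperparas=dict(lr=1e-3))
ct = MyConcreteTask(algorithm=alg)

obs = np.random.random((1, 4)).astype('float32')
exploratory_action = ct.sample(obs)   # training: prediction plus noise
greedy_action = ct.predict(obs)       # testing: prediction only

# training step: feed a transition batch to the learn program
next_obs = np.random.random((1, 4)).astype('float32')
reward = np.array([[1.0]], dtype='float32')
terminal = np.array([[0.0]], dtype='float32')
cost = ct.learn(obs, exploratory_action, reward, next_obs, terminal)
```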
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Base classes to define a Network and a Model.
"""
from abc import ABCMeta, abstractmethod
from parl.utils.utils import has_func
__all__ = ['Network', 'Model']
class Network(object):
"""
A Network is an unordered set of LayerFuncs or Networks.
"""
def sync_paras_to(self, target_net):
assert not target_net is self, "cannot copy between identical networks"
assert isinstance(target_net, Network)
assert self.__class__.__name__ == target_net.__class__.__name__, \
"must be the same class for para syncing!"
for attr in self.__dict__:
if not attr in target_net.__dict__:
continue
val = getattr(self, attr)
target_val = getattr(target_net, attr)
assert type(val) == type(target_val), \
"[Error]sync_paras_to failed, \
ensure that the destination model is generated by deep-copying the source model"
### TODO: sync paras recursively
if has_func(val, 'sync_paras_to'):
val.sync_paras_to(target_val)
elif isinstance(val, tuple) or isinstance(val, list) or isinstance(
val, set):
for v, tv in zip(val, target_val):
v.sync_paras_to(tv)
elif isinstance(val, dict):
for k in val.keys():
assert k in target_val
val[k].sync_paras_to(target_val[k])
else:
# for any other type, we do not copy
pass
class Model(Network):
"""
A Model is owned by an Algorithm.
It implements the entire network model (the forward part) to solve a specific problem.
In short, Model is responsible for the forward pass and
Algorithm is responsible for the backward pass.
Model can also be used to construct a target model, which has the same structure as the initial model.
Here is an example:
```python
class Actor(Model):
def __init__(self, obs_dim, act_dim):
self.obs_dim = obs_dim
self.act_dim = act_dim
self.fc1 = layers.fc(size=128, act='relu')
self.fc2 = layers.fc(size=64, act='relu')
actor = Actor(obs_dim=12, act_dim=2)
target_actor = copy.deepcopy(actor)
```
Note that it is the model structure that is copied from the initial actor;
parameters in the initial model haven't been copied to the target model.
To copy parameters, you must explicitly use the sync_paras_to function after the program is initialized.
"""
__metaclass__ = ABCMeta
def __init__(self):
super(Model, self).__init__()
def policy(self, *args):
"""
Implement your policy here.
The function will later be used by the algorithm.
Return: action_dists: a dict of action distribution objects
states
Optional: a model might not always have to implement policy()
"""
raise NotImplementedError()
def value(self, *args):
"""
Return: values: a dict of estimated values for the current observations and states
For example, "q_value" and "v_value"
Optional: a model might not always have to implement value()
"""
raise NotImplementedError()
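A hedged sketch of the copy-then-sync workflow described in the Model docstring above (MyModel is a hypothetical subclass; the fluid programs must be built and the startup program run before syncing):
```python
from copy import deepcopy

model = MyModel()                  # hypothetical Model subclass
target_model = deepcopy(model)     # copies the structure only
# ... build the fluid programs and run fluid.default_startup_program() ...
model.sync_paras_to(target_model)  # now the parameter values are copied too
```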
......@@ -113,7 +113,6 @@ def q_categorical_distribution(q_value):
assert len(q_value.shape) == 2, "[batch_size, num_actions]"
max_id = comf.argmax_layer(q_value)
prob = layers.cast(
x=layers.one_hot(
input=max_id, depth=q_value.shape[-1]),
x=layers.one_hot(input=max_id, depth=q_value.shape[-1]),
dtype="float32")
return CategoricalDistribution(prob)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Model
from parl.framework.algorithm_base import Algorithm
from copy import deepcopy
import numpy as np
import unittest
import sys
class Value(Model):
def __init__(self, obs_dim, act_dim):
self.obs_dim = obs_dim
self.act_dim = act_dim
self.fc1 = layers.fc(size=256, act='relu')
self.fc2 = layers.fc(size=128, act='relu')
self.fc3 = layers.fc(size=self.act_dim)
def value(self, obs):
out = self.fc1(obs)
out = self.fc2(out)
value = self.fc3(out)
return value
class QLearning(Algorithm):
def __init__(self, critic_model):
self.critic_model = critic_model
self.target_model = deepcopy(critic_model)
def define_predict(self, obs):
self.q_value = self.critic_model.value(obs)
self.q_target_value = self.target_model.value(obs)
class AlgorithmBaseTest(unittest.TestCase):
def test_sync_paras_in_one_program(self):
critic_model = Value(obs_dim=4, act_dim=1)
dqn = QLearning(critic_model)
pred_program = fluid.Program()
with fluid.program_guard(pred_program):
obs = layers.data(name='obs', shape=[4], dtype='float32')
dqn.define_predict(obs)
place = fluid.CUDAPlace(0)
executor = fluid.Executor(place)
executor.run(fluid.default_startup_program())
N = 10
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program,
feed={'obs': x},
fetch_list=[dqn.q_value, dqn.q_target_value])
self.assertNotEqual(outputs[0].flatten(), outputs[1].flatten())
critic_model.sync_paras_to(dqn.target_model)
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program,
feed={'obs': x},
fetch_list=[dqn.q_value, dqn.q_target_value])
self.assertEqual(outputs[0].flatten(), outputs[1].flatten())
def test_sync_paras_among_programs(self):
critic_model = Value(obs_dim=4, act_dim=1)
dqn = QLearning(critic_model)
dqn_2 = deepcopy(dqn)
pred_program = fluid.Program()
pred_program_2 = fluid.Program()
with fluid.program_guard(pred_program):
obs = layers.data(name='obs', shape=[4], dtype='float32')
dqn.define_predict(obs)
# algorithm #2
with fluid.program_guard(pred_program_2):
obs_2 = layers.data(name='obs_2', shape=[4], dtype='float32')
dqn_2.define_predict(obs_2)
place = fluid.CUDAPlace(0)
executor = fluid.Executor(place)
executor.run(fluid.default_startup_program())
N = 10
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program, feed={'obs': x}, fetch_list=[dqn.q_value])
outputs_2 = executor.run(
pred_program_2, feed={'obs_2': x}, fetch_list=[dqn_2.q_value])
self.assertNotEqual(outputs[0].flatten(), outputs_2[0].flatten())
dqn.critic_model.sync_paras_to(dqn_2.critic_model)
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program, feed={'obs': x}, fetch_list=[dqn.q_value])
outputs_2 = executor.run(
pred_program_2, feed={'obs_2': x}, fetch_list=[dqn_2.q_value])
self.assertEqual(outputs[0].flatten(), outputs_2[0].flatten())
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Model
from copy import deepcopy
import unittest
class Value(Model):
def __init__(self, obs_dim, act_dim):
self.obs_dim = obs_dim
self.act_dim = act_dim
self.fc1 = layers.fc(size=256, act='relu')
self.fc2 = layers.fc(size=128, act='relu')
class ModelBaseTest(unittest.TestCase):
def test_network_copy(self):
value = Value(obs_dim=2, act_dim=1)
target_value = deepcopy(value)
self.assertNotEqual(value.fc1.param_name, target_value.fc1.param_name)
self.assertNotEqual(value.fc1.bias_name, target_value.fc1.bias_name)
self.assertNotEqual(value.fc2.param_name, target_value.fc2.param_name)
self.assertNotEqual(value.fc2.bias_name, target_value.fc2.bias_name)
if __name__ == '__main__':
unittest.main()
......@@ -14,7 +14,8 @@
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model, Algorithm
from parl.framework.algorithm_base import Algorithm
from parl.framework.base import Model
from parl.layers import common_functions as comf
from parl.model_zoo.simple_models import SimpleModelDeterministic
import numpy as np
......@@ -37,8 +38,9 @@ class TestAlgorithmParas(unittest.TestCase):
Test case for copying parameters
"""
alg1 = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg1 = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg2 = deepcopy(alg1)
batch_size = 10
......@@ -90,8 +92,9 @@ class TestAlgorithmParas(unittest.TestCase):
"""
Test case for copying parameters between two different programs
"""
alg1 = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg1 = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg2 = deepcopy(alg1)
batch_size = 10
......@@ -117,12 +120,10 @@ class TestAlgorithmParas(unittest.TestCase):
alg1.model.sync_paras_to(alg2.model, alg2.gpu_id)
outputs1 = exe.run(program1,
feed={'x': sensor},
fetch_list=y1.values())
outputs2 = exe.run(program2,
feed={'x': sensor},
fetch_list=y2.values())
outputs1 = exe.run(
program1, feed={'x': sensor}, fetch_list=y1.values())
outputs2 = exe.run(
program2, feed={'x': sensor}, fetch_list=y2.values())
self.assertEqual(
np.sum(outputs1[0].flatten()), np.sum(outputs2[0].flatten()))
......
......@@ -14,7 +14,7 @@
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model
from parl.framework.base import Model
from parl.framework.computation_task import ComputationTask
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
......@@ -33,10 +33,9 @@ class TestModelCNN(Model):
self.conv = layers.conv2d(
num_filters=1, filter_size=3, bias_attr=False)
self.mlp = comf.MLP([
dict(
size=32, act="relu", bias_attr=False), dict(
size=16, act="relu", bias_attr=False), dict(
size=num_actions, act="softmax", bias_attr=False)
dict(size=32, act="relu", bias_attr=False),
dict(size=16, act="relu", bias_attr=False),
dict(size=num_actions, act="softmax", bias_attr=False)
])
self.height = height
self.width = width
......@@ -90,28 +89,28 @@ class TestComputationTask(unittest.TestCase):
dims = 100
ac = SimpleAC(model=SimpleModelAC(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=32, act="relu", bias_attr=False), dict(
size=16, act="relu", bias_attr=False), dict(
size=num_actions, act="softmax", bias_attr=False)
]))
ac_cnn = SimpleAC(model=TestModelCNN(
width=84, height=84, num_actions=num_actions))
q = SimpleQ(model=SimpleModelQ(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=32, act="relu", bias_attr=False), dict(
size=16, act="relu", bias_attr=False), dict(
size=num_actions, bias_attr=False)
]))
ac = SimpleAC(
model=SimpleModelAC(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(size=32, act="relu", bias_attr=False),
dict(size=16, act="relu", bias_attr=False),
dict(size=num_actions, act="softmax", bias_attr=False)
]))
ac_cnn = SimpleAC(
model=TestModelCNN(width=84, height=84, num_actions=num_actions))
q = SimpleQ(
model=SimpleModelQ(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(size=32, act="relu", bias_attr=False),
dict(size=16, act="relu", bias_attr=False),
dict(size=num_actions, bias_attr=False)
]))
batch_size = 10
height, width = 84, 84
......@@ -130,8 +129,9 @@ class TestComputationTask(unittest.TestCase):
"""
Test case for two CTs sharing parameters
"""
alg = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
ct0 = ComputationTask(algorithm=alg)
ct1 = ComputationTask(algorithm=alg)
......@@ -150,8 +150,9 @@ class TestComputationTask(unittest.TestCase):
Test case for two CTs copying parameters
"""
alg = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
ct0 = ComputationTask(algorithm=alg)
ct1 = ComputationTask(algorithm=deepcopy(alg))
......@@ -181,8 +182,8 @@ class TestComputationTask(unittest.TestCase):
num_actions = 2
dims = 100
batch_size = 8
sensor = np.ones(
[batch_size, dims]).astype("float32") / dims # normalize
sensor = np.ones([batch_size, dims
]).astype("float32") / dims # normalize
next_sensor = np.zeros([batch_size, dims]).astype("float32")
for on_policy in [True, False]:
......@@ -192,11 +193,9 @@ class TestComputationTask(unittest.TestCase):
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=64, act="relu", bias_attr=False), dict(
size=32, act="relu", bias_attr=False),
dict(
size=num_actions, act="softmax")
dict(size=64, act="relu", bias_attr=False),
dict(size=32, act="relu", bias_attr=False),
dict(size=num_actions, act="softmax")
]),
hyperparas=dict(lr=1e-1))
ct = ComputationTask(algorithm=alg)
......@@ -206,9 +205,8 @@ class TestComputationTask(unittest.TestCase):
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=64, act="relu", bias_attr=False), dict(
size=32, act="relu", bias_attr=False),
dict(size=64, act="relu", bias_attr=False),
dict(size=32, act="relu", bias_attr=False),
dict(size=num_actions)
]),
update_ref_interval=100,
......@@ -222,15 +220,16 @@ class TestComputationTask(unittest.TestCase):
actions = np.expand_dims(actions, 1)
else:
## randomly assemble a batch
actions = np.random.choice(
[0, 1], size=(batch_size, 1),
p=[0.5, 0.5]).astype("int")
actions = np.random.choice([0, 1],
size=(batch_size, 1),
p=[0.5, 0.5]).astype("int")
rewards = (1 - actions).astype("float32")
cost = ct.learn(
inputs=dict(sensor=sensor),
next_inputs=dict(next_sensor=next_sensor),
next_episode_end=dict(next_episode_end=np.ones(
(batch_size, 1)).astype("float32")),
next_episode_end=dict(
next_episode_end=np.ones((batch_size,
1)).astype("float32")),
actions=dict(action=actions),
rewards=dict(reward=rewards))
......
......@@ -50,8 +50,8 @@ class CNN(Feedforward):
"""
def __init__(self, multi_conv_layers):
super(CNN, self).__init__(
[layers.conv2d(**c) for c in multi_conv_layers])
super(CNN,
self).__init__([layers.conv2d(**c) for c in multi_conv_layers])
def argmax_layer(input):
......@@ -93,6 +93,5 @@ def idx_select(input, idx):
assert input.shape
select = layers.cast(
x=layers.one_hot(
input=idx, depth=num_entries), dtype="float32")
x=layers.one_hot(input=idx, depth=num_entries), dtype="float32")
return inner_prod(select, input)
......@@ -24,6 +24,7 @@ import paddle.fluid.layers as layers
import paddle.fluid.unique_name as unique_name
from copy import deepcopy
import inspect
from parl.framework.model_base import Network
def update_attr_name(name, default_name, attr, is_bias):
......@@ -61,7 +62,7 @@ class LayerFunc(object):
self.param_attr = param_attr
self.bias_attr = bias_attr
def sync_paras_to(self, target_layer, gpu_id):
def sync_paras_to(self, target_layer, gpu_id=0):
"""
Copy the paras from self to a target layer
"""
......@@ -125,40 +126,6 @@ class LayerFunc(object):
return None
class Network(object):
"""
A Network is an unordered set of LayerFuncs or Networks.
"""
def sync_paras_to(self, target_net, gpu_id):
assert not target_net is self, "cannot copy between identical networks"
assert isinstance(target_net, Network)
assert self.__class__.__name__ == target_net.__class__.__name__, \
"must be the same class for para syncing!"
for attr in self.__dict__:
if not attr in target_net.__dict__:
continue
val = getattr(self, attr)
target_val = getattr(target_net, attr)
assert type(val) == type(target_val)
### TODO: sync paras recursively
if isinstance(val, Network) or isinstance(val, LayerFunc):
val.sync_paras_to(target_val, gpu_id)
elif isinstance(val, tuple) or isinstance(val, list) or isinstance(
val, set):
for v, tv in zip(val, target_val):
v.sync_paras_to(tv, gpu_id)
elif isinstance(val, dict):
for k in val.keys():
assert k in target_val
val[k].sync_paras_to(target_val[k], gpu_id)
else:
# for any other type, we do not copy
pass
def check_caller_name():
stack = inspect.stack()
## we trace back to the call stack and make sure Network.__init__ is on the path
......@@ -194,13 +161,14 @@ def fc(size,
super(FC_, self).__init__(param_attr, bias_attr)
def __call__(self, input, is_test=False):
return layers.fc(input=input,
size=size,
num_flatten_dims=num_flatten_dims,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
act=act,
is_test=is_test)
return layers.fc(
input=input,
size=size,
num_flatten_dims=num_flatten_dims,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
act=act,
is_test=is_test)
return FC_()
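The point of returning a FC_ object rather than a tensor is parameter sharing: the same LayerFunc can be applied to several inputs. A hedged sketch (x1 and x2 are assumed data layers):
```python
fc = layers.fc(size=64, act='relu')  # one LayerFunc, one set of parameters
h1 = fc(input=x1)                    # first application
h2 = fc(input=x2)                    # reuses the same param_attr/bias_attr
```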
......
......@@ -14,7 +14,7 @@
import unittest
import parl.layers as layers
from parl.layers import Network
from parl.framework.model_base import Network
class MyNetWork(Network):
......@@ -25,8 +25,8 @@ class MyNetWork(Network):
self.fc4 = layers.fc(100, param_attr=False)
self.fc5 = layers.fc(100, name="fc", bias_attr=False)
self.embedding = layers.embedding((100, 128))
self.embedding_custom = layers.embedding(
(100, 128), name="embedding_custom")
self.embedding_custom = layers.embedding((100, 128),
name="embedding_custom")
## although here self.conv2d shares param with self.embedding,
## it might be invalid because the param sizes do not match
self.conv2d = layers.conv2d(
......
......@@ -14,7 +14,7 @@
import unittest
import parl.layers as layers
from parl.layers import Network
from parl.framework.model_base import Network
import paddle.fluid as fluid
import numpy as np
......@@ -25,8 +25,8 @@ class MyNetWork(Network):
self.fc2 = layers.fc(64, bias_attr=False)
self.fc3 = layers.fc(64, name="fc")
self.fc4 = layers.fc(64, name="fc")
self.embedding = layers.embedding(
(100, 64), param_attr=self.fc1.param_attr)
self.embedding = layers.embedding((100, 64),
param_attr=self.fc1.param_attr)
class TestParamSharing(unittest.TestCase):
......@@ -56,8 +56,7 @@ class TestParamSharing(unittest.TestCase):
with fluid.program_guard(main_program2):
x_ = layers.data(name='x', shape=[1], dtype="int")
cx_ = layers.cast(
x=layers.one_hot(
input=x_, depth=dict_size), dtype="float32")
x=layers.one_hot(input=x_, depth=dict_size), dtype="float32")
y1_ = net.fc1(input=cx_)
y2_ = net.embedding(input=x_)
......@@ -71,9 +70,10 @@ class TestParamSharing(unittest.TestCase):
exe.run(fluid.default_startup_program())
######################################################
outputs = exe.run(main_program1,
feed={"x": input_cx},
fetch_list=[y1, y11, y2, y3, y4])
outputs = exe.run(
main_program1,
feed={"x": input_cx},
fetch_list=[y1, y11, y2, y3, y4])
old_y1 = outputs[0]
self.assertEqual(
np.sum(outputs[0].flatten()), np.sum(outputs[1].flatten()))
......@@ -82,10 +82,13 @@ class TestParamSharing(unittest.TestCase):
self.assertNotEqual(
np.sum(outputs[3].flatten()), np.sum(outputs[4].flatten()))
outputs = exe.run(main_program2,
feed={'x': input_x,
'x1': input_cx},
fetch_list=[y1_, y2_, y3_])
outputs = exe.run(
main_program2,
feed={
'x': input_x,
'x1': input_cx
},
fetch_list=[y1_, y2_, y3_])
### test two different layers sharing the same para matrix
self.assertEqual(
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import parl.layers as layers
from parl.framework.algorithm import Model
from parl.framework.base import Model
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parl.utils.utils import *
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['has_func']
def has_func(obj, fun):
"""check if a class has specified function: https://stackoverflow.com/a/5268474
Args:
obj: the class to check
fun: specified function to check
Returns:
A bool to indicate if obj has funtion "fun"
"""
check_fun = getattr(obj, fun, None)
return callable(check_fun)
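For illustration, a quick hedged check of the helper's behavior:
```python
class Foo(object):
    def bar(self):
        pass

assert has_func(Foo(), 'bar')          # bound method -> callable
assert not has_func(Foo(), 'baz')      # attribute does not exist
assert not has_func(Foo(), '__doc__')  # attribute exists but is not callable
```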