Commit 1a1e1f03 authored by Bo Zhou, committed by Hongsheng Zeng

redesign basic class in PARL (#26)

* redesign basic class in PARL

* code style fixed

* update yaml's version

* update yaml's version & update code to fix style problem

* add debug message for function

* delete test code

* rename function: has_fun -> has_func
Parent 2fc4e8c3
......@@ -4,8 +4,8 @@ repos:
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
- repo: https://github.com/pre-commit/mirrors-yapf.git
sha: v0.24.0
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
......
......@@ -20,20 +20,20 @@ option(WITH_TESTING "Include unit testing" ON)
set(PADDLE_PYTHON_PATH "" CACHE STRING "Python path to PaddlePaddle Fluid")
function(py_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=.:${py_test_ENVS}
python -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=.:${py_test_ENVS}
python -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endfunction()
if (WITH_TESTING)
file(GLOB_RECURSE TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py ENVS ${PADDLE_PYTHON_PATH})
endforeach()
file(GLOB_RECURSE TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_test.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py ENVS ${PADDLE_PYTHON_PATH})
endforeach()
endif()
......@@ -169,5 +169,6 @@ class SimpleQ(Algorithm):
x=(exploration_counter_ > self.total_exploration_batches),
dtype="float32")
## if the counter already hits the limit, we do not change the counter
layers.assign(switch * counter +
(1 - switch) * exploration_counter_, counter)
layers.assign(
switch * counter + (1 - switch) * exploration_counter_,
counter)
......@@ -37,8 +37,8 @@ def check_last_exp_error(is_last_exp, idx, game_status):
def check_type_error(type1, type2):
if type1.__name__ != type2.__name__:
raise TypeError('{} expected, but {} given.'
.format(type1.__name__, type2.__name__))
raise TypeError('{} expected, but {} given.'.format(
type1.__name__, type2.__name__))
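A hedged usage note: the check compares class names only, so it passes silently for identical types and raises otherwise:
```python
check_type_error(int, int)    # passes silently
check_type_error(int, float)  # raises TypeError: 'int expected, but float given.'
```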
def check_eq(v1, v2):
......
......@@ -107,8 +107,8 @@ class ReplayBuffer(object):
for _ in xrange(num_samples):
while True:
idx = random.randint(0, len(self.buffer) - 1)
if not self.buffer_end(idx) and not self.buffer[
idx].game_status:
if not self.buffer_end(
idx) and not self.buffer[idx].game_status:
break
yield Sample(idx, 1)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.layers import Network
import parl.framework.policy_distribution as pd
from abc import ABCMeta, abstractmethod
def check_duplicate_spec_names(model):
"""
Check if there are two specs that have the same name.
"""
specs = model.get_input_specs() \
+ model.get_action_specs() \
+ model.get_state_specs() \
+ model.get_reward_specs()
names = [name for name, _ in specs]
duplicates = set([n for n in names if names.count(n) > 1])
assert not duplicates, \
"duplicate names with different specs: " + " ".join(duplicates)
class Model(Network):
"""
A Model is owned by an Algorithm. It implements the entire network model of
a specific problem.
"""
__metaclass__ = ABCMeta
def __init__(self):
super(Model, self).__init__()
@abstractmethod
def get_input_specs(self):
"""
Output: list of tuples
"""
pass
def get_state_specs(self):
"""
States are optional to a Model.
Output: list of tuples
"""
return []
@abstractmethod
def get_action_specs(self):
"""
Output: list of tuples
"""
pass
def get_reward_specs(self):
"""
By default, a scalar reward.
The user can specify a vector of rewards for some problems.
"""
return [("reward", dict(shape=[1]))]
def policy(self, inputs, states):
"""
Return: action_dists: a dict of action distribution objects
states
An action distribution object can be created with
PolicyDistribution().
Optional: a model might not always have to implement policy()
"""
raise NotImplementedError()
def value(self, inputs, states):
"""
Return: values: a dict of estimated values for the current observations and states
For example, "q_value" and "v_value"
Optional: a model might not always have to implement value()
"""
raise NotImplementedError()
class Algorithm(object):
"""
An Algorithm implements two functions:
1. predict() computes forward
2. learn() computes a cost for optimization
An algorithm should be only part of a network. The user only needs to
implement the rest of the network in the Model class.
"""
def __init__(self, model, hyperparas, gpu_id):
assert isinstance(model, Model)
check_duplicate_spec_names(model)
self.model = model
self.hp = hyperparas
self.gpu_id = gpu_id
def get_input_specs(self):
return self.model.get_input_specs()
def get_state_specs(self):
return self.model.get_state_specs()
def get_action_specs(self):
"""
For non-RL algorithms, this can return []
"""
return self.model.get_action_specs()
def get_reward_specs(self):
"""
For non-RL algorithms, this can return []
"""
return self.model.get_reward_specs()
def before_every_batch(self):
"""
A callback function inserted before every batch of training.
See ComputationTask.learn()
"""
pass
def after_every_batch(self):
"""
A callback function inserted after every batch of training.
See ComputationTask.learn()
"""
pass
def predict(self, inputs, states):
"""
Given the inputs and states, this function does forward prediction and updates states.
Input: inputs(dict), states(dict)
Output: actions(dict), states(dict)
Optional: an algorithm might not implement predict()
"""
pass
def _rl_predict(self, behavior_model, inputs, states):
"""
Given a behavior model (not necessarily equal to self.model), this function
performs a normal RL prediction according to inputs and states.
A behavior model different from self.model indicates off-policy training.
The user can choose to call this function for convenience.
"""
distributions, states = behavior_model.policy(inputs, states)
actions = {}
for key, dist in distributions.iteritems():
actions[key] = dist()
return actions, states
def learn(self, inputs, next_inputs, states, next_states, next_episode_end,
actions, rewards):
"""
This function computes a learning cost to be optimized.
The return should be the cost.
Output: cost(dict)
Optional: an algorithm might not implement learn()
"""
pass
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Network, Model
from abc import ABCMeta, abstractmethod
__all__ = ['Algorithm']
class Algorithm(object):
"""
Algorithm defines how we update the model. For example,
after defining the forward network in the `Network` class, you should define how to update the model here.
Before creating a customized algorithm, please check the algorithms already provided by PARL.
The most commonly used algorithms, such as DQN/DDPG/PPO, are provided in parl.algorithms, so go and have a try.
They are easy to use: just try parl.algorithms.DQN.
An Algorithm implements two functions:
1. define_predict() builds the forward process that was defined in Network
2. define_learn() computes a cost for optimization
An Algorithm should be the updating part of a network; the user only needs to
implement the rest of the network (the forward part) in the Model class.
"""
def __init__(self, model, hyperparas=None):
assert isinstance(model, Model)
self.model = model
self.hp = hyperparas
def define_predict(self, obs):
"""
Describe the process for building the prediction program.
"""
raise NotImplementedError()
def define_learn(self, obs, action, reward, next_obs, terminal):
"""define how to update the model here, you may need to do the following:
1. define a cost for optimization
2. specify your optimizer
3. optimize model defined in Model
"""
raise NotImplementedError()
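To make the new interface concrete, here is a hedged sketch of a subclass (hypothetical MyDQN; the loss is a deliberately simplified stand-in, not the reference DQN implementation):
```python
import paddle.fluid as fluid
from parl.framework.algorithm_base import Algorithm

class MyDQN(Algorithm):
    """A minimal sketch, assuming the Model exposes a value(obs) network
    whose output shape matches the reward (act_dim == 1)."""

    def define_predict(self, obs):
        # build the forward process: just reuse the network in the Model
        self.q_value = self.model.value(obs)

    def define_learn(self, obs, action, reward, next_obs, terminal):
        # 1. define a cost for optimization (an illustrative regression of
        #    Q(obs) towards the observed reward, not a full DQN loss)
        q_value = self.model.value(obs)
        cost = fluid.layers.reduce_mean(
            fluid.layers.square_error_cost(input=q_value, label=reward))
        # 2. specify your optimizer (the learning rate is an assumed hyperpara)
        optimizer = fluid.optimizer.Adam(learning_rate=self.hp['lr'])
        # 3. optimize the model defined in Model
        optimizer.minimize(cost)
        return cost
```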
......@@ -14,25 +14,16 @@
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model, Algorithm
from parl.framework.algorithm_base import Algorithm
from parl.framework.base import Model
def split_list(l, sizes):
"""
Split a list into several chunks, each chunk with a size in sizes
"""
chunks = []
offset = 0
for size in sizes:
chunks.append(l[offset:offset + size])
offset += size
return chunks
__all__ = ['ComputationTask']
class ComputationTask(object):
"""
A ComputationTask is responsible for the general data flow
outside the algorithm
outside the algorithm.
A ComputationTask is created in a bottom-up way:
a. create a Model
......@@ -43,140 +34,40 @@ class ComputationTask(object):
def __init__(self, algorithm):
assert isinstance(algorithm, Algorithm)
self.alg = algorithm
## create a Fluid executor
self._define_program()
place = fluid.CPUPlace() if self.alg.gpu_id < 0 \
else fluid.CUDAPlace(self.alg.gpu_id)
self.build_program()
self.fluid_executor = fluid.Executor(place)
self.fluid_executor.run(fluid.default_startup_program())
def _create_data_layers(self, specs):
data_layers = {}
for name, args in specs:
data_layers[name] = layers.data(name, **args)
return data_layers
def _define_program(self):
self.learn_program = fluid.Program()
self.predict_program = fluid.Program()
def _get_next_specs(specs):
return [("next_" + spec[0], spec[1]) for spec in specs]
def _select_data(data_layer_dict, specs):
return {name: data_layer_dict[name] for name, _ in specs}
input_specs = self.alg.get_input_specs()
state_specs = self.alg.get_state_specs()
next_input_specs = _get_next_specs(input_specs)
next_state_specs = _get_next_specs(state_specs)
action_specs = self.alg.get_action_specs()
reward_specs = self.alg.get_reward_specs()
next_episode_end_specs = [("next_episode_end", dict(shape=[1]))]
self.action_names = sorted([name for name, _ in action_specs])
self.state_names = sorted([name for name, _ in state_specs])
with fluid.program_guard(self.predict_program):
data_layer_dict = self._create_data_layers(input_specs)
data_layer_dict.update(self._create_data_layers(state_specs))
self.predict_feed_names = sorted(data_layer_dict.keys())
inputs = _select_data(data_layer_dict, input_specs)
states = _select_data(data_layer_dict, state_specs)
### call alg predict()
pred_actions, pred_states = self.alg.predict(inputs, states)
self.predict_fetch = [pred_actions, pred_states]
with fluid.program_guard(self.learn_program):
data_layer_dict = self._create_data_layers(input_specs)
data_layer_dict.update(self._create_data_layers(state_specs))
data_layer_dict.update(self._create_data_layers(next_input_specs))
data_layer_dict.update(self._create_data_layers(next_state_specs))
data_layer_dict.update(self._create_data_layers(action_specs))
data_layer_dict.update(self._create_data_layers(reward_specs))
data_layer_dict.update(
self._create_data_layers(next_episode_end_specs))
self.learn_feed_names = sorted(data_layer_dict.keys())
inputs = _select_data(data_layer_dict, input_specs)
states = _select_data(data_layer_dict, state_specs)
next_inputs = _select_data(data_layer_dict, next_input_specs)
next_states = _select_data(data_layer_dict, next_state_specs)
actions = _select_data(data_layer_dict, action_specs)
rewards = _select_data(data_layer_dict, reward_specs)
next_episode_end = _select_data(data_layer_dict,
next_episode_end_specs)
## call alg learn()
### TODO: implement a recurrent layer to strip the sequence information
self.cost = self.alg.learn(inputs, next_inputs, states,
next_states, next_episode_end, actions,
rewards)
def predict(self, inputs, states=dict()):
def build_program(self):
"""build your training program and prediction program here,
using the functions define_learn and define_predict in algorithm.
To build the program, you may need to do the following:
a. create a new program in fluid with program guard
b. define your data layer
c. build your training/prediction program, pass the data variable
defined in step b to `define_training/define_prediction` of algorithm
"""
ComputationTask predict API
This function is responsible for converting Python data to Fluid tensors, and
then converting the computational results back in the reverse direction.
"""
data = {}
data.update(inputs)
data.update(states)
assert sorted(data.keys()) == self.predict_feed_names, \
"field names mismatch: %s %s" % (data.keys(), self.predict_feed_names)
feed = {n: data[n] for n in self.predict_feed_names}
### run the predict_program and fetch the computational results
action_tensors, state_tensors = self.predict_fetch
action_tensors = list(action_tensors.iteritems())
state_tensors = list(state_tensors.iteritems())
result = self.fluid_executor.run(
self.predict_program,
feed=feed,
fetch_list=[t for _, t in action_tensors + state_tensors])
## actions and states are numpy arrays
actions, states = split_list(
result, [len(action_tensors), len(state_tensors)])
raise NotImplementedError
## wrap the results into dictionaries for better access
actions = dict(zip([name for name, _ in action_tensors], actions))
states = dict(zip([name for name, _ in state_tensors], states))
assert sorted(actions.keys()) == self.action_names
assert sorted(states.keys()) == self.state_names
return actions, states
def predict(self, obs):
"""This function will predict the action given current observation of the enviroment.
def learn(self,
inputs,
next_inputs,
next_episode_end,
actions,
rewards,
states=dict(),
next_states=dict()):
Note that this function only does the prediction and does not try any exploration.
To explore in the action space, you should create your own process in the `sample` function below.
Informally, this function is often used in the test process.
"""
ComputationTask learn API
This function is responsible for converting Python data to Fluid tensors, and
then converting the computational results back in the reverse direction.
raise NotImplementedError
def sample(self, obs):
"""This function will predict the action given current observation of the enviroment.
Additionaly, action will be added noise here to explore a new trajectory. In formally,
this function is often used in training process.
"""
data = {}
data.update(inputs)
data.update(next_inputs)
data.update(states)
data.update(next_states)
data.update(next_episode_end)
data.update(actions)
data.update(rewards)
assert sorted(data.keys()) == self.learn_feed_names, \
"field names mismatch: %s %s" % ()
feed = {n: data[n] for n in self.learn_feed_names}
raise NotImplementedError
self.alg.before_every_batch()
## run the learn program and fetch the sole cost output
result = self.fluid_executor.run(self.learn_program,
feed=feed,
fetch_list=[self.cost["cost"]])
self.alg.after_every_batch()
return dict(cost=result[0])
def learn(self, obs, action, reward, next_obs, terminal):
"""pass data to the training program to update model,
this function is the training interface for ComputationTask.
"""
raise NotImplementedError
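A hedged usage sketch of the redesigned task interface (MyConcreteTask, MyDQN and MyModel are hypothetical subclasses that implement the NotImplementedError stubs above):
```python
import numpy as np

# assumed concrete subclasses of Model / Algorithm / ComputationTask
alg = MyDQN(model=MyModel(), hyperparas=dict(lr=1e-3))
ct = MyConcreteTask(algorithm=alg)

obs = np.random.random((1, 4)).astype('float32')
exploratory_action = ct.sample(obs)   # training: prediction plus noise
greedy_action = ct.predict(obs)       # testing: prediction only

# training step: feed a transition batch to the learn program
next_obs = np.random.random((1, 4)).astype('float32')
reward = np.array([[1.0]], dtype='float32')
terminal = np.array([[0.0]], dtype='float32')
cost = ct.learn(obs, exploratory_action, reward, next_obs, terminal)
```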
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Base classes to define a Network and a Model.
"""
from abc import ABCMeta, abstractmethod
from parl.utils.utils import has_func
__all__ = ['Network', 'Model']
class Network(object):
"""
A Network is an unordered set of LayerFuncs or Networks.
"""
def sync_paras_to(self, target_net):
assert not target_net is self, "cannot copy between identical networks"
assert isinstance(target_net, Network)
assert self.__class__.__name__ == target_net.__class__.__name__, \
"must be the same class for para syncing!"
for attr in self.__dict__:
if not attr in target_net.__dict__:
continue
val = getattr(self, attr)
target_val = getattr(target_net, attr)
assert type(val) == type(target_val), \
"[Error]sync_paras_to failed, \
ensure that the destination model is generated by deep-copying the source model"
### TODO: sync paras recursively
if has_func(val, 'sync_paras_to'):
val.sync_paras_to(target_val)
elif isinstance(val, tuple) or isinstance(val, list) or isinstance(
val, set):
for v, tv in zip(val, target_val):
v.sync_paras_to(tv)
elif isinstance(val, dict):
for k in val.keys():
assert k in target_val
val[k].sync_paras_to(target_val[k])
else:
# for any other type, we do not copy
pass
class Model(Network):
"""
A Model is owned by an Algorithm.
It implements the entire network model (the forward part) to solve a specific problem.
In short, Model is responsible for the forward pass and
Algorithm is responsible for the backward pass.
Model can also be used to construct a target model, which has the same structure as the initial model.
Here is an example:
```python
class Actor(Model):
def __init__(self, obs_dim, act_dim):
self.obs_dim = obs_dim
self.act_dim = act_dim
self.fc1 = layers.fc(size=128, act='relu')
self.fc2 = layers.fc(size=64, act='relu')
actor = Actor(obs_dim=12, act_dim=2)
target_actor = copy.deepcopy(actor)
```
Note that it is the model structure that is copied from the initial actor;
parameters in the initial model haven't been copied to the target model.
To copy parameters, you must explicitly use the sync_paras_to function after the program is initialized.
"""
__metaclass__ = ABCMeta
def __init__(self):
super(Model, self).__init__()
def policy(self, *args):
"""
Implement your policy here.
The function will later be used by the algorithm.
Return: action_dists: a dict of action distribution objects
states
Optional: a model might not always have to implement policy()
"""
raise NotImplementedError()
def value(self, *args):
"""
Return: values: a dict of estimated values for the current observations and states
For example, "q_value" and "v_value"
Optional: a model might not always have to implement value()
"""
raise NotImplementedError()
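A hedged sketch of the copy-then-sync workflow described in the Model docstring above (MyModel is a hypothetical subclass; the fluid programs must be built and the startup program run before syncing):
```python
from copy import deepcopy

model = MyModel()                  # hypothetical Model subclass
target_model = deepcopy(model)     # copies the structure only
# ... build the fluid programs and run fluid.default_startup_program() ...
model.sync_paras_to(target_model)  # now the parameter values are copied too
```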
......@@ -113,7 +113,6 @@ def q_categorical_distribution(q_value):
assert len(q_value.shape) == 2, "[batch_size, num_actions]"
max_id = comf.argmax_layer(q_value)
prob = layers.cast(
x=layers.one_hot(
input=max_id, depth=q_value.shape[-1]),
x=layers.one_hot(input=max_id, depth=q_value.shape[-1]),
dtype="float32")
return CategoricalDistribution(prob)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Model
from parl.framework.algorithm_base import Algorithm
from copy import deepcopy
import numpy as np
import unittest
import sys
class Value(Model):
def __init__(self, obs_dim, act_dim):
self.obs_dim = obs_dim
self.act_dim = act_dim
self.fc1 = layers.fc(size=256, act='relu')
self.fc2 = layers.fc(size=128, act='relu')
self.fc3 = layers.fc(size=self.act_dim)
def value(self, obs):
out = self.fc1(obs)
out = self.fc2(out)
value = self.fc3(out)
return value
class QLearning(Algorithm):
def __init__(self, critic_model):
self.critic_model = critic_model
self.target_model = deepcopy(critic_model)
def define_predict(self, obs):
self.q_value = self.critic_model.value(obs)
self.q_target_value = self.target_model.value(obs)
class AlgorithmBaseTest(unittest.TestCase):
def test_sync_paras_in_one_program(self):
critic_model = Value(obs_dim=4, act_dim=1)
dqn = QLearning(critic_model)
pred_program = fluid.Program()
with fluid.program_guard(pred_program):
obs = layers.data(name='obs', shape=[4], dtype='float32')
dqn.define_predict(obs)
place = fluid.CUDAPlace(0)
executor = fluid.Executor(place)
executor.run(fluid.default_startup_program())
N = 10
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program,
feed={'obs': x},
fetch_list=[dqn.q_value, dqn.q_target_value])
self.assertNotEqual(outputs[0].flatten(), outputs[1].flatten())
critic_model.sync_paras_to(dqn.target_model)
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program,
feed={'obs': x},
fetch_list=[dqn.q_value, dqn.q_target_value])
self.assertEqual(outputs[0].flatten(), outputs[1].flatten())
def test_sync_paras_among_programs(self):
critic_model = Value(obs_dim=4, act_dim=1)
dqn = QLearning(critic_model)
dqn_2 = deepcopy(dqn)
pred_program = fluid.Program()
pred_program_2 = fluid.Program()
with fluid.program_guard(pred_program):
obs = layers.data(name='obs', shape=[4], dtype='float32')
dqn.define_predict(obs)
# algorithm #2
with fluid.program_guard(pred_program_2):
obs_2 = layers.data(name='obs_2', shape=[4], dtype='float32')
dqn_2.define_predict(obs_2)
place = fluid.CUDAPlace(0)
executor = fluid.Executor(place)
executor.run(fluid.default_startup_program())
N = 10
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program, feed={'obs': x}, fetch_list=[dqn.q_value])
outputs_2 = executor.run(
pred_program_2, feed={'obs_2': x}, fetch_list=[dqn_2.q_value])
self.assertNotEqual(outputs[0].flatten(), outputs_2[0].flatten())
dqn.critic_model.sync_paras_to(dqn_2.critic_model)
random_obs = np.random.random(size=(N, 4)).astype('float32')
for i in range(N):
x = np.expand_dims(random_obs[i], axis=0)
outputs = executor.run(
pred_program, feed={'obs': x}, fetch_list=[dqn.q_value])
outputs_2 = executor.run(
pred_program_2, feed={'obs_2': x}, fetch_list=[dqn_2.q_value])
self.assertEqual(outputs[0].flatten(), outputs_2[0].flatten())
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.model_base import Model
from copy import deepcopy
import unittest
class Value(Model):
def __init__(self, obs_dim, act_dim):
self.obs_dim = obs_dim
self.act_dim = act_dim
self.fc1 = layers.fc(size=256, act='relu')
self.fc2 = layers.fc(size=128, act='relu')
class ModelBaseTest(unittest.TestCase):
def test_network_copy(self):
value = Value(obs_dim=2, act_dim=1)
target_value = deepcopy(value)
self.assertNotEqual(value.fc1.param_name, target_value.fc1.param_name)
self.assertNotEqual(value.fc1.bias_name, target_value.fc1.bias_name)
self.assertNotEqual(value.fc2.param_name, target_value.fc2.param_name)
self.assertNotEqual(value.fc2.bias_name, target_value.fc2.bias_name)
if __name__ == '__main__':
unittest.main()
......@@ -14,7 +14,8 @@
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model, Algorithm
from parl.framework.algorithm_base import Algorithm
from parl.framework.base import Model
from parl.layers import common_functions as comf
from parl.model_zoo.simple_models import SimpleModelDeterministic
import numpy as np
......@@ -37,8 +38,9 @@ class TestAlgorithmParas(unittest.TestCase):
Test case for copying parameters
"""
alg1 = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg1 = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg2 = deepcopy(alg1)
batch_size = 10
......@@ -90,8 +92,9 @@ class TestAlgorithmParas(unittest.TestCase):
"""
Test case for copying parameters between two different programs
"""
alg1 = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg1 = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg2 = deepcopy(alg1)
batch_size = 10
......@@ -117,12 +120,10 @@ class TestAlgorithmParas(unittest.TestCase):
alg1.model.sync_paras_to(alg2.model, alg2.gpu_id)
outputs1 = exe.run(program1,
feed={'x': sensor},
fetch_list=y1.values())
outputs2 = exe.run(program2,
feed={'x': sensor},
fetch_list=y2.values())
outputs1 = exe.run(
program1, feed={'x': sensor}, fetch_list=y1.values())
outputs2 = exe.run(
program2, feed={'x': sensor}, fetch_list=y2.values())
self.assertEqual(
np.sum(outputs1[0].flatten()), np.sum(outputs2[0].flatten()))
......
......@@ -14,7 +14,7 @@
import paddle.fluid as fluid
import parl.layers as layers
from parl.framework.algorithm import Model
from parl.framework.base import Model
from parl.framework.computation_task import ComputationTask
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
......@@ -33,10 +33,9 @@ class TestModelCNN(Model):
self.conv = layers.conv2d(
num_filters=1, filter_size=3, bias_attr=False)
self.mlp = comf.MLP([
dict(
size=32, act="relu", bias_attr=False), dict(
size=16, act="relu", bias_attr=False), dict(
size=num_actions, act="softmax", bias_attr=False)
dict(size=32, act="relu", bias_attr=False),
dict(size=16, act="relu", bias_attr=False),
dict(size=num_actions, act="softmax", bias_attr=False)
])
self.height = height
self.width = width
......@@ -90,28 +89,28 @@ class TestComputationTask(unittest.TestCase):
dims = 100
ac = SimpleAC(model=SimpleModelAC(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=32, act="relu", bias_attr=False), dict(
size=16, act="relu", bias_attr=False), dict(
size=num_actions, act="softmax", bias_attr=False)
]))
ac_cnn = SimpleAC(model=TestModelCNN(
width=84, height=84, num_actions=num_actions))
q = SimpleQ(model=SimpleModelQ(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=32, act="relu", bias_attr=False), dict(
size=16, act="relu", bias_attr=False), dict(
size=num_actions, bias_attr=False)
]))
ac = SimpleAC(
model=SimpleModelAC(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(size=32, act="relu", bias_attr=False),
dict(size=16, act="relu", bias_attr=False),
dict(size=num_actions, act="softmax", bias_attr=False)
]))
ac_cnn = SimpleAC(
model=TestModelCNN(width=84, height=84, num_actions=num_actions))
q = SimpleQ(
model=SimpleModelQ(
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(size=32, act="relu", bias_attr=False),
dict(size=16, act="relu", bias_attr=False),
dict(size=num_actions, bias_attr=False)
]))
batch_size = 10
height, width = 84, 84
......@@ -130,8 +129,9 @@ class TestComputationTask(unittest.TestCase):
"""
Test case for two CTs sharing parameters
"""
alg = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
ct0 = ComputationTask(algorithm=alg)
ct1 = ComputationTask(algorithm=alg)
......@@ -150,8 +150,9 @@ class TestComputationTask(unittest.TestCase):
Test case for two CTs copying parameters
"""
alg = TestAlgorithm(model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
alg = TestAlgorithm(
model=SimpleModelDeterministic(
dims=10, mlp_layer_confs=[dict(size=10)]))
ct0 = ComputationTask(algorithm=alg)
ct1 = ComputationTask(algorithm=deepcopy(alg))
......@@ -181,8 +182,8 @@ class TestComputationTask(unittest.TestCase):
num_actions = 2
dims = 100
batch_size = 8
sensor = np.ones(
[batch_size, dims]).astype("float32") / dims # normalize
sensor = np.ones([batch_size, dims
]).astype("float32") / dims # normalize
next_sensor = np.zeros([batch_size, dims]).astype("float32")
for on_policy in [True, False]:
......@@ -192,11 +193,9 @@ class TestComputationTask(unittest.TestCase):
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=64, act="relu", bias_attr=False), dict(
size=32, act="relu", bias_attr=False),
dict(
size=num_actions, act="softmax")
dict(size=64, act="relu", bias_attr=False),
dict(size=32, act="relu", bias_attr=False),
dict(size=num_actions, act="softmax")
]),
hyperparas=dict(lr=1e-1))
ct = ComputationTask(algorithm=alg)
......@@ -206,9 +205,8 @@ class TestComputationTask(unittest.TestCase):
dims=dims,
num_actions=num_actions,
mlp_layer_confs=[
dict(
size=64, act="relu", bias_attr=False), dict(
size=32, act="relu", bias_attr=False),
dict(size=64, act="relu", bias_attr=False),
dict(size=32, act="relu", bias_attr=False),
dict(size=num_actions)
]),
update_ref_interval=100,
......@@ -222,15 +220,16 @@ class TestComputationTask(unittest.TestCase):
actions = np.expand_dims(actions, 1)
else:
## randomly assemble a batch
actions = np.random.choice(
[0, 1], size=(batch_size, 1),
p=[0.5, 0.5]).astype("int")
actions = np.random.choice([0, 1],
size=(batch_size, 1),
p=[0.5, 0.5]).astype("int")
rewards = (1 - actions).astype("float32")
cost = ct.learn(
inputs=dict(sensor=sensor),
next_inputs=dict(next_sensor=next_sensor),
next_episode_end=dict(next_episode_end=np.ones(
(batch_size, 1)).astype("float32")),
next_episode_end=dict(
next_episode_end=np.ones((batch_size,
1)).astype("float32")),
actions=dict(action=actions),
rewards=dict(reward=rewards))
......
......@@ -50,8 +50,8 @@ class CNN(Feedforward):
"""
def __init__(self, multi_conv_layers):
super(CNN, self).__init__(
[layers.conv2d(**c) for c in multi_conv_layers])
super(CNN,
self).__init__([layers.conv2d(**c) for c in multi_conv_layers])
def argmax_layer(input):
......@@ -93,6 +93,5 @@ def idx_select(input, idx):
assert input.shape
select = layers.cast(
x=layers.one_hot(
input=idx, depth=num_entries), dtype="float32")
x=layers.one_hot(input=idx, depth=num_entries), dtype="float32")
return inner_prod(select, input)
......@@ -24,6 +24,7 @@ import paddle.fluid.layers as layers
import paddle.fluid.unique_name as unique_name
from copy import deepcopy
import inspect
from parl.framework.model_base import Network
def update_attr_name(name, default_name, attr, is_bias):
......@@ -61,7 +62,7 @@ class LayerFunc(object):
self.param_attr = param_attr
self.bias_attr = bias_attr
def sync_paras_to(self, target_layer, gpu_id):
def sync_paras_to(self, target_layer, gpu_id=0):
"""
Copy the paras from self to a target layer
"""
......@@ -125,40 +126,6 @@ class LayerFunc(object):
return None
class Network(object):
"""
A Network is an unordered set of LayerFuncs or Networks.
"""
def sync_paras_to(self, target_net, gpu_id):
assert not target_net is self, "cannot copy between identical networks"
assert isinstance(target_net, Network)
assert self.__class__.__name__ == target_net.__class__.__name__, \
"must be the same class for para syncing!"
for attr in self.__dict__:
if not attr in target_net.__dict__:
continue
val = getattr(self, attr)
target_val = getattr(target_net, attr)
assert type(val) == type(target_val)
### TODO: sync paras recursively
if isinstance(val, Network) or isinstance(val, LayerFunc):
val.sync_paras_to(target_val, gpu_id)
elif isinstance(val, tuple) or isinstance(val, list) or isinstance(
val, set):
for v, tv in zip(val, target_val):
v.sync_paras_to(tv, gpu_id)
elif isinstance(val, dict):
for k in val.keys():
assert k in target_val
val[k].sync_paras_to(target_val[k], gpu_id)
else:
# for any other type, we do not copy
pass
def check_caller_name():
stack = inspect.stack()
## we trace back to the call stack and make sure Network.__init__ is on the path
......@@ -194,13 +161,14 @@ def fc(size,
super(FC_, self).__init__(param_attr, bias_attr)
def __call__(self, input, is_test=False):
return layers.fc(input=input,
size=size,
num_flatten_dims=num_flatten_dims,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
act=act,
is_test=is_test)
return layers.fc(
input=input,
size=size,
num_flatten_dims=num_flatten_dims,
param_attr=self.param_attr,
bias_attr=self.bias_attr,
act=act,
is_test=is_test)
return FC_()
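The point of returning a FC_ object rather than a tensor is parameter sharing: the same LayerFunc can be applied to several inputs. A hedged sketch (x1 and x2 are assumed data layers):
```python
fc = layers.fc(size=64, act='relu')  # one LayerFunc, one set of parameters
h1 = fc(input=x1)                    # first application
h2 = fc(input=x2)                    # reuses the same param_attr/bias_attr
```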
......
......@@ -14,7 +14,7 @@
import unittest
import parl.layers as layers
from parl.layers import Network
from parl.framework.model_base import Network
class MyNetWork(Network):
......@@ -25,8 +25,8 @@ class MyNetWork(Network):
self.fc4 = layers.fc(100, param_attr=False)
self.fc5 = layers.fc(100, name="fc", bias_attr=False)
self.embedding = layers.embedding((100, 128))
self.embedding_custom = layers.embedding(
(100, 128), name="embedding_custom")
self.embedding_custom = layers.embedding((100, 128),
name="embedding_custom")
## although here self.conv2d shares param with self.embedding,
## it might be invalid because the param sizes do not match
self.conv2d = layers.conv2d(
......
......@@ -14,7 +14,7 @@
import unittest
import parl.layers as layers
from parl.layers import Network
from parl.framework.model_base import Network
import paddle.fluid as fluid
import numpy as np
......@@ -25,8 +25,8 @@ class MyNetWork(Network):
self.fc2 = layers.fc(64, bias_attr=False)
self.fc3 = layers.fc(64, name="fc")
self.fc4 = layers.fc(64, name="fc")
self.embedding = layers.embedding(
(100, 64), param_attr=self.fc1.param_attr)
self.embedding = layers.embedding((100, 64),
param_attr=self.fc1.param_attr)
class TestParamSharing(unittest.TestCase):
......@@ -56,8 +56,7 @@ class TestParamSharing(unittest.TestCase):
with fluid.program_guard(main_program2):
x_ = layers.data(name='x', shape=[1], dtype="int")
cx_ = layers.cast(
x=layers.one_hot(
input=x_, depth=dict_size), dtype="float32")
x=layers.one_hot(input=x_, depth=dict_size), dtype="float32")
y1_ = net.fc1(input=cx_)
y2_ = net.embedding(input=x_)
......@@ -71,9 +70,10 @@ class TestParamSharing(unittest.TestCase):
exe.run(fluid.default_startup_program())
######################################################
outputs = exe.run(main_program1,
feed={"x": input_cx},
fetch_list=[y1, y11, y2, y3, y4])
outputs = exe.run(
main_program1,
feed={"x": input_cx},
fetch_list=[y1, y11, y2, y3, y4])
old_y1 = outputs[0]
self.assertEqual(
np.sum(outputs[0].flatten()), np.sum(outputs[1].flatten()))
......@@ -82,10 +82,13 @@ class TestParamSharing(unittest.TestCase):
self.assertNotEqual(
np.sum(outputs[3].flatten()), np.sum(outputs[4].flatten()))
outputs = exe.run(main_program2,
feed={'x': input_x,
'x1': input_cx},
fetch_list=[y1_, y2_, y3_])
outputs = exe.run(
main_program2,
feed={
'x': input_x,
'x1': input_cx
},
fetch_list=[y1_, y2_, y3_])
### test two different layers sharing the same para matrix
self.assertEqual(
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import parl.layers as layers
from parl.framework.algorithm import Model
from parl.framework.base import Model
import parl.framework.policy_distribution as pd
from parl.layers import common_functions as comf
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from parl.utils.utils import *
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['has_func']
def has_func(obj, fun):
"""check if a class has specified function: https://stackoverflow.com/a/5268474
Args:
obj: the class to check
fun: specified function to check
Returns:
A bool to indicate if obj has funtion "fun"
"""
check_fun = getattr(obj, fun, None)
return callable(check_fun)
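For illustration, a quick hedged check of the helper's behavior:
```python
class Foo(object):
    def bar(self):
        pass

assert has_func(Foo(), 'bar')          # bound method -> callable
assert not has_func(Foo(), 'baz')      # attribute does not exist
assert not has_func(Foo(), '__doc__')  # attribute exists but is not callable
```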