Unverified commit fb9af353 authored by Bo Zhou, committed by GitHub

Basic docs (#103)

* fix the compatibility issue

* fix the comment issue

* support paddle 1.5.1 and replace PE with compiler

* yapf&copyright

* yapf

* fix the teamcity problem

* fix the teamcity problem

* fix comment

* only support paddle 1.5.1

* Cmake

* fix comment

* add documentation for basic classes

* change the order of three basic classes

* fix comment & yapf

* comment #2

* yapf#3

* fix comment@zhs

* minor change

* modify comment

* Update algorithm.py

* Update agent.py

* Update algorithm.py
Parent 6e58ccfb
.highlight code, .highlight pre {
color:#fdce93;
background-color:#3f3f3f;
}
.highlight .hll {
background-color:#222;
}
.highlight .err {
color:#e37170;
background-color:#3d3535;
}
.highlight .k {
color:#f0dfaf;
}
.highlight .p {
color:#41706f;
}
.highlight .cs {
color:#cd0000;
font-weight:700;
}
.highlight .gd {
color:#cd0000;
}
.highlight .ge {
color:#ccc;
font-style:italic;
}
.highlight .gr {
color:red;
}
.highlight .go {
color:gray;
}
.highlight .gs {
color:#ccc;
font-weight:700;
}
.highlight .gu {
color:purple;
font-weight:700;
}
.highlight .gt {
color:#0040D0;
}
.highlight .kc {
color:#dca3a3;
}
.highlight .kd {
color:#ffff86;
}
.highlight .kn {
color:#dfaf8f;
font-weight:700;
}
.highlight .kp {
color:#cdcf99;
}
.highlight .kr {
color:#cdcd00;
}
.highlight .ni {
color:#c28182;
}
.highlight .ne {
color:#c3bf9f;
font-weight:700;
}
.highlight .nn {
color:#8fbede;
}
.highlight .vi {
color:#ffffc7;
}
.highlight .c,.preview-zenburn .highlight .g,.preview-zenburn .highlight .cm,.preview-zenburn .highlight .cp,.preview-zenburn .highlight .c1 {
color:#7f9f7f;
}
.highlight .l,.preview-zenburn .highlight .x,.preview-zenburn .highlight .no,.preview-zenburn .highlight .nd,.preview-zenburn .highlight .nl,.preview-zenburn .highlight .nx,.preview-zenburn .highlight .py,.preview-zenburn .highlight .w {
color:#ccc;
}
.highlight .n,.preview-zenburn .highlight .nv,.preview-zenburn .highlight .vg {
color:#dcdccc;
}
.highlight .o,.preview-zenburn .highlight .ow {
color:#f0efd0;
}
.highlight .gh,.preview-zenburn .highlight .gp {
color:#dcdccc;
font-weight:700;
}
.highlight .gi,.preview-zenburn .highlight .kt {
color:#00cd00;
}
.highlight .ld,.preview-zenburn .highlight .s,.preview-zenburn .highlight .sb,.preview-zenburn .highlight .sc,.preview-zenburn .highlight .sd,.preview-zenburn .highlight .s2,.preview-zenburn .highlight .se,.preview-zenburn .highlight .sh,.preview-zenburn .highlight .si,.preview-zenburn .highlight .sx,.preview-zenburn .highlight .sr,.preview-zenburn .highlight .s1,.preview-zenburn .highlight .ss {
color:#cc9393;
}
.highlight .m,.preview-zenburn .highlight .mf,.preview-zenburn .highlight .mh,.preview-zenburn .highlight .mi,.preview-zenburn .highlight .mo,.preview-zenburn .highlight .il {
color:#8cd0d3;
}
.highlight .na,.preview-zenburn .highlight .nt {
color:#9ac39f;
}
.highlight .nb,.preview-zenburn .highlight .nc,.preview-zenburn .highlight .nf,.preview-zenburn .highlight .bp,.preview-zenburn .highlight .vc {
color:#efef8f;
}
parl.Agent
----------
.. autoclass:: parl.core.fluid.agent.Agent
:members:
parl.Algorithm
--------------
.. autoclass:: parl.core.fluid.algorithm.Algorithm
:members:
......@@ -63,7 +63,7 @@ templates_path = ['_templates']
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'zh_CN'
language = 'en'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
......@@ -85,3 +85,32 @@ html_static_path = ['_static']
html_logo = './images/PARL-logo-2.png'
master_doc = 'index'
napoleon_use_ivar = True
# do not skip documentation of the __init__ function of a class
def skip(app, what, name, obj, would_skip, options):
if name == "__init__":
return False
return would_skip
aliases = dict()
aliases['parl.core.fluid.agent.Agent'] = ['parl.agent']
def parl_class_docstring(app, what, name, obj, options, lines):
if what != 'class':
return
obj = parl.Agent
name = parl.Agent
#lines[0] = ['wohenhao']
def setup(app):
app.connect("autodoc-skip-member", skip)
app.connect("autodoc-process-docstring", parl_class_docstring)
add_module_names = False
......@@ -70,6 +70,7 @@ Abstractions
:maxdepth: 1
:caption: APIs
./api_docs.utils
./api_docs.index
model.rst
algorithm.rst
agent.rst
parl.Model
----------
.. autoclass:: parl.core.fluid.model.Model
:members:
......@@ -26,11 +26,49 @@ __all__ = ['Agent']
class Agent(AgentBase):
"""
| `alias`: ``parl.Agent``
| `alias`: ``parl.core.fluid.agent.Agent``
| Agent is one of the three basic classes of PARL.
| It is responsible for interacting with the environment and collecting data for training the policy.
| To implement a customized ``Agent``, users can:
.. code-block:: python
import parl
class MyAgent(parl.Agent):
def __init__(self, algorithm, act_dim):
super(MyAgent, self).__init__(algorithm)
self.act_dim = act_dim
This class will initialize the neural network parameters automatically and provide an executor (``self.fluid_executor``) for users to run the programs.
Attributes:
gpu_id (int): deprecated. Specifies which GPU to use; -1 to use the CPU.
fluid_executor (fluid.Executor): executor for running programs of the agent.
alg (parl.Algorithm): the algorithm of this agent.
Public Functions:
- ``build_program`` (**abstract function**): build various programs for the agent to interact with the outer environment.
- ``get_weights``: return a Python dictionary containing all the parameters of self.alg.
- ``set_weights``: copy parameters from ``get_weights()`` to this agent.
- ``sample``: return a noisy action to perform exploration according to the policy.
- ``predict``: return an action given current observation.
- ``learn``: update the parameters of self.alg using the `learn_program` defined in `build_program()`.
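A sketch of how such an agent is typically driven once implemented (the gym-style ``env`` object and the argument list of ``learn`` below are illustrative, not part of the PARL API):
.. code-block:: python

    agent = MyAgent(algorithm, act_dim)
    obs, done = env.reset(), False
    while not done:
        act = agent.sample(obs)  # noisy action for exploration
        next_obs, reward, done, info = env.step(act)
        agent.learn(obs, act, reward, next_obs, done)  # runs the learn program
        obs = next_obs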
Todo:
- allow users to get parameters of a specified model by specifying the model's name in ``get_weights()``.
"""
def __init__(self, algorithm, gpu_id=None):
"""Build program and run initialization for default_startup_program
"""Build programs by calling the method ``self.build_program()`` and run initialization function of ``fluid.default_startup_program()``.
Args:
algorithm (parl.Algorithm): instance of `parl.core.fluid.algorithm.Algorithm`
algorithm (parl.Algorithm): an instance of `parl.Algorithm`. This algorithm is then passed to `self.alg`.
gpu_id (int): deprecated. Specifies which GPU to use; -1 to use the CPU.
"""
if gpu_id is not None:
warnings.warn(
......@@ -41,8 +79,6 @@ class Agent(AgentBase):
assert isinstance(algorithm, Algorithm)
super(Agent, self).__init__(algorithm)
# alias for self.algorithm
# use self.algorithm is suggested
self.alg = algorithm
self.gpu_id = 0 if machine_info.is_gpu_available() else -1
......@@ -54,71 +90,70 @@ class Agent(AgentBase):
self.fluid_executor.run(fluid.default_startup_program())
def build_program(self):
"""Build leran/predict/sample program here with the
learn/predict/sample function defined in algorithm.
"""Build various programs here with the
learn, predict, sample functions of the algorithm.
Note:
It's unnecessary to call this function explicitly since
it will be called automatically in the initialization function.
| Users **must** implement this function in an ``Agent``.
| This function will be called automatically in the initialization function.
To build the program, you may need to do the following:
a. Create a new program of fluid with program guard;
b. Define data input layers;
c. Pass the data variable defined in step b to learn/predict/sample of algorithm;
To build a program, you must do the following:
a. Create a fluid program with ``fluid.program_guard()``;
b. Define data layers for feeding the data;
c. Build various programs (e.g., learn_program, predict_program) with the data layers defined in step b.
Example:
.. code-block:: python
self.pred_program = fluid.Program()
with fluid.program_guard(self.pred_program):
obs = layers.data(
name='obs', shape=[self.obs_dim], dtype='float32')
self.act_prob = self.alg.predict(obs)
"""
raise NotImplementedError
@deprecated(
deprecated_in='1.2', removed_in='1.3', replace_function='get_weights')
def get_params(self):
""" Get parameters of self.algorithm
""" Returns a Python dictionary containing the whole parameters of self.alg.
Returns:
List of numpy array.
a Python list containing the parameters of self.alg.
"""
return self.algorithm.get_params()
@deprecated(
deprecated_in='1.2', removed_in='1.3', replace_function='set_weights')
def set_params(self, params):
"""Set parameters of self.algorithm
"""Copy parameters from ``get_params()`` into this agent.
Args:
params: List of numpy array.
params (list): a Python list containing the parameters of self.alg.
"""
self.algorithm.set_params(params)
def learn(self, *args, **kwargs):
"""The training interface for Agent.
This function will usually do the following things:
1. Accept numpy data as input;
2. Feed numpy data;
3. Run learn program defined in `build_program`.
"""The training interface for ``Agent``.
This function feeds the training data into the learn_program defined in ``build_program()``.
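A minimal sketch of a typical implementation; the program and fetch names (``self.learn_program``, ``self.cost``) and the feed keys are illustrative:
.. code-block:: python

    def learn(self, obs, act, reward):
        feed = {'obs': obs, 'act': act, 'reward': reward}
        cost = self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=[self.cost])[0]
        return cost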
"""
raise NotImplementedError
def predict(self, *args, **kwargs):
"""Predict the action when given the observation of the enviroment.
In general, this function is used in test process.
"""Predict an action when given the observation of the environment.
This function will usually do the following things:
1. Accept numpy data as input;
2. Feed numpy data;
3. Run predict program defined in `build_program`.
This function feeds the observation into the prediction program defined in ``build_program()``. It is often used in the evaluation stage.
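A minimal sketch, assuming ``self.pred_program`` and ``self.act_prob`` were created in ``build_program()`` as in the example above:
.. code-block:: python

    import numpy as np

    def predict(self, obs):
        act_prob = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': obs.astype('float32')},
            fetch_list=[self.act_prob])[0]
        return np.argmax(act_prob)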
"""
raise NotImplementedError
def sample(self, *args, **kwargs):
"""Sample the action when given the observation of the enviroment.
"""Return an action with noise when given the observation of the environment.
In general, this function is used in train process.
In general, this function is used in train process as noise is added to the action to preform exploration.
This function will usually do the following things:
1. Accept numpy data as input;
2. Feed numpy data;
3. Run predict/sample program defined in `build_program`.
4. Add a sampling operation at the numpy level (unnecessary if the sampling operation has already been done in `Algorithm`).
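A minimal sketch of a common pattern, sampling from the predicted action distribution (``self.sample_program`` and ``self.act_prob`` are illustrative names):
.. code-block:: python

    import numpy as np

    def sample(self, obs):
        act_prob = self.fluid_executor.run(
            self.sample_program,
            feed={'obs': obs.astype('float32')},
            fetch_list=[self.act_prob])[0]
        # numpy-level sampling step for exploration
        return np.random.choice(act_prob.shape[-1], p=act_prob[0])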
"""
raise NotImplementedError
......@@ -23,16 +23,46 @@ __all__ = ['Algorithm']
class Algorithm(AlgorithmBase):
"""Algorithm defines the way how we update the model.
To implement a new algorithm, you may need implement the learn/predict/sample functions.
"""
| `alias`: ``parl.Algorithm``
| `alias`: ``parl.core.fluid.algorithm.Algorithm``
| ``Algorithm`` defines how to update the parameters of the ``Model``. This is where we define loss functions and the optimizer of the neural network. An ``Algorithm`` has at least one model.
| PARL has implemented various algorithms (DQN/DDPG/PPO/A3C/IMPALA) that can be reused quickly; they can be accessed with ``parl.algorithms``.
Example:
.. code-block:: python
import parl
model = Model()
dqn = parl.algorithms.DQN(model, lr=1e-3)
Before creating a customized algorithm, please check the algorithms already provided by PARL.
The most commonly used algorithms, such as DQN/DDPG/PPO/A3C/IMPALA, are available in `parl.algorithms`;
give them a try.
Attributes:
model(``parl.Model``): a neural network that represents a policy or a Q-value function.
Public Functions:
- ``get_weights``: return a Python dictionary containing parameters of the current model.
- ``set_weights``: copy parameters from ``get_weights()`` to the model.
- ``sample``: return a noisy action to perform exploration according to the policy.
- ``predict``: return an action given current observation.
- ``learn``: define the loss function and create an optimizer to minimize the loss.
Note:
``Algorithm`` defines all its computation inside a ``fluid.Program``, such that the returns of functions (`sample`, `predict`, `learn`) are tensors.
``Agent`` also has functions like `sample`, `predict`, and `learn`, but they return numpy arrays for the agent.
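A minimal sketch of a customized algorithm; the ``value()`` method of the model and the regression-style loss below are illustrative, not a specific PARL algorithm:
.. code-block:: python

    import parl
    import paddle.fluid as fluid

    class MyAlgorithm(parl.Algorithm):
        def __init__(self, model, lr=1e-3):
            self.model = model
            self.lr = lr

        def predict(self, obs):
            # returns a tensor inside the current fluid.Program
            return self.model.value(obs)

        def learn(self, obs, target):
            # define the loss and the optimizer that minimizes it
            pred = self.model.value(obs)
            cost = fluid.layers.reduce_mean(
                fluid.layers.square_error_cost(input=pred, label=target))
            optimizer = fluid.optimizer.Adam(learning_rate=self.lr)
            optimizer.minimize(cost)
            return cost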
"""
def __init__(self, model=None, hyperparas=None):
"""
Args:
model(``parl.Model``): a neural network that represents a policy or a Q-value function.
hyperparas (dict): a dict storing the hyper-parameters related to training.
"""
if model is not None:
warnings.warn(
"the `model` argument of `__init__` function in `parl.Algorithm` is deprecated since version 1.2 and will be removed in version 1.3.",
......@@ -52,34 +82,34 @@ class Algorithm(AlgorithmBase):
@deprecated(
deprecated_in='1.2', removed_in='1.3', replace_function='get_weights')
def get_params(self):
""" Get parameters of self.model
""" Get parameters of self.model.
Returns:
List of numpy array.
params (list): a Python list containing the parameters of self.model.
"""
return self.model.get_params()
@deprecated(
deprecated_in='1.2', removed_in='1.3', replace_function='set_weights')
def set_params(self, params):
""" Set parameters of self.model
""" Set parameters from ``get_params`` to the model.
Args:
params: List of numpy array.
params (list): a Python list containing the parameters of self.model.
"""
self.model.set_params(params)
def learn(self, *args, **kwargs):
""" define learning process, such as how to optimize the model.
""" Define the loss function and create an optimizer to minize the loss.
"""
raise NotImplementedError
def predict(self, *args, **kwargs):
""" define predicting process, such as using policy model to predict actions when given observations.
""" Refine the predicting process, e.g,. use the policy model to predict actions.
"""
raise NotImplementedError
def sample(self, *args, **kwargs):
""" define sampling process, such as using policy model to sample actions when given observations.
""" Define the sampling process. This function returns an action with noise to perform exploration.
"""
raise NotImplementedError
......@@ -24,45 +24,46 @@ __all__ = ['Model']
class Model(ModelBase):
"""A `Model`, a collection of `parl.layers`, is owned by an `Algorithm`.
"""
| `alias`: ``parl.Model``
| `alias`: ``parl.core.fluid.model.Model``
| ``Model`` is a base class of PARL for the neural network. A ``Model`` is usually a policy or Q-value function, which predicts an action or an estimate according to the environmental observation.
| To track all the layers, users are required to implement neural networks with the layers from ``parl.layers`` (e.g., parl.layers.fc). These layers have the same APIs as fluid.layers.
It implements the entire network (forward part) to solve a specific problem.
| ``Model`` supports duplicating a ``Model`` instance in a pythonic way:
`Model` can also be deep-copied to construct a target model, which has the same structure as the initial model.
Note that only the model definition is copied here. To copy the parameters from the current model
to the target model, you must explicitly use the `sync_weights_to` function after the program is initialized.
| ``copied_model = copy.deepcopy(model)``
NOTE:
You need to initialize the startup program before calling the `sync_weights_to` API.
Example:
Here is an example:
.. code-block:: python
import parl.layers as layers
import parl.Model as Model
class MLPModel(Model):
def __init__(self):
self.fc = layers.fc(size=64)
import parl
def policy(self, obs):
out = self.fc(obs)
return out
model = MLPModel()
target_model = deepcopy(model) # automatically create new unique parameters names for target_model.fc
class Policy(parl.Model):
def __init__(self):
self.fc = parl.layers.fc(size=12, act='softmax')
# build program
x = layers.data(name='x', shape=[100], dtype="float32")
y1 = model.policy(x)
y2 = target_model.policy(x)
def policy(self, obs):
out = self.fc(obs)
return out
policy = Policy()
copied_policy = copy.deepcopy(policy)
...
# Need initialize program before calling sync_weights_to
fluid_executor.run(fluid.default_startup_program())
...
Attributes:
model_id(str): each model instance has its unique model_id.
# synchronize parameters
model.sync_weights_to(target_model)
Public Functions:
- ``sync_weights_to``: synchronize parameters of the current model to another model.
- ``get_weights``: return a list containing all the parameters of the current model.
- ``set_weights``: copy parameters from ``get_weights()`` to the model.
- ``forward``: define the computations of a neural network. **Should** be overridden by all subclasses.
- ``parameters``: return a list containing the names of parameters of the model.
- ``set_model_id``: set the ``model_id`` of the current model explicitly.
- ``get_model_id``: return the ``model_id`` of the current model.
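A short sketch of a soft update of a target network with ``sync_weights_to`` (the decay value 0.9 is illustrative; ``target_model`` is assumed to be a deep copy of ``model`` whose parameters have already been initialized):
.. code-block:: python

    # target_weights = 0.9 * target_weights + 0.1 * current_weights
    model.sync_weights_to(target_model, decay=0.9)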
"""
......@@ -80,36 +81,50 @@ class Model(ModelBase):
target_net_weights = decay * target_net_weights + (1 - decay) * source_net_weights
Args:
target_net (`Model`): `Model` object deepcopy from source `Model`.
decay (float): Float. The decay to use.
target_model (`parl.Model`): an instance of ``Model`` that has the same neural network architecture as the current model.
decay (float): the decay rate applied when copying parameters; 0 means the parameters are copied directly with no decay.
share_vars_parallel_executor (fluid.ParallelExecutor): if not None, will use fluid.ParallelExecutor
to run program instead of fluid.Executor
"""
self.sync_weights_to(
other_model=target_net,
target_model=target_net,
decay=decay,
share_vars_parallel_executor=share_vars_parallel_executor)
def sync_weights_to(self,
other_model,
target_model,
decay=0.0,
share_vars_parallel_executor=None):
"""Synchronize weights in the model to another model.
"""Synchronize parameters of current model to another model.
To speed up the synchronizing process, will create a program implictly to finish the process. And will
To speed up the synchronizing process, will create a program implicitly to finish the process. And will
also cache the program to avoid creating program repeatedly.
other_model_weights = decay * other_model_weights + (1 - decay) * current_model_weights
target_model_weights = decay * target_model_weights + (1 - decay) * current_model_weights
Args:
other_model (`parl.Model`): object instanced from the same `parl.Model` class with current model.
decay (float): Float. The decay to use.
share_vars_parallel_executor (fluid.ParallelExecutor): if not None, will use fluid.ParallelExecutor
to run program instead of fluid.Executor
target_model (`parl.Model`): an instance of ``Model`` that has the same neural network architecture as the current model.
decay (float): the decay rate applied when copying parameters; 0 means the parameters are copied directly with no decay.
share_vars_parallel_executor (fluid.ParallelExecutor): if not None, will use ``fluid.ParallelExecutor``
to run program instead of ``fluid.Executor``.
Example:
.. code-block:: python
import copy
# create a model that has the same neural network structures.
target_model = copy.deepcopy(model)
# after initializing the parameters ...
model.sync_weights_to(target_model)
Note:
Before calling ``sync_weights_to``, parameters of the model must have been initialized.
"""
args_hash_id = hashlib.md5('{}_{}'.format(
id(other_model), decay).encode('utf-8')).hexdigest()
id(target_model), decay).encode('utf-8')).hexdigest()
has_cached = False
try:
if self._cached_id == args_hash_id:
......@@ -121,13 +136,13 @@ class Model(ModelBase):
# Can not run _cached program, need create a new program
self._cached_id = args_hash_id
assert not other_model is self, "cannot copy between identical model"
assert isinstance(other_model, Model)
assert self.__class__.__name__ == other_model.__class__.__name__, \
assert not target_model is self, "cannot copy between identical model"
assert isinstance(target_model, Model)
assert self.__class__.__name__ == target_model.__class__.__name__, \
"must be the same class for params syncing!"
assert (decay >= 0 and decay <= 1)
param_pairs = self._get_parameter_pairs(self, other_model)
param_pairs = self._get_parameter_pairs(self, target_model)
self._cached_sync_weights_program = fluid.Program()
......@@ -170,26 +185,37 @@ class Model(ModelBase):
@deprecated(
deprecated_in='1.2', removed_in='1.3', replace_function='parameters')
def parameter_names(self):
"""Get param_attr names of all parameters in the Model.
"""Get names of all parameters in this ``Model``.
Only parameter created by parl.layers included.
The order of parameter names will be consistent between
different instances of same `Model`.
Only parameters created by ``parl.layers`` are included.
The order of parameter names is consistent among
different instances of the same `Model`.
Returns:
list of string, param_attr names of all parameters
param_names(list): a list of strings containing the names of all parameters.
"""
return self.parameters()
def parameters(self):
"""Get param_attr names of all parameters in the Model.
"""Get names of all parameters in this ``Model``.
Only parameter created by parl.layers included.
The order of parameter names will be consistent between
different instances of same `Model`.
Only parameters created by ``parl.layers`` are included.
The order of parameter names is consistent among
different instances of the same `Model`.
Returns:
list of string, param_attr names of all parameters
param_names(list): a list of strings containing the names of all parameters.
Example:
.. code-block:: python
model = Model()
model.parameters()
# output:
['fc0.w0', 'fc0.bias0']
"""
try:
return self._parameter_names
......@@ -200,10 +226,10 @@ class Model(ModelBase):
@deprecated(
deprecated_in='1.2', removed_in='1.3', replace_function='get_weights')
def get_params(self):
"""Get numpy arrays of parameters in the model.
""" Return a Python list containing parameters of current model.
Returns:
List of numpy array.
parameters: a Python list containing parameters of the current model.
"""
return self.get_weights()
......@@ -213,15 +239,14 @@ class Model(ModelBase):
"""Set parameters in the model with params.
Args:
params (List): List of numpy array.
params (list): a list of numpy arrays.
"""
self.set_weights(weights=params)
def get_weights(self):
"""Get numpy arrays of weights in the model.
"""Returns a Python list containing parameters of current model.
Returns:
List of numpy array.
a Python list containing the parameters of the current model.
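A usage sketch; ``target_model`` is assumed to be another instance of the same ``Model`` class:
.. code-block:: python

    weights = model.get_weights()      # a list of numpy arrays
    target_model.set_weights(weights)  # copy them into another model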
"""
weights = []
for param_name in self.parameters():
......@@ -231,10 +256,10 @@ class Model(ModelBase):
return weights
def set_weights(self, weights):
"""Set weights in the model with given `weights`.
"""Copy parameters from ``set_weights()`` to the model.
Args:
weights (List): List of numpy array.
weights (list): a Python list containing the parameters.
"""
assert len(weights) == len(self.parameters()), \
'size of input weights should be same as weights number of current model'
......@@ -242,13 +267,13 @@ class Model(ModelBase):
set_value(param_name, weight)
def _get_parameter_names(self, obj):
""" Recursively get parameter names in obj,
""" Recursively get parameter names in a model and its child attributes.
Args:
obj (`Model`/`LayerFunc`/list/tuple/dict): input object
obj (``parl.Model``): an instance of ``Model``
Returns:
parameter_names (List): all parameter names in obj
parameter_names (list): all parameter names in this model.
"""
parameter_names = []
......