diff --git a/docs/_static/pygments.css b/docs/_static/pygments.css new file mode 100644 index 0000000000000000000000000000000000000000..287591d54fe9b8532a9fe4297f3303d9f3b3750f --- /dev/null +++ b/docs/_static/pygments.css @@ -0,0 +1,136 @@ +.highlight code, .highlight pre { +color:#fdce93; +background-color:#3f3f3f; +} + +.highlight .hll { +background-color:#222; +} + +.highlight .err { +color:#e37170; +background-color:#3d3535; +} + +.highlight .k { +color:#f0dfaf; +} + +.highlight .p { +color:#41706f; +} + +.highlight .cs { +color:#cd0000; +font-weight:700; +} + +.highlight .gd { +color:#cd0000; +} + +.highlight .ge { +color:#ccc; +font-style:italic; +} + +.highlight .gr { +color:red; +} + +.highlight .go { +color:gray; +} + +.highlight .gs { +color:#ccc; +font-weight:700; +} + +.highlight .gu { +color:purple; +font-weight:700; +} + +.highlight .gt { +color:#0040D0; +} + +.highlight .kc { +color:#dca3a3; +} + +.highlight .kd { +color:#ffff86; +} + +.highlight .kn { +color:#dfaf8f; +font-weight:700; +} + +.highlight .kp { +color:#cdcf99; +} + +.highlight .kr { +color:#cdcd00; +} + +.highlight .ni { +color:#c28182; +} + +.highlight .ne { +color:#c3bf9f; +font-weight:700; +} + +.highlight .nn { +color:#8fbede; +} + +.highlight .vi { +color:#ffffc7; +} + +.highlight .c,.preview-zenburn .highlight .g,.preview-zenburn .highlight .cm,.preview-zenburn .highlight .cp,.preview-zenburn .highlight .c1 { +color:#7f9f7f; +} + +.highlight .l,.preview-zenburn .highlight .x,.preview-zenburn .highlight .no,.preview-zenburn .highlight .nd,.preview-zenburn .highlight .nl,.preview-zenburn .highlight .nx,.preview-zenburn .highlight .py,.preview-zenburn .highlight .w { +color:#ccc; +} + +.highlight .n,.preview-zenburn .highlight .nv,.preview-zenburn .highlight .vg { +color:#dcdccc; +} + +.highlight .o,.preview-zenburn .highlight .ow { +color:#f0efd0; +} + +.highlight .gh,.preview-zenburn .highlight .gp { +color:#dcdccc; +font-weight:700; +} + +.highlight .gi,.preview-zenburn 
.highlight .kt { +color:#00cd00; +} + +.highlight .ld,.preview-zenburn .highlight .s,.preview-zenburn .highlight .sb,.preview-zenburn .highlight .sc,.preview-zenburn .highlight .sd,.preview-zenburn .highlight .s2,.preview-zenburn .highlight .se,.preview-zenburn .highlight .sh,.preview-zenburn .highlight .si,.preview-zenburn .highlight .sx,.preview-zenburn .highlight .sr,.preview-zenburn .highlight .s1,.preview-zenburn .highlight .ss { +color:#cc9393; +} + +.highlight .m,.preview-zenburn .highlight .mf,.preview-zenburn .highlight .mh,.preview-zenburn .highlight .mi,.preview-zenburn .highlight .mo,.preview-zenburn .highlight .il { +color:#8cd0d3; +} + +.highlight .na,.preview-zenburn .highlight .nt { +color:#9ac39f; +} + +.highlight .nb,.preview-zenburn .highlight .nc,.preview-zenburn .highlight .nf,.preview-zenburn .highlight .bp,.preview-zenburn .highlight .vc { +color:#efef8f; +} diff --git a/docs/agent.rst b/docs/agent.rst new file mode 100644 index 0000000000000000000000000000000000000000..b4c5233a0142dcef96ab9e355c8696127a75c4a9 --- /dev/null +++ b/docs/agent.rst @@ -0,0 +1,5 @@ +parl.Agent +------ +.. autoclass:: parl.core.fluid.agent.Agent + :members: + diff --git a/docs/algorithm.rst b/docs/algorithm.rst new file mode 100644 index 0000000000000000000000000000000000000000..1d0065d960a59afc8dd3c25a8d10d3e9399c011a --- /dev/null +++ b/docs/algorithm.rst @@ -0,0 +1,5 @@ +parl.Algorithm +------ +.. autoclass:: parl.core.fluid.algorithm.Algorithm + :members: + diff --git a/docs/conf.py b/docs/conf.py index d5d21650b1b0f49245f609589d02873ef8741fa8..315394670a99a645f58aa0034f90924d74816c1a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,7 +63,7 @@ templates_path = ['_templates'] # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
-language = 'zh_CN' +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -85,3 +85,32 @@ html_static_path = ['_static'] html_logo = './images/PARL-logo-2.png' master_doc = 'index' +napoleon_use_ivar = True + + +# do not skip documentation of the __init__ function of a class +def skip(app, what, name, obj, would_skip, options): + if name == "__init__": + return False + return would_skip + + +aliases = dict() +aliases['parl.core.fluid.agent.Agent'] = ['parl.agent'] + + +def parl_class_docstring(app, what, name, obj, options, lines): + if what != 'class': + return + obj = parl.Agent + name = parl.Agent + + #lines[0] = ['wohenhao'] + + +def setup(app): + app.connect("autodoc-skip-member", skip) + app.connect("autodoc-process-docstring", parl_class_docstring) + + +add_module_names = False diff --git a/docs/index.rst b/docs/index.rst index 98dbe823809374680e88ec1befef7d6851629589..b0bc8ccb9e509bfbc6ea48a27cf444785d4f09ad 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -70,6 +70,7 @@ Abstractions :maxdepth: 1 :caption: APIs - ./api_docs.utils - ./api_docs.index + model.rst + algorithm.rst + agent.rst diff --git a/docs/model.rst b/docs/model.rst new file mode 100644 index 0000000000000000000000000000000000000000..7a9180f20f02dd733d0e46085eec68b7af06126f --- /dev/null +++ b/docs/model.rst @@ -0,0 +1,5 @@ +parl.Model +------ +.. autoclass:: parl.core.fluid.model.Model + :members: + diff --git a/parl/core/fluid/agent.py b/parl/core/fluid/agent.py index c5f8e53453b607c283662e5e6853c2e50ddf124e..449ad8cf903085fc6d8586033aef0d056361ade9 100644 --- a/parl/core/fluid/agent.py +++ b/parl/core/fluid/agent.py @@ -26,11 +26,49 @@ __all__ = ['Agent'] class Agent(AgentBase): + """ + | `alias`: ``parl.Agent`` + | `alias`: ``parl.core.fluid.agent.Agent`` + + | Agent is one of the three basic classes of PARL. 
+ + | It is responsible for interacting with the environment and collecting data for training the policy. + | To implement a customized ``Agent``, users can: + + .. code-block:: python + + import parl + + class MyAgent(parl.Agent): + def __init__(self, algorithm, act_dim): + super(MyAgent, self).__init__(algorithm) + self.act_dim = act_dim + This class will initialize the neural network parameters automatically, and provides an executor for users to run the programs (self.fluid_executor). + + Attributes: + gpu_id (int): deprecated. specify which GPU to be used. -1 if to use the CPU. + fluid_executor (fluid.Executor): executor for running programs of the agent. + alg (parl.Algorithm): algorithm of this agent. + + Public Functions: + - ``build_program`` (**abstract function**): build various programs for the agent to interact with outer environment. + - ``get_weights``: return a Python dictionary containing all the parameters of self.alg. + - ``set_weights``: copy parameters from ``get_weights()`` to this agent. + - ``sample``: return a noisy action to perform exploration according to the policy. + - ``predict``: return an action given current observation. + - ``learn``: update the parameters of self.alg using the `learn_program` defined in `build_program()`. + + Todo: + - allow users to get parameters of a specified model by specifying the model's name in ``get_weights()``. + + """ + def __init__(self, algorithm, gpu_id=None): - """Build program and run initialization for default_startup_program + """Build programs by calling the method ``self.build_program()`` and run initialization function of ``fluid.default_startup_program()``. Args: - algorithm (parl.Algorithm): instance of `parl.core.fluid.algorithm.Algorithm` + algorithm (parl.Algorithm): an instance of `parl.Algorithm`. This algorithm is then passed to `self.alg`. + gpu_id (int): deprecated. specify which GPU to be used. -1 if to use the CPU.
""" if gpu_id is not None: warnings.warn( @@ -41,8 +79,6 @@ class Agent(AgentBase): assert isinstance(algorithm, Algorithm) super(Agent, self).__init__(algorithm) - # alias for self.algorithm - # use self.algorithm is suggested self.alg = algorithm self.gpu_id = 0 if machine_info.is_gpu_available() else -1 @@ -54,71 +90,70 @@ class Agent(AgentBase): self.fluid_executor.run(fluid.default_startup_program()) def build_program(self): - """Build leran/predict/sample program here with the - learn/predict/sample function defined in algorithm. + """Build various programs here with the + learn, predict, sample functions of the algorithm. Note: - It's unnecessary to call this function explictly since - it will be called automatically in the initialization function. + | Users **must** implement this function in an ``Agent``. + | This function will be called automatically in the initialization function. - To build the program, you may need to do the following: - a. Create a new program of fluid with program guard; - b. Define data input layers; - c. Pass the data variable defined in step b to learn/predict/sample of algorithm; + To build a program, you must do the following: + a. Create a fluid program with ``fluid.program_guard()``; + b. Define data layers for feeding the data; + c. Build various programs(e.g., learn_program, predict_program) with data layers defined in step b. + + Example: + + .. code-block:: python + + self.pred_program = fluid.Program() + + with fluid.program_guard(self.pred_program): + obs = layers.data( + name='obs', shape=[self.obs_dim], dtype='float32') + self.act_prob = self.alg.predict(obs) + + """ raise NotImplementedError @deprecated( deprecated_in='1.2', removed_in='1.3', replace_function='get_weights') def get_params(self): - """ Get parameters of self.algorithm + """ Returns a Python list containing all the parameters of self.alg. Returns: - List of numpy array. + a Python List containing the parameters of self.alg.
""" return self.algorithm.get_params() @deprecated( deprecated_in='1.2', removed_in='1.3', replace_function='set_weights') def set_params(self, params): - """Set parameters of self.algorithm + """Copy parameters from ``get_params()`` into this agent. Args: - params: List of numpy array. + params(list): a Python List containing the parameters of self.alg. """ self.algorithm.set_params(params) def learn(self, *args, **kwargs): - """The training interface for Agent. - - This function will usually do the following things: - 1. Accept numpy data as input; - 2. Feed numpy data; - 3. Run learn program defined in `build_program`. + """The training interface for ``Agent``. + This function feeds the training data into the learn_program defined in ``build_program()``. """ raise NotImplementedError def predict(self, *args, **kwargs): - """Predict the action when given the observation of the enviroment. - - In general, this function is used in test process. + """Predict an action when given the observation of the environment. - This function will usually do the following things: - 1. Accept numpy data as input; - 2. Feed numpy data; - 3. Run predict program defined in `build_program`. + This function feeds the observation into the prediction program defined in ``build_program()``. It is often used in the evaluation stage. """ raise NotImplementedError def sample(self, *args, **kwargs): - """Sample the action when given the observation of the enviroment. + """Return an action with noise when given the observation of the environment. - In general, this function is used in train process. + In general, this function is used in train process as noise is added to the action to perform exploration. - This function will usually do the following things: - 1. Accept numpy data as input; - 2. Feed numpy data; - 3. Run predict/sample program defined in `build_program`. - 4. Add sampling operation in numpy level. (unnecessary if sampling operation have done in `Algorithm`).
""" raise NotImplementedError diff --git a/parl/core/fluid/algorithm.py b/parl/core/fluid/algorithm.py index ec30b44c28026e11523f0dfa329aedb2f0d52d5d..1a05a9991a658e13282f847f2cf4772dc19b2572 100644 --- a/parl/core/fluid/algorithm.py +++ b/parl/core/fluid/algorithm.py @@ -23,16 +23,46 @@ __all__ = ['Algorithm'] class Algorithm(AlgorithmBase): - """Algorithm defines the way how we update the model. - - To implement a new algorithm, you may need implement the learn/predict/sample functions. + """ + | `alias`: ``parl.Algorithm`` + | `alias`: ``parl.core.fluid.algorithm.Algorithm`` + + | ``Algorithm`` defines the way how to update the parameters of the ``Model``. This is where we define loss functions and the optimizer of the neural network. An ``Algorithm`` has at least a model. + + | PARL has implemented various algorithms(DQN/DDPG/PPO/A3C/IMPALA) that can be reused quickly, which can be accessed with ``parl.algorithms``. + + Example: + + .. code-block:: python + + import parl + + model = Model() + dqn = parl.algorithms.DQN(model, lr=1e-3) - Before creating a customized algorithm, please do check algorithms of PARL. - Most common used algorithms like DQN/DDPG/PPO/A3C/IMPALA have been provided in `parl.algorithms`, - go and have a try. + Attributes: + model(``parl.Model``): a neural network that represents a policy or a Q-value function. + + Public Functions: + - ``get_weights``: return a Python dictionary containing parameters of the current model. + - ``set_weights``: copy parameters from ``get_weights()`` to the model. + - ``sample``: return a noisy action to perform exploration according to the policy. + - ``predict``: return an action given current observation. + - ``learn``: define the loss function and create an optimizer to minimize the loss. + + Note: + + ``Algorithm`` defines all its computation inside a ``fluid.Program``, such that the returns of functions(`sample`, `predict`, `learn`) are tensors.
+ ``Agent`` also has functions like `sample`, `predict`, and `learn`, but they return numpy array for the agent. + """ def __init__(self, model=None, hyperparas=None): + """ + Args: + model(``parl.Model``): a neural network that represents a policy or a Q-value function. + hyperparas(dict): a dict storing the hyper-parameters relative to training. + """ if model is not None: warnings.warn( "the `model` argument of `__init__` function in `parl.Algorithm` is deprecated since version 1.2 and will be removed in version 1.3.", @@ -52,34 +82,34 @@ class Algorithm(AlgorithmBase): @deprecated( deprecated_in='1.2', removed_in='1.3', replace_function='get_weights') def get_params(self): - """ Get parameters of self.model + """ Get parameters of self.model. Returns: - List of numpy array. + params(list): a Python List containing the parameters of self.model. """ return self.model.get_params() @deprecated( deprecated_in='1.2', removed_in='1.3', replace_function='set_weights') def set_params(self, params): - """ Set parameters of self.model + """ Set parameters from ``get_params`` to the model. Args: - params: List of numpy array. + params(list): a Python List containing the parameters of self.model. """ self.model.set_params(params) def learn(self, *args, **kwargs): - """ define learning process, such as how to optimize the model. + """ Define the loss function and create an optimizer to minimize the loss. """ raise NotImplementedError def predict(self, *args, **kwargs): - """ define predicting process, such as using policy model to predict actions when given observations. + """ Define the predicting process, e.g., use the policy model to predict actions. """ raise NotImplementedError def sample(self, *args, **kwargs): - """ define sampling process, such as using policy model to sample actions when given observations. + """ Define the sampling process. This function returns an action with noise to perform exploration.
""" raise NotImplementedError diff --git a/parl/core/fluid/model.py b/parl/core/fluid/model.py index 1e24211413a5a675d523887574b1384532b04230..c7aa3fd8fb1e078e29d2fa80ccf92a698492112b 100644 --- a/parl/core/fluid/model.py +++ b/parl/core/fluid/model.py @@ -24,45 +24,46 @@ __all__ = ['Model'] class Model(ModelBase): - """A `Model`, a collection of `parl.layers`, is owned by an `Algorithm`. + """ + | `alias`: ``parl.Model`` + | `alias`: ``parl.core.fluid.model.Model`` + + | ``Model`` is a base class of PARL for the neural network. A ``Model`` is usually a policy or Q-value function, which predicts an action or an estimate according to the environmental observation. + + | To track all the layers, users are required to implement neural networks with the layers from ``parl.layers`` (e.g., parl.layers.fc). These layers have the same APIs as fluid.layers. - It implements the entire network (forward part) to solve a specific problem. + | ``Model`` supports duplicating a ``Model`` instance in a pythonic way: - `Model` can also use deepcopy way to construct target model, which has the same structure as initial model. - Note that only the model definition is copied here. To copy the parameters from the current model - to the target model, you must explicitly use `sync_weights_to` function after the program is initialized. + | ``copied_model = copy.deepcopy(model)`` - NOTE: - You need initialize start up program before calling `sync_weights_to` API. + Example: - Here is an example: ..
code-block:: python - import parl.layers as layers - import parl.Model as Model - class MLPModel(Model): - def __init__(self): - self.fc = layers.fc(size=64) + import parl - def policy(self, obs): - out = self.fc(obs) - return out - - model = MLPModel() - target_model = deepcopy(model) # automatically create new unique parameters names for target_model.fc + class Policy(parl.Model): + def __init__(self): + self.fc = parl.layers.fc(size=12, act='softmax') - # build program - x = layers.data(name='x', shape=[100], dtype="float32") - y1 = model.policy(x) - y2 = target_model.policy(x) + def policy(self, obs): + out = self.fc(obs) + return out + + policy = Policy() + copied_policy = copy.deepcopy(policy) - ... - # Need initialize program before calling sync_weights_to - fluid_executor.run(fluid.default_startup_program()) - ... + Attributes: + model_id(str): each model instance has its unique model_id. - # synchronize parameters - model.sync_weights_to(target_model) + Public Functions: + - ``sync_weights_to``: synchronize parameters of the current model to another model. + - ``get_weights``: return a list containing all the parameters of the current model. + - ``set_weights``: copy parameters from ``get_weights()`` to the model. + - ``forward``: define the computations of a neural network. **Should** be overridden by all subclasses. + - ``parameters``: return a list containing names of parameters of the model. + - ``set_model_id``: set ``model_id`` of current model explicitly. + - ``get_model_id``: return the ``model_id`` of current model. """ @@ -80,36 +81,50 @@ class Model(ModelBase): target_net_weights = decay * target_net_weights + (1 - decay) * source_net_weights Args: - target_net (`Model`): `Model` object deepcopy from source `Model`. - decay (float): Float. The decay to use. + target_net (`parl.Model`): an instance of ``Model`` that has the same neural network architecture as the current model. + decay (float): the rate of decline in copying parameters.
0 if no parameters decay when synchronizing the parameters. share_vars_parallel_executor (fluid.ParallelExecutor): if not None, will use fluid.ParallelExecutor to run program instead of fluid.Executor """ self.sync_weights_to( - other_model=target_net, + target_model=target_net, decay=decay, share_vars_parallel_executor=share_vars_parallel_executor) def sync_weights_to(self, - other_model, + target_model, decay=0.0, share_vars_parallel_executor=None): - """Synchronize weights in the model to another model. + """Synchronize parameters of current model to another model. - To speed up the synchronizing process, will create a program implictly to finish the process. And will + To speed up the synchronizing process, will create a program implicitly to finish the process. And will also cache the program to avoid creating program repeatedly. - other_model_weights = decay * other_model_weights + (1 - decay) * current_model_weights + target_model_weights = decay * target_model_weights + (1 - decay) * current_model_weights Args: - other_model (`parl.Model`): object instanced from the same `parl.Model` class with current model. - decay (float): Float. The decay to use. - share_vars_parallel_executor (fluid.ParallelExecutor): if not None, will use fluid.ParallelExecutor - to run program instead of fluid.Executor + target_model (`parl.Model`): an instance of ``Model`` that has the same neural network architecture as the current model. + decay (float): the rate of decline in copying parameters. 0 if no parameters decay when synchronizing the parameters. + share_vars_parallel_executor (fluid.ParallelExecutor): if not None, will use ``fluid.ParallelExecutor`` + to run program instead of ``fluid.Executor``. + + Example: + + .. code-block:: python + + import copy + # create a model that has the same neural network structures. + target_model = copy.deepcopy(model) + + # after initializing the parameters ...
+ model.sync_weights_to(target_model) + + Note: + Before calling ``sync_weights_to``, parameters of the model must have been initialized. """ args_hash_id = hashlib.md5('{}_{}'.format( - id(other_model), decay).encode('utf-8')).hexdigest() + id(target_model), decay).encode('utf-8')).hexdigest() has_cached = False try: if self._cached_id == args_hash_id: @@ -121,13 +136,13 @@ class Model(ModelBase): # Can not run _cached program, need create a new program self._cached_id = args_hash_id - assert not other_model is self, "cannot copy between identical model" - assert isinstance(other_model, Model) - assert self.__class__.__name__ == other_model.__class__.__name__, \ + assert not target_model is self, "cannot copy between identical model" + assert isinstance(target_model, Model) + assert self.__class__.__name__ == target_model.__class__.__name__, \ "must be the same class for params syncing!" assert (decay >= 0 and decay <= 1) - param_pairs = self._get_parameter_pairs(self, other_model) + param_pairs = self._get_parameter_pairs(self, target_model) self._cached_sync_weights_program = fluid.Program() @@ -170,26 +185,37 @@ class Model(ModelBase): @deprecated( deprecated_in='1.2', removed_in='1.3', replace_function='parameters') def parameter_names(self): - """Get param_attr names of all parameters in the Model. + """Get names of all parameters in this ``Model``. - Only parameter created by parl.layers included. - The order of parameter names will be consistent between - different instances of same `Model`. + Only parameters created by ``parl.layers`` are included. + The order of parameter names is consistent among + different instances of the same `Model`. Returns: - list of string, param_attr names of all parameters + param_names(list): list of string containing parameter names of all parameters. """ return self.parameters() def parameters(self): - """Get param_attr names of all parameters in the Model. + """Get names of all parameters in this ``Model``.
- Only parameter created by parl.layers included. - The order of parameter names will be consistent between - different instances of same `Model`. + Only parameters created by ``parl.layers`` are included. + The order of parameter names is consistent among + different instances of the same `Model`. Returns: - list of string, param_attr names of all parameters + param_names(list): list of string containing parameter names of all parameters + + Example: + + .. code-block:: python + + model = Model() + model.parameters() + + # output: + ['fc0.w0', 'fc0.bias0'] + """ try: return self._parameter_names @@ -200,10 +226,10 @@ class Model(ModelBase): @deprecated( deprecated_in='1.2', removed_in='1.3', replace_function='get_weights') def get_params(self): - """Get numpy arrays of parameters in the model. + """ Return a Python list containing parameters of current model. Returns: - List of numpy array. + parameters: a Python list containing parameters of the current model. """ return self.get_weights() @@ -213,15 +239,14 @@ class Model(ModelBase): """Set parameters in the model with params. Args: - params (List): List of numpy array. + params (List): List of numpy array. """ self.set_weights(weights=params) def get_weights(self): - """Get numpy arrays of weights in the model. + """Returns a Python list containing parameters of current model. - Returns: - List of numpy array. + Returns: a Python list containing the parameters of current model. """ weights = [] for param_name in self.parameters(): @@ -231,10 +256,10 @@ class Model(ModelBase): return weights def set_weights(self, weights): - """Set weights in the model with given `weights`. + """Copy parameters from ``get_weights()`` to the model. Args: - weights (List): List of numpy array. + weights (list): a Python list containing the parameters.
""" assert len(weights) == len(self.parameters()), \ 'size of input weights should be same as weights number of current model' @@ -242,13 +267,13 @@ class Model(ModelBase): set_value(param_name, weight) def _get_parameter_names(self, obj): - """ Recursively get parameter names in obj, + """ Recursively get parameter names in a model and its child attributes. Args: - obj (`Model`/`LayerFunc`/list/tuple/dict): input object + obj (``parl.Model``): an instance of ``Model`` Returns: - parameter_names (List): all parameter names in obj + parameter_names (list): all parameter names in this model. """ parameter_names = []