Merge pull request #1719 from QiJune/feature/add_v2_api_doc

Add more docs for paddle v2 api

Merge pull request #1719 from QiJune/feature/add_v2_api_doc
Add more docs for paddle v2 api
e3296559 · QI JUN · GitHub · 7384966f · 398d50dd · e3296559
17 changed files
--- a/doc/api/v2/config/optimizer.rst
+++ b/doc/api/v2/config/optimizer.rst
-..  _api_v2.optimizer:
 ==========
 Optimizer
 ==========

--- a/doc/api/v2/data.rst
+++ b/doc/api/v2/data.rst
-========
+==================================
-Datasets
+Data Reader Interface and DataSets
-========
+==================================
 DataTypes
@@ -49,7 +49,6 @@ mnist
    :members:
    :noindex:
 cifar
 +++++
@@ -61,7 +60,7 @@ conll05
 +++++++
 ..  automodule:: paddle.v2.dataset.conll05
-    :members:
+    :members: get_dict,get_embedding,test
    :noindex:
 imdb
@@ -85,6 +84,12 @@ movielens
    :members:
    :noindex:
+..  autoclass:: paddle.v2.dataset.movielens.MovieInfo
+    :noindex:
+..  autoclass:: paddle.v2.dataset.movielens.UserInfo
+    :noindex:
 sentiment
 +++++++++
@@ -102,7 +107,7 @@ uci_housing
 wmt14
 +++++
-..  automodule:: paddle.v2.dataset.uci_housing
+..  automodule:: paddle.v2.dataset.wmt14
    :members:
    :noindex:
--- a/doc/api/v2/run_logic.rst
+++ b/doc/api/v2/run_logic.rst
@@ -6,18 +6,21 @@ Parameters
 ==========
 ..  automodule:: paddle.v2.parameters
+    :members: Parameters
    :noindex:
 Trainer
 =======
 ..  automodule:: paddle.v2.trainer
+    :members: SGD
    :noindex:
 Event
 =====
 ..  automodule:: paddle.v2.event
+    :members:
    :noindex:
 Inference
@@ -25,3 +28,4 @@ Inference
 ..  autofunction:: paddle.v2.infer
    :noindex:
\ No newline at end of file
--- a/python/paddle/v2/data_feeder.py
+++ b/python/paddle/v2/data_feeder.py
@@ -67,7 +67,7 @@ class DataFeeder(DataProviderConverter):
        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
        #                     ]
-        arg = feeder(minibatch_data)
+        arg = feeder.convert(minibatch_data)
    ..  note::

--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -12,9 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+CIFAR dataset.
+This module will download dataset from
+https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
+paddle reader creators.
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+with 6000 images per class. There are 50000 training images and 10000 test
+images.
+The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
+containing 600 images each. There are 500 training images and 100 testing
+images per class.
-TODO(yuyang18): Complete the comments.
 """
 import cPickle
@@ -54,20 +65,56 @@ def reader_creator(filename, sub_name):
 def train100():
+    """
+    CIFAR-100 training set creator.
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
 def test100():
+    """
+    CIFAR-100 test set creator.
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')
 def train10():
+    """
+    CIFAR-10 training set creator.
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')
 def test10():
+    """
+    CIFAR-10 test set creator.
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')

--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -11,19 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conll05 dataset.
+Paddle semantic role labeling Book and demo use this dataset as an example.
+Because Conll05 is not free in public, the default downloaded URL is test set
+of Conll05 (which is public). Users can change URL and MD5 to their Conll
+dataset. And a pre-trained word vector model based on Wikipedia corpus is used
+to initialize SRL model.
+"""
 import tarfile
 import gzip
 import itertools
 from common import download
-"""
-Conll 2005 dataset.  Paddle semantic role labeling Book and demo use this
-dataset as an example. Because Conll 2005 is not free in public, the default
-downloaded URL is test set of Conll 2005 (which is public). Users can change
-URL and MD5 to their Conll dataset.
-TODO(yuyang18): Complete comments.
-"""
 __all__ = ['test, get_dict', 'get_embedding']
@@ -179,6 +179,9 @@ def reader_creator(corpus_reader,
 def get_dict():
+    """
+    Get the word, verb and label dictionary of Wikipedia corpus.
+    """
    word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
    verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
    label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
@@ -186,10 +189,24 @@ def get_dict():
 def get_embedding():
+    """
+    Get the trained word vector based on Wikipedia corpus.
+    """
    return download(EMB_URL, 'conll05st', EMB_MD5)
 def test():
+    """
+    Conll05 test set creator.
+    Because the training dataset is not free, the test dataset is used for
+    training. It returns a reader creator, each sample in the reader is nine
+    features, including sentence sequence, predicate, predicate context,
+    predicate context flag and tagged sequence.
+    :return: Training reader creator
+    :rtype: callable
+    """
    word_dict, verb_dict, label_dict = get_dict()
    reader = corpus_reader(
        download(DATA_URL, 'conll05st', DATA_MD5),

--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+IMDB dataset.
-TODO(yuyang18): Complete comments.
+This module downloads IMDB dataset from
+http://ai.stanford.edu/%7Eamaas/data/sentiment/. This dataset contains a set
+of 25,000 highly polar movie reviews for training, and 25,000 for testing.
+Besides, this module also provides API for building dictionary.
 """
 import paddle.v2.dataset.common
@@ -31,8 +34,11 @@ URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
-# Read files that match pattern.  Tokenize and yield each file.
 def tokenize(pattern):
+    """
+    Read files that match the given pattern.  Tokenize and yield each file.
+    """
    with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
                                                        MD5)) as tarf:
        # Note that we should use tarfile.next(), which does
@@ -49,6 +55,10 @@ def tokenize(pattern):
 def build_dict(pattern, cutoff):
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
    word_freq = collections.defaultdict(int)
    for doc in tokenize(pattern):
        for word in doc:
@@ -110,18 +120,46 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
 def train(word_idx):
+    """
+    IMDB training set creator.
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        re.compile("aclImdb/train/pos/.*\.txt$"),
        re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)
 def test(word_idx):
+    """
+    IMDB test set creator.
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Test reader creator
+    :rtype: callable
+    """
    return reader_creator(
        re.compile("aclImdb/test/pos/.*\.txt$"),
        re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
 def word_dict():
+    """
+    Build a word dictionary from the corpus.
+    :return: Word dictionary
+    :rtype: dict
+    """
    return build_dict(
        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)

--- a/python/paddle/v2/dataset/imikolov.py
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
+imikolov's simple dataset.
-Complete comments.
+This module will download dataset from 
+http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
+into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import collections
@@ -40,6 +42,10 @@ def word_count(f, word_freq=None):
 def build_dict():
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
    train_filename = './simple-examples/data/ptb.train.txt'
    test_filename = './simple-examples/data/ptb.valid.txt'
    with tarfile.open(
@@ -84,10 +90,36 @@ def reader_creator(filename, word_idx, n):
 def train(word_idx, n):
+    """
+    imikolov training set creator.
+    It returns a reader creator, each sample in the reader is a word ID
+    tuple.
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
 def test(word_idx, n):
+    """
+    imikolov test set creator.
+    It returns a reader creator, each sample in the reader is a word ID
+    tuple.
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Test reader creator
+    :rtype: callable
+    """
    return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)

--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -15,7 +15,7 @@
 MNIST dataset.
 This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
-parse train set and test set into paddle reader creators.
+parse training set and test set into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import subprocess
@@ -76,12 +76,12 @@ def reader_creator(image_filename, label_filename, buffer_size):
 def train():
    """
-    MNIST train set creator.
+    MNIST training set creator.
    It returns a reader creator, each sample in the reader is image pixels in
    [0, 1] and label in [0, 9].
-    :return: Train reader creator
+    :return: Training reader creator
    :rtype: callable
    """
    return reader_creator(

--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/v2/dataset/movielens.py
@@ -14,7 +14,12 @@
 """
 Movielens 1-M dataset.
-TODO(yuyang18): Complete comments.
+Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
+movies, which was collected by GroupLens Research. This module will download
+Movielens 1-M dataset from 
+http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
+set and test set into paddle reader creators.
 """
 import zipfile
@@ -35,12 +40,19 @@ MD5 = 'c4d9eecfca2ab87c1945afe126590906'
 class MovieInfo(object):
+    """
+    Movie id, title and categories information are stored in MovieInfo.
+    """
    def __init__(self, index, categories, title):
        self.index = int(index)
        self.categories = categories
        self.title = title
    def value(self):
+        """
+        Get information from a movie.
+        """
        return [
            self.index, [CATEGORIES_DICT[c] for c in self.categories],
            [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
@@ -55,6 +67,10 @@ class MovieInfo(object):
 class UserInfo(object):
+    """
+    User id, gender, age, and job information are stored in UserInfo.
+    """
    def __init__(self, index, gender, age, job_id):
        self.index = int(index)
        self.is_male = gender == 'M'
@@ -62,6 +78,9 @@ class UserInfo(object):
        self.job_id = int(job_id)
    def value(self):
+        """
+        Get information from a user.
+        """
        return [self.index, 0 if self.is_male else 1, self.age, self.job_id]
    def __str__(self):
@@ -148,6 +167,9 @@ test = functools.partial(__reader_creator__, is_test=True)
 def get_movie_title_dict():
+    """
+    Get movie title dictionary.
+    """
    __initialize_meta_info__()
    return MOVIE_TITLE_DICT
@@ -160,11 +182,17 @@ def __max_index_info__(a, b):
 def max_movie_id():
+    """
+    Get the maximum value of movie id.
+    """
    __initialize_meta_info__()
    return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index
 def max_user_id():
+    """
+    Get the maximum value of user id.
+    """
    __initialize_meta_info__()
    return reduce(__max_index_info__, USER_INFO.viewvalues()).index
@@ -177,21 +205,33 @@ def __max_job_id_impl__(a, b):
 def max_job_id():
+    """
+    Get the maximum value of job id.
+    """
    __initialize_meta_info__()
    return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id
 def movie_categories():
+    """
+    Get movie categories dictionary.
+    """
    __initialize_meta_info__()
    return CATEGORIES_DICT
 def user_info():
+    """
+    Get user info dictionary.
+    """
    __initialize_meta_info__()
    return USER_INFO
 def movie_info():
+    """
+    Get movie info dictionary.
+    """
    __initialize_meta_info__()
    return MOVIE_INFO

--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -113,7 +113,7 @@ def reader_creator(data):
 def train():
    """
-    Default train set reader creator
+    Default training set reader creator
    """
    data_set = load_sentiment_data()
    return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])

--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -14,7 +14,9 @@
 """
 UCI Housing dataset.
-TODO(yuyang18): Complete comments.
+This module will download dataset from
+https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
+parse training set and test set into paddle reader creators.
 """
 import numpy as np
@@ -70,6 +72,15 @@ def load_data(filename, feature_num=14, ratio=0.8):
 def train():
+    """
+    UCI_HOUSING training set creator.
+    It returns a reader creator, each sample in the reader is features after
+    normalization and price number.
+    :return: Training reader creator
+    :rtype: callable
+    """
    global UCI_TRAIN_DATA
    load_data(download(URL, 'uci_housing', MD5))
@@ -81,6 +92,15 @@ def train():
 def test():
+    """
+    UCI_HOUSING test set creator.
+    It returns a reader creator, each sample in the reader is features after
+    normalization and price number.
+    :return: Test reader creator
+    :rtype: callable
+    """
    global UCI_TEST_DATA
    load_data(download(URL, 'uci_housing', MD5))

--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-wmt14 dataset
+WMT14 dataset.
+The original WMT14 dataset is too large, so a small subset of the data is
+provided. This module will download the dataset from
+http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
+parse training set and test set into paddle reader creators.
 """
 import tarfile
 import gzip
@@ -99,11 +104,31 @@ def reader_creator(tar_file, file_name, dict_size):
 def train(dict_size):
+    """
+    WMT14 training set creator.
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size)
 def test(dict_size):
+    """
+    WMT14 test set creator.
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+    :return: Test reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size)

--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
 """
-All training events.
+Testing and training events.
 There are:
+* TestResult
 * BeginIteration
 * EndIteration
 * BeginPass
 * EndPass
-TODO(yuyang18): Complete it!
 """
 import py_paddle.swig_paddle as api

--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -9,6 +9,17 @@ __all__ = ['infer']
 class Inference(object):
+    """
+    Inference combines neural network output and parameters together
+    to do inference.
+    :param output_layer: The neural network that should be inferenced.
+    :type output_layer: paddle.v2.config_base.Layer or the sequence
+                        of paddle.v2.config_base.Layer
+    :param parameters: The parameters dictionary.
+    :type parameters: paddle.v2.parameters.Parameters
+    """
    def __init__(self, output_layer, parameters):
        topo = topology.Topology(output_layer)
        gm = api.GradientMachine.createFromConfigProto(

--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -47,6 +47,35 @@ class Optimizer(object):
 class Momentum(Optimizer):
+    """
+    SGD Optimizer.
+    SGD is an optimization method, trying to find a neural network that
+    minimize the "cost/error" of it by iteration. In paddle's implementation
+    SGD Optimizer is synchronized, which means all gradients are computed and
+    reduced into one gradient before the optimization step is applied.
+    The neural network consider the learning problem of minimizing an objective
+    function, that has the form of a sum
+    ..  math::
+        Q(w) = \\sum_{i}^{n} Q_i(w)
+    The value of function Q sometimes is the cost of neural network (Mean
+    Square Error between prediction and label for example). The function Q is
+    parametrised by w, the weight/bias of neural network. The weights are what is
+    to be learned. The i is the i-th observation in the (training) data.
+    So, the SGD method will optimize the weight by
+    ..  math::
+        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
+    where :math:`\\eta` is learning rate. And :math:`n` is batch size.
+    """
    def __init__(self, momentum=None, sparse=False, **kwargs):
        learning_method = v1_optimizers.MomentumOptimizer(
            momentum=momentum, sparse=sparse)
@@ -55,6 +84,26 @@ class Momentum(Optimizer):
 class Adam(Optimizer):
+    """
+    Adam optimizer.
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_
+    ..  math::
+        m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
+        v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
+        w & = w - \\frac{\\eta}{\\sqrt{v(w,t) + \\epsilon}} m(w, t)
+    :param beta1: the :math:`\\beta_1` in equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in equation.
+    :type beta2: float
+    :param epsilon: the :math:`\\epsilon` in equation. It is used to prevent
+                        division by zero.
+    :type epsilon: float
+    """
    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
        learning_method = v1_optimizers.AdamOptimizer(
            beta1=beta1, beta2=beta2, epsilon=epsilon)
@@ -62,6 +111,24 @@ class Adam(Optimizer):
 class Adamax(Optimizer):
+    """
+    Adamax optimizer.
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_
+    ..  math::
+        m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
+        u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
+        w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
+    :param beta1: the :math:`\\beta_1` in the equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in the equation.
+    :type beta2: float
+    """
    def __init__(self, beta1=0.9, beta2=0.999, **kwargs):
        learning_method = v1_optimizers.AdamaxOptimizer(
            beta1=beta1, beta2=beta2)
@@ -69,12 +136,40 @@ class Adamax(Optimizer):
 class AdaGrad(Optimizer):
+    """
+    Adagrad(for ADAptive GRAdient algorithm) optimizer.
+    For details, please refer to `Adaptive Subgradient Methods for
+    Online Learning and Stochastic Optimization
+    <http://www.magicbroom.info/Papers/DuchiHaSi10.pdf>`_.
+    ..  math::
+        G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
+        w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
+    """
    def __init__(self, **kwargs):
        learning_method = v1_optimizers.AdaGradOptimizer()
        super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)
 class DecayedAdaGrad(Optimizer):
+    """
+    AdaGrad method with decayed sum gradients. The equations of this method
+    are shown as follows.
+    ..  math::
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon )
+    :param rho: The :math:`\\rho` parameter in that equation
+    :type rho: float
+    :param epsilon: The :math:`\\epsilon` parameter in that equation.
+    :type epsilon: float
+    """
    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
        learning_method = v1_optimizers.DecayedAdaGradOptimizer(
            rho=rho, epsilon=epsilon)
@@ -83,6 +178,24 @@ class DecayedAdaGrad(Optimizer):
 class AdaDelta(Optimizer):
+    """
+    AdaDelta method. The details of adadelta please refer to this
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
+    <http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf>`_.
+    ..  math::
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
+                          E(g_t^2) + \\epsilon ) ) \\\\
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+    :param rho: :math:`\\rho` in equation
+    :type rho: float
+    :param epsilon: :math:`\\epsilon` in equation
+    :type epsilon: float
+    """
    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
        learning_method = v1_optimizers.AdaDeltaOptimizer(
            rho=rho, epsilon=epsilon)
@@ -91,6 +204,24 @@ class AdaDelta(Optimizer):
 class RMSProp(Optimizer):
+    """
+    RMSProp(for Root Mean Square Propagation) optimizer. For details please
+    refer to this `slide <http://www.cs.toronto.edu/~tijmen/csc321/slides/
+    lecture_slides_lec6.pdf>`_.
+    The equations of this method are as follows:
+    ..  math::
+        v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
+        w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
+    :param rho: the :math:`\\rho` in the equation. The forgetting factor.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` in the equation.
+    :type epsilon: float
+    """
    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
        learning_method = v1_optimizers.RMSPropOptimizer(
            rho=rho, epsilon=epsilon)

--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
+"""
+Module Trainer
+"""
 import collections
 import py_paddle.swig_paddle as api
@@ -9,10 +12,6 @@ from . import optimizer as v2_optimizer
 from . import parameters as v2_parameters
 __all__ = ['SGD']
-"""
-Trainer package
-TODO(yuyang18): Complete comments.
-"""
 def default_event_handler(event):
@@ -29,7 +28,8 @@ def default_event_handler(event):
 class SGD(object):
    """
    Simple SGD Trainer.
-    TODO(yuyang18): Complete comments
+    SGD Trainer combines data reader, network topology and update_equation together
+    to train/test a neural network.
    :param update_equation: The optimizer object.
    :type update_equation: paddle.v2.optimizer.Optimizer
@@ -74,7 +74,9 @@ class SGD(object):
        """
        Training method. Will train num_passes of input data.
-        :param reader:
+        :param reader: A reader that reads and yields data items. Usually we use a
+                       batched reader to do mini-batch training.
+        :type reader: collections.Iterable
        :param num_passes: The total train passes.
        :param event_handler: Event handler. A method will be invoked when event
                              occurred.
@@ -132,6 +134,16 @@ class SGD(object):
        self.__gradient_machine__.finish()
    def test(self, reader, feeding=None):
+        """
+        Testing method. Will test input data.
+        :param reader: A reader that reads and yields data items.
+        :type reader: collections.Iterable  
+        :param feeding: Feeding is a map of neural network input name and array
+                        index that reader returns.
+        :type feeding: dict
+        :return:
+        """
        feeder = DataFeeder(self.__data_types__, feeding)
        evaluator = self.__gradient_machine__.makeEvaluator()
        out_args = api.Arguments.createArguments(0)