diff --git a/doc/api/v2/config/optimizer.rst b/doc/api/v2/config/optimizer.rst
index ec6ba0aa46239f3806ca950e8863b953d0c4150b..b32373fdef52a7aa9d64b12cda3f76cb2abf351b 100644
--- a/doc/api/v2/config/optimizer.rst
+++ b/doc/api/v2/config/optimizer.rst
@@ -1,5 +1,3 @@
-.. _api_v2.optimizer:
-
 ==========
 Optimizer
 ==========
diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst
index b042320bc2922a1ddfa06b5d8479ac9134ae9d89..fef87c4fbdb452771ecdb361c6eeae5b32bcee14 100644
--- a/doc/api/v2/data.rst
+++ b/doc/api/v2/data.rst
@@ -1,6 +1,6 @@
-========
-Datasets
-========
+==================================
+Data Reader Interface and DataSets
+==================================
 
 
 DataTypes
@@ -49,7 +49,6 @@ mnist
    :members:
    :noindex:
 
-
 cifar
 +++++
@@ -61,7 +60,7 @@ conll05
 +++++++
 
 .. automodule:: paddle.v2.dataset.conll05
-   :members:
+   :members: get_dict,get_embedding,test
    :noindex:
 
 imdb
@@ -85,6 +84,12 @@ movielens
    :members:
    :noindex:
 
+.. autoclass:: paddle.v2.dataset.movielens.MovieInfo
+   :noindex:
+
+.. autoclass:: paddle.v2.dataset.movielens.UserInfo
+   :noindex:
+
 sentiment
 +++++++++
@@ -102,7 +107,7 @@ uci_housing
 wmt14
 +++++
 
-.. automodule:: paddle.v2.dataset.uci_housing
+.. automodule:: paddle.v2.dataset.wmt14
    :members:
    :noindex:
 
diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst
index 94921e1a7b9c0a95931136bfb65d2560dba8b8ee..5c97651f6536d89d2b5926d4b2907a547aa86b55 100644
--- a/doc/api/v2/run_logic.rst
+++ b/doc/api/v2/run_logic.rst
@@ -6,18 +6,21 @@ Parameters
 ==========
 
 .. automodule:: paddle.v2.parameters
+    :members: Parameters
     :noindex:
 
 Trainer
 =======
 
 .. automodule:: paddle.v2.trainer
+    :members: SGD
    :noindex:
 
 Event
 =====
 
 .. automodule:: paddle.v2.event
+    :members:
    :noindex:
 
 Inference
@@ -25,3 +28,4 @@ Inference
 
 .. autofunction:: paddle.v2.infer
    :noindex:
+
\ No newline at end of file
diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py
index ca3e44e5a0187da33654f4955197196b150da196..2698251b9e15046eb14f71c3f5b0546ecbb4a5dd 100644
--- a/python/paddle/v2/data_feeder.py
+++ b/python/paddle/v2/data_feeder.py
@@ -67,7 +67,7 @@ class DataFeeder(DataProviderConverter):
         #      [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
         #      [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
         # ]
-        arg = feeder(minibatch_data)
+        arg = feeder.convert(minibatch_data)
 
     .. note::
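For context, a minimal sketch of how the corrected `feeder.convert` call is used. The input names, shapes and `feeding` map below are made up for illustration; only `DataFeeder` and the `paddle.v2.data_type` helpers come from the diff above:

```python
import paddle.v2 as paddle
from paddle.v2.data_feeder import DataFeeder

# hypothetical network with one dense input and one integer label
data_types = [('image', paddle.data_type.dense_vector(4)),
              ('label', paddle.data_type.integer_value(10))]
feeder = DataFeeder(data_types=data_types, feeding={'image': 0, 'label': 1})

minibatch_data = [([1.0, 2.0, 3.0, 4.0], 5),
                  ([4.0, 3.0, 2.0, 1.0], 7)]
arg = feeder.convert(minibatch_data)  # swig Arguments object fed to paddle
```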
diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py
index 3a8b98b8f045b0eb58be69649486cbd0a571f118..41fda1e8f24cdef13d8ab3645862814100a1cd4c 100644
--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -12,9 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+CIFAR dataset.
+
+This module will download the dataset from
+https://www.cs.toronto.edu/~kriz/cifar.html and parse the train/test set
+into paddle reader creators.
+
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+with 6000 images per class. There are 50000 training images and 10000 test
+images.
+
+The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
+containing 600 images each. There are 500 training images and 100 testing
+images per class.
 
-TODO(yuyang18): Complete the comments.
 """
 
 import cPickle
@@ -54,20 +65,56 @@ def reader_creator(filename, sub_name):
 
 
 def train100():
+    """
+    CIFAR-100 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
 
 
 def test100():
+    """
+    CIFAR-100 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')
 
 
 def train10():
+    """
+    CIFAR-10 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')
 
 
 def test10():
+    """
+    CIFAR-10 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')
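A short usage sketch for the reader creators documented above. The per-sample layout follows the docstrings; the dataset is assumed to be downloaded on first use:

```python
import paddle.v2.dataset.cifar as cifar

reader = cifar.train10()   # reader creator: calling it yields samples
for i, (image, label) in enumerate(reader()):
    # image: flat array of pixel values in [0, 1]; label: int in [0, 9]
    print i, len(image), label
    if i >= 2:
        break
```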
diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py
index f1b0ce16f21ad13d4564242c2359355236093032..12d648bf6557ed6e437320e56a80294abac29f18 100644
--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -11,19 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conll05 dataset.
+Paddle semantic role labeling Book and demo use this dataset as an example.
+Because Conll05 is not free to the public, the default download URL is the
+test set of Conll05 (which is public). Users can change the URL and MD5 to
+their own Conll dataset. A pre-trained word vector model based on the
+Wikipedia corpus is used to initialize the SRL model.
+"""
 
 import tarfile
 import gzip
 import itertools
 from common import download
-"""
-Conll 2005 dataset. Paddle semantic role labeling Book and demo use this
-dataset as an example. Because Conll 2005 is not free in public, the default
-downloaded URL is test set of Conll 2005 (which is public). Users can change
-URL and MD5 to their Conll dataset.
-
-TODO(yuyang18): Complete comments.
-"""
 
 __all__ = ['test', 'get_dict', 'get_embedding']
@@ -179,6 +179,9 @@ def reader_creator(corpus_reader,
 
 
 def get_dict():
+    """
+    Get the word, verb and label dictionaries of the Wikipedia corpus.
+    """
     word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
     verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
     label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
@@ -186,10 +189,24 @@ def get_dict():
 
 
 def get_embedding():
+    """
+    Get the trained word vector based on the Wikipedia corpus.
+    """
     return download(EMB_URL, 'conll05st', EMB_MD5)
 
 
 def test():
+    """
+    Conll05 test set creator.
+
+    Because the training dataset is not free, the test dataset is used for
+    training. It returns a reader creator; each sample in the reader has nine
+    features, including sentence sequence, predicate, predicate context,
+    predicate context flag and tagged sequence.
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     word_dict, verb_dict, label_dict = get_dict()
     reader = corpus_reader(
         download(DATA_URL, 'conll05st', DATA_MD5),
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 9a7ccff4d5cd2563053adb0aae95fc6d10ad2a50..5dc5abfe53d90ec3adc9a27a49ed086953146497 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+IMDB dataset.
 
-TODO(yuyang18): Complete comments.
+This module downloads the IMDB dataset from
+http://ai.stanford.edu/%7Eamaas/data/sentiment/. The dataset contains a set
+of 25,000 highly polar movie reviews for training, and 25,000 for testing.
+In addition, this module provides an API for building the word dictionary.
 """
 
 import paddle.v2.dataset.common
@@ -31,8 +34,11 @@ URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
 
 
-# Read files that match pattern.  Tokenize and yield each file.
 def tokenize(pattern):
+    """
+    Read files that match the given pattern. Tokenize and yield each file.
+    """
+
     with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
                                                         MD5)) as tarf:
         # Note that we should use tarfile.next(), which does
@@ -49,6 +55,10 @@ def tokenize(pattern):
 
 
 def build_dict(pattern, cutoff):
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
     word_freq = collections.defaultdict(int)
     for doc in tokenize(pattern):
         for word in doc:
@@ -110,18 +120,46 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
 
 
 def train(word_idx):
+    """
+    IMDB training set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/train/pos/.*\.txt$"),
         re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)
 
 
 def test(word_idx):
+    """
+    IMDB test set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/test/pos/.*\.txt$"),
         re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
 
 
 def word_dict():
+    """
+    Build a word dictionary from the corpus.
+
+    :return: Word dictionary
+    :rtype: dict
+    """
     return build_dict(
         re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
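The IMDB readers compose in the same way; a minimal sketch using only the functions documented above:

```python
import paddle.v2.dataset.imdb as imdb

word_idx = imdb.word_dict()          # {word: zero-based id}, built once
train_reader = imdb.train(word_idx)  # reader creator

ids, label = next(train_reader())    # ids: word-ID sequence, label: 0 or 1
```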
""" import paddle.v2.dataset.common import collections @@ -40,6 +42,10 @@ def word_count(f, word_freq=None): def build_dict(): + """ + Build a word dictionary from the corpus, Keys of the dictionary are words, + and values are zero-based IDs of these words. + """ train_filename = './simple-examples/data/ptb.train.txt' test_filename = './simple-examples/data/ptb.valid.txt' with tarfile.open( @@ -84,10 +90,36 @@ def reader_creator(filename, word_idx, n): def train(word_idx, n): + """ + imikolov training set creator. + + It returns a reader creator, each sample in the reader is a word ID + tuple. + + :param word_idx: word dictionary + :type word_idx: dict + :param n: sliding window size + :type n: int + :return: Training reader creator + :rtype: callable + """ return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n) def test(word_idx, n): + """ + imikolov test set creator. + + It returns a reader creator, each sample in the reader is a word ID + tuple. + + :param word_idx: word dictionary + :type word_idx: dict + :param n: sliding window size + :type n: int + :return: Test reader creator + :rtype: callable + """ return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n) diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 48a39b5493a8004d6eb034498a797af9c662bd19..c1347d3c66da858104858bfb6739d84051322146 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -15,7 +15,7 @@ MNIST dataset. This module will download dataset from http://yann.lecun.com/exdb/mnist/ and -parse train set and test set into paddle reader creators. +parse training set and test set into paddle reader creators. """ import paddle.v2.dataset.common import subprocess @@ -76,12 +76,12 @@ def reader_creator(image_filename, label_filename, buffer_size): def train(): """ - MNIST train set creator. + MNIST training set creator. It returns a reader creator, each sample in the reader is image pixels in [0, 1] and label in [0, 9]. - :return: Train reader creator + :return: Training reader creator :rtype: callable """ return reader_creator( diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index e148ddeca0370cd76128a31ce3a4d488e9737d98..837a85912663826f0483aff4f6a38f3945375d82 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -14,7 +14,12 @@ """ Movielens 1-M dataset. -TODO(yuyang18): Complete comments. +Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000 +movies, which was collected by GroupLens Research. This module will download +Movielens 1-M dataset from +http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training +set and test set into paddle reader creators. + """ import zipfile @@ -35,12 +40,19 @@ MD5 = 'c4d9eecfca2ab87c1945afe126590906' class MovieInfo(object): + """ + Movie id, title and categories information are stored in MovieInfo. + """ + def __init__(self, index, categories, title): self.index = int(index) self.categories = categories self.title = title def value(self): + """ + Get information from a movie. + """ return [ self.index, [CATEGORIES_DICT[c] for c in self.categories], [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()] @@ -55,6 +67,10 @@ class MovieInfo(object): class UserInfo(object): + """ + User id, gender, age, and job information are stored in UserInfo. 
+ """ + def __init__(self, index, gender, age, job_id): self.index = int(index) self.is_male = gender == 'M' @@ -62,6 +78,9 @@ class UserInfo(object): self.job_id = int(job_id) def value(self): + """ + Get information from a user. + """ return [self.index, 0 if self.is_male else 1, self.age, self.job_id] def __str__(self): @@ -148,6 +167,9 @@ test = functools.partial(__reader_creator__, is_test=True) def get_movie_title_dict(): + """ + Get movie title dictionary. + """ __initialize_meta_info__() return MOVIE_TITLE_DICT @@ -160,11 +182,17 @@ def __max_index_info__(a, b): def max_movie_id(): + """ + Get the maximum value of movie id. + """ __initialize_meta_info__() return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index def max_user_id(): + """ + Get the maximum value of user id. + """ __initialize_meta_info__() return reduce(__max_index_info__, USER_INFO.viewvalues()).index @@ -177,21 +205,33 @@ def __max_job_id_impl__(a, b): def max_job_id(): + """ + Get the maximum value of job id. + """ __initialize_meta_info__() return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id def movie_categories(): + """ + Get movie categoriges dictionary. + """ __initialize_meta_info__() return CATEGORIES_DICT def user_info(): + """ + Get user info dictionary. + """ __initialize_meta_info__() return USER_INFO def movie_info(): + """ + Get movie info dictionary. + """ __initialize_meta_info__() return MOVIE_INFO diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 0eeb6d5affd8c280fb74edc82cf24bf418ca8ef9..4dd34e7383fe2a290fcf61474914183a383e2b9c 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -113,7 +113,7 @@ def reader_creator(data): def train(): """ - Default train set reader creator + Default training set reader creator """ data_set = load_sentiment_data() return reader_creator(data_set[0:NUM_TRAINING_INSTANCES]) diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index dab8620441c966b19d8218025f8d8fa5b40d1c2c..3469fd9ce12dd4d934004f90286979b73048a5c8 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -14,7 +14,9 @@ """ UCI Housing dataset. -TODO(yuyang18): Complete comments. +This module will download dataset from +https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and +parse training set and test set into paddle reader creators. """ import numpy as np @@ -70,6 +72,15 @@ def load_data(filename, feature_num=14, ratio=0.8): def train(): + """ + UCI_HOUSING training set creator. + + It returns a reader creator, each sample in the reader is features after + normalization and price number. + + :return: Training reader creator + :rtype: callable + """ global UCI_TRAIN_DATA load_data(download(URL, 'uci_housing', MD5)) @@ -81,6 +92,15 @@ def train(): def test(): + """ + UCI_HOUSING test set creator. + + It returns a reader creator, each sample in the reader is features after + normalization and price number. 
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
index dab8620441c966b19d8218025f8d8fa5b40d1c2c..3469fd9ce12dd4d934004f90286979b73048a5c8 100644
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -14,7 +14,9 @@
 """
 UCI Housing dataset.
 
-TODO(yuyang18): Complete comments.
+This module will download the dataset from
+https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
+parse the training set and test set into paddle reader creators.
 """
 
 import numpy as np
@@ -70,6 +72,15 @@ def load_data(filename, feature_num=14, ratio=0.8):
 
 
 def train():
+    """
+    UCI_HOUSING training set creator.
+
+    It returns a reader creator, each sample in the reader is the features
+    after normalization and the corresponding house price.
+
+    :return: Training reader creator.
+    :rtype: callable
+    """
     global UCI_TRAIN_DATA
     load_data(download(URL, 'uci_housing', MD5))
 
@@ -81,6 +92,15 @@ def train():
 
 
 def test():
+    """
+    UCI_HOUSING test set creator.
+
+    It returns a reader creator, each sample in the reader is the features
+    after normalization and the corresponding house price.
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     global UCI_TEST_DATA
     load_data(download(URL, 'uci_housing', MD5))
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index 23ca8036281b16447403c1bfcec5e11f839ab94e..0902f87741c342b237439081703081b467dc6f35 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-wmt14 dataset
+WMT14 dataset.
+
+The original WMT14 dataset is too large, so a shrunken version is provided
+instead. This module will download the dataset from
+http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
+parse the training set and test set into paddle reader creators.
 """
 import tarfile
 import gzip
@@ -99,11 +104,31 @@ def reader_creator(tar_file, file_name, dict_size):
 
 
 def train(dict_size):
+    """
+    WMT14 training set creator.
+
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+
+    :param dict_size: size of the word dictionary
+    :type dict_size: int
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size)
 
 
 def test(dict_size):
+    """
+    WMT14 test set creator.
+
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+
+    :param dict_size: size of the word dictionary
+    :type dict_size: int
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size)
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index 1ad52b8baa411269d29732685871a875df5185cc..fd6050fa339d280ad54e40128ea6bae25132c873 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -1,14 +1,13 @@
 """
-All training events.
+Testing and training events.
 
 There are:
 
+* TestResult
 * BeginIteration
 * EndIteration
 * BeginPass
 * EndPass
-
-TODO(yuyang18): Complete it!
 """
 import py_paddle.swig_paddle as api
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 3210bf0642cda53cded52d7c38a8770e36290c19..c178336303f53769863063922868cd2a22e4b957 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -9,6 +9,17 @@ __all__ = ['infer']
 
 
 class Inference(object):
+    """
+    Inference combines a neural network's output and parameters together
+    to do inference.
+
+    :param output_layer: The neural network to run inference on.
+    :type output_layer: paddle.v2.config_base.Layer or a sequence of
+                        paddle.v2.config_base.Layer
+    :param parameters: The parameters dictionary.
+    :type parameters: paddle.v2.parameters.Parameters
+    """
+
     def __init__(self, output_layer, parameters):
         topo = topology.Topology(output_layer)
         gm = api.GradientMachine.createFromConfigProto(
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
index 1a01d95c205c0626374e1814a170ce2d58f23a60..feefd7d758ba09f5d8f818ca1b12b00c5f0e9797 100644
--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -47,6 +47,35 @@ class Optimizer(object):
 
 
 class Momentum(Optimizer):
+    """
+    SGD optimizer with momentum.
+
+    SGD is an optimization method that tries to find a neural network that
+    minimizes its "cost/error" by iteration. In paddle's implementation the
+    SGD optimizer is synchronized: all gradients of a mini-batch are
+    calculated and reduced into one gradient before the optimize operation
+    is applied.
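The events listed in `event.py` are consumed through an event handler. A minimal sketch following the paddle.v2 book examples; the event attributes used here (`pass_id`, `batch_id`, `cost`) are assumed from those examples:

```python
import paddle.v2 as paddle

def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            print "pass %d, batch %d, cost %f" % (
                event.pass_id, event.batch_id, event.cost)
    if isinstance(event, paddle.event.EndPass):
        print "pass %d finished" % event.pass_id
```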
+
+    The neural network considers the learning problem of minimizing an
+    objective function that has the form of a sum
+
+    .. math::
+
+        Q(w) = \\sum_{i}^{n} Q_i(w)
+
+    The value of the function Q is sometimes the cost of the neural network
+    (the Mean Square Error between prediction and label, for example). The
+    function Q is parametrised by w, the weights/biases of the neural
+    network, which are what is to be learned. The i is the i-th observation
+    in the (training) data.
+
+    So, the SGD method optimizes the weights by
+
+    .. math::
+
+        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
+
+    where :math:`\\eta` is the learning rate and :math:`n` is the batch size.
+    """
+
     def __init__(self, momentum=None, sparse=False, **kwargs):
         learning_method = v1_optimizers.MomentumOptimizer(
             momentum=momentum, sparse=sparse)
@@ -55,6 +84,26 @@ class Momentum(Optimizer):
 
 
 class Adam(Optimizer):
+    """
+    Adam optimizer.
+
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_.
+
+    .. math::
+
+        m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
+        v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
+        w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
+
+    :param beta1: the :math:`\\beta_1` in the equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in the equation.
+    :type beta2: float
+    :param epsilon: the :math:`\\epsilon` in the equation. It is used to
+                    prevent division by zero.
+    :type epsilon: float
+    """
+
     def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
         learning_method = v1_optimizers.AdamOptimizer(
             beta1=beta1, beta2=beta2, epsilon=epsilon)
@@ -62,6 +111,24 @@ class Adam(Optimizer):
 
 
 class Adamax(Optimizer):
+    """
+    Adamax optimizer.
+
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_.
+
+    .. math::
+
+        m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
+        u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
+        w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
+
+    :param beta1: the :math:`\\beta_1` in the equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in the equation.
+    :type beta2: float
+    """
+
     def __init__(self, beta1=0.9, beta2=0.999, **kwargs):
         learning_method = v1_optimizers.AdamaxOptimizer(
             beta1=beta1, beta2=beta2)
@@ -69,12 +136,40 @@ class Adamax(Optimizer):
 
 
 class AdaGrad(Optimizer):
+    """
+    AdaGrad (for ADAptive GRAdient algorithm) optimizer.
+
+    For details, please refer to `Adaptive Subgradient Methods for
+    Online Learning and Stochastic Optimization
+    <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_.
+
+    .. math::
+
+        G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
+        w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
+    """
+
     def __init__(self, **kwargs):
         learning_method = v1_optimizers.AdaGradOptimizer()
         super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)
 
 
 class DecayedAdaGrad(Optimizer):
+    """
+    AdaGrad method with a decayed sum of squared gradients. The equations of
+    this method are as follows:
+
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= 1/\\sqrt{E(g_t^2) + \\epsilon}
+
+    :param rho: the :math:`\\rho` parameter in the equation.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` parameter in the equation.
+    :type epsilon: float
+    """
+
     def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
         learning_method = v1_optimizers.DecayedAdaGradOptimizer(
             rho=rho, epsilon=epsilon)
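An illustrative NumPy sketch of the Adam update documented above, matching the docstring equations; this is not Paddle's implementation, and the function name is made up for the example:

```python
import numpy as np

def adam_step(w, grad, m, v, eta=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # first and second moment estimates, as in the docstring equations
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # parameter update; eps keeps the denominator away from zero
    w = w - eta * m / np.sqrt(v + eps)
    return w, m, v
```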
@@ -83,6 +178,24 @@
 
 
 class AdaDelta(Optimizer):
+    """
+    AdaDelta method. For details, please refer to
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
+    <https://arxiv.org/abs/1212.5701>`_.
+
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
+                          E(g_t^2) + \\epsilon ) ) \\\\
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+
+    :param rho: the :math:`\\rho` in the equation.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` in the equation.
+    :type epsilon: float
+    """
+
     def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
         learning_method = v1_optimizers.AdaDeltaOptimizer(
             rho=rho, epsilon=epsilon)
@@ -91,6 +204,24 @@ class AdaDelta(Optimizer):
 
 
 class RMSProp(Optimizer):
+    """
+    RMSProp (for Root Mean Square Propagation) optimizer. For details,
+    please refer to this `slide
+    <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_.
+
+    The equations of this method are as follows:
+
+    .. math::
+
+        v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
+        w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
+
+    :param rho: the :math:`\\rho` in the equation. The forgetting factor.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` in the equation.
+    :type epsilon: float
+    """
+
     def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
         learning_method = v1_optimizers.RMSPropOptimizer(
             rho=rho, epsilon=epsilon)
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index f5797a86c2b71502a7791453ff86c6a486c9f185..68b4967cc031dfa2dd164d822aff97585f923e48 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -1,3 +1,6 @@
+"""
+Trainer module.
+"""
 import collections
 
 import py_paddle.swig_paddle as api
@@ -9,10 +12,6 @@ from . import optimizer as v2_optimizer
 from . import parameters as v2_parameters
 
 __all__ = ['SGD']
-"""
-Trainer package
-
-TODO(yuyang18): Complete comments.
-"""
 
 
 def default_event_handler(event):
@@ -29,7 +28,8 @@ def default_event_handler(event):
 class SGD(object):
     """
     Simple SGD Trainer.
-    TODO(yuyang18): Complete comments
+    The SGD trainer combines a data reader, a network topology and an update
+    equation together to train/test a neural network.
 
     :param update_equation: The optimizer object.
     :type update_equation: paddle.v2.optimizer.Optimizer
@@ -74,7 +74,9 @@ class SGD(object):
         """
         Training method. Will train num_passes of input data.
 
-        :param reader:
+        :param reader: A reader that reads and yields data items. Usually we
+                       use a batched reader to do mini-batch training.
+        :type reader: collections.Iterable
         :param num_passes: The total train passes.
         :param event_handler: Event handler. A method will be invoked when event
                               occurred.
@@ -132,6 +134,16 @@ class SGD(object):
         self.__gradient_machine__.finish()
 
     def test(self, reader, feeding=None):
+        """
+        Testing method. Will test input data.
+
+        :param reader: A reader that reads and yields data items.
+        :type reader: collections.Iterable
+        :param feeding: Feeding is a map of neural network input name and the
+                        array index that reader returns.
+        :type feeding: dict
+        """
         feeder = DataFeeder(self.__data_types__, feeding)
         evaluator = self.__gradient_machine__.makeEvaluator()
         out_args = api.Arguments.createArguments(0)
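Putting the pieces together, a sketch of how the optimizer, trainer, reader and event handler combine, following the paddle.v2 book examples; `cost`, `parameters` and `event_handler` are assumed to be defined in your own model code:

```python
import paddle.v2 as paddle

optimizer = paddle.optimizer.Momentum(momentum=0.9, learning_rate=1e-3)
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)
trainer.train(
    reader=paddle.batch(paddle.dataset.uci_housing.train(), batch_size=32),
    num_passes=10,
    event_handler=event_handler)
trainer.test(reader=paddle.batch(paddle.dataset.uci_housing.test(),
                                 batch_size=32))
```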