Merge branch 'develop' of github.com:baidu/Paddle into feature/c_api

28c4cee5 · Yu Yang · bda20086 · fb05a731 · 28c4cee5 · 28c4cee5
24 changed file
--- a/doc/api/v2/config/optimizer.rst
+++ b/doc/api/v2/config/optimizer.rst
-..  _api_v2.optimizer:
-
 ==========
 Optimizer
 ==========

--- a/doc/api/v2/data.rst
+++ b/doc/api/v2/data.rst
-========
-Datasets
-========
+==================================
+Data Reader Interface and DataSets
+==================================


 DataTypes
@@ -49,7 +49,6 @@ mnist
    :members:
    :noindex:

-
 cifar
 +++++

@@ -61,7 +60,7 @@ conll05
 +++++++

 ..  automodule:: paddle.v2.dataset.conll05
-    :members:
+    :members: get_dict,get_embedding,test
    :noindex:

 imdb
@@ -85,6 +84,12 @@ movielens
    :members:
    :noindex:

+..  autoclass:: paddle.v2.dataset.movielens.MovieInfo
+    :noindex:
+    
+..  autoclass:: paddle.v2.dataset.movielens.UserInfo
+    :noindex:
+
 sentiment
 +++++++++

@@ -102,7 +107,7 @@ uci_housing
 wmt14
 +++++

-..  automodule:: paddle.v2.dataset.uci_housing
+..  automodule:: paddle.v2.dataset.wmt14
    :members:
    :noindex:

--- a/doc/api/v2/run_logic.rst
+++ b/doc/api/v2/run_logic.rst
@@ -6,18 +6,21 @@ Parameters
 ==========

 ..  automodule:: paddle.v2.parameters
+    :members: Parameters
    :noindex:

 Trainer
 =======

 ..  automodule:: paddle.v2.trainer
+    :members: SGD
    :noindex:

 Event
 =====

 ..  automodule:: paddle.v2.event
+    :members:
    :noindex:

 Inference
@@ -25,3 +28,4 @@ Inference

 ..  autofunction:: paddle.v2.infer
    :noindex:
+    
\ No newline at end of file
--- a/doc/tutorials/embedding_model/index_cn.md
+++ b/doc/tutorials/embedding_model/index_cn.md
@@ -6,9 +6,10 @@

 ## 介绍 ###
 ### 中文字典 ###
-我们的字典使用内部的分词工具对百度知道和百度百科的语料进行分词后产生。分词风格如下： "《红楼梦》"将被分为 "《"，"红楼梦"，"》"，和 "《红楼梦》"。字典采用UTF8编码，输出有2列：词本身和词频。字典共包含 3206325个词和3个特殊标记：
+我们的字典使用内部的分词工具对百度知道和百度百科的语料进行分词后产生。分词风格如下： "《红楼梦》"将被分为 "《"，"红楼梦"，"》"，和 "《红楼梦》"。字典采用UTF8编码，输出有2列：词本身和词频。字典共包含 3206326个词和4个特殊标记：
  - `<s>`: 分词序列的开始
  - `<e>`: 分词序列的结束
+  - `PALCEHOLDER_JUST_IGNORE_THE_EMBEDDING`: 占位符，没有实际意义
  - `<unk>`: 未知词

 ### 中文词向量的预训练模型 ###

--- a/doc/tutorials/embedding_model/index_en.md
+++ b/doc/tutorials/embedding_model/index_en.md
@@ -6,9 +6,10 @@ We thank @lipeng for the pull request that defined the model schemas and pretrai

 ## Introduction ###
 ### Chinese Word Dictionary ###
-Our Chinese-word dictionary is created on Baidu ZhiDao and Baidu Baike by using in-house word segmentor. For example, the participle of "《红楼梦》" is "《"，"红楼梦"，"》"，and "《红楼梦》". Our dictionary (using UTF-8 format) has has two columns: word and its frequency. The total word count is 3206325, including 3 special token:
+Our Chinese-word dictionary is created on Baidu ZhiDao and Baidu Baike by using in-house word segmentor. For example, the participle of "《红楼梦》" is "《"，"红楼梦"，"》"，and "《红楼梦》". Our dictionary (using UTF-8 format) has two columns: word and its frequency. The total word count is 3206326, including 4 special tokens:
  - `<s>`: the start of a sequence
  - `<e>`: the end of a sequence
+  - `PALCEHOLDER_JUST_IGNORE_THE_EMBEDDING`: a placeholder, just ignore it and its embedding
  - `<unk>`: a word not included in dictionary

 ### Pretrained Chinese Word Embedding Model ###

--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,9 +134,7 @@ public:
    backward(callback);
  }

-  virtual Argument getLayerOutput(const std::string& layerName) {
-    return *((Argument*)nullptr);
-  }
+  virtual Argument getLayerOutput(const std::string& layerName) = 0;

  // see comment in Layer.h for the function with the same name
  virtual void resetState() {}

--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -9,6 +9,9 @@ if [ ${WITH_GPU} == "ON" ]; then
  GPU_DOCKER_PKG="python-pip python-dev"
 else
  BASE_IMAGE="python:2.7.13-slim"
+  # FIXME: python base image uses different python version than WITH_GPU
+  # need to change PYTHONHOME to /usr/local when using python base image
+  CPU_DOCKER_PYTHON_HOME_ENV="ENV PYTHONHOME /usr/local"
 fi

 DOCKERFILE_GPU_ENV=""
@@ -97,7 +100,8 @@ ADD build/*.deb /usr/local/opt/paddle/deb/
 RUN dpkg -i /usr/local/opt/paddle/deb/*.deb && \
    rm -f /usr/local/opt/paddle/deb/*.deb && \
    paddle version
-${DOCKERFILE_CUDNN_DSO} 
+${CPU_DOCKER_PYTHON_HOME_ENV}
+${DOCKERFILE_CUDNN_DSO}
 ${DOCKERFILE_GPU_ENV}
 # default command shows the paddle version and exit
 CMD ["paddle", "version"]

--- a/paddle/utils/.gitignore
+++ b/paddle/utils/.gitignore
 enable_virtualenv.c
-PythonUtil.cpp
--- a/paddle/utils/CMakeLists.txt
+++ b/paddle/utils/CMakeLists.txt
 # The utilities for paddle
-
-configure_file(PythonUtil.cpp.in ${PROJ_ROOT}/paddle/utils/PythonUtil.cpp)
-
 file(GLOB UTIL_HEADERS . *.h)
 file(GLOB UTIL_SOURCES . *.cpp)
 create_resources(enable_virtualenv.py enable_virtualenv.c)

--- a/paddle/utils/PythonUtil.cpp.in
+++ b/paddle/utils/PythonUtil.cpp.in
@@ -195,15 +195,6 @@ extern const char enable_virtualenv_py[];
 }
 void initPython(int argc, char** argv) {
 #ifndef PADDLE_NO_PYTHON
-  std::string pyHome;
-#if defined(__APPLE__) || defined(__OSX__)
-  pyHome = "/usr/local/Frameworks/Python.framework/Versions/2.7";
-  Py_SetPythonHome(const_cast<char*>(pyHome.c_str()));
-#endif
-  pyHome = "@PYTHON_INSTALL_DIR@"; // NOLINT
-  if (!pyHome.empty()) {
-    Py_SetPythonHome(const_cast<char*>(pyHome.c_str()));
-  }
  Py_SetProgramName(argv[0]);
  Py_Initialize();
  PySys_SetArgv(argc, argv);

--- a/python/paddle/v2/data_feeder.py
+++ b/python/paddle/v2/data_feeder.py
@@ -67,7 +67,7 @@ class DataFeeder(DataProviderConverter):
        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
        #                     ]
-        arg = feeder(minibatch_data)
+        arg = feeder.convert(minibatch_data)

    ..  note::


--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -12,9 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+CIFAR dataset.
+
+This module will download dataset from
+https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
+paddle reader creators.
+
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+with 6000 images per class. There are 50000 training images and 10000 test
+images.
+
+The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
+containing 600 images each. There are 500 training images and 100 testing
+images per class.

-TODO(yuyang18): Complete the comments.
 """

 import cPickle
@@ -54,20 +65,56 @@ def reader_creator(filename, sub_name):


 def train100():
+    """
+    CIFAR-100 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')


 def test100():
+    """
+    CIFAR-100 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')


 def train10():
+    """
+    CIFAR-10 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')


 def test10():
+    """
+    CIFAR-10 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')


--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -11,19 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conll05 dataset.
+Paddle semantic role labeling Book and demo use this dataset as an example.
+Because Conll05 is not free in public, the default downloaded URL is test set
+of Conll05 (which is public). Users can change URL and MD5 to their Conll
+dataset. And a pre-trained word vector model based on Wikipedia corpus is used
+to initialize SRL model.
+"""

 import tarfile
 import gzip
 import itertools
 from common import download
-"""
-Conll 2005 dataset.  Paddle semantic role labeling Book and demo use this
-dataset as an example. Because Conll 2005 is not free in public, the default
-downloaded URL is test set of Conll 2005 (which is public). Users can change
-URL and MD5 to their Conll dataset.
-
-TODO(yuyang18): Complete comments.
-"""

 __all__ = ['test, get_dict', 'get_embedding']

@@ -179,6 +179,9 @@ def reader_creator(corpus_reader,


 def get_dict():
+    """
+    Get the word, verb and label dictionary of Wikipedia corpus.
+    """
    word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
    verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
    label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
@@ -186,10 +189,24 @@ def get_dict():


 def get_embedding():
+    """
+    Get the trained word vector based on Wikipedia corpus.
+    """
    return download(EMB_URL, 'conll05st', EMB_MD5)


 def test():
+    """
+    Conll05 test set creator.
+
+    Because the training dataset is not free, the test dataset is used for
+    training. It returns a reader creator, each sample in the reader is nine
+    features, including sentence sequence, predicate, predicate context,
+    predicate context flag and tagged sequence.
+
+    :return: Training reader creator
+    :rtype: callable
+    """
    word_dict, verb_dict, label_dict = get_dict()
    reader = corpus_reader(
        download(DATA_URL, 'conll05st', DATA_MD5),

--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+IMDB dataset.

-TODO(yuyang18): Complete comments.
+This module downloads IMDB dataset from
+http://ai.stanford.edu/%7Eamaas/data/sentiment/. This dataset contains a set
+of 25,000 highly polar movie reviews for training, and 25,000 for testing.
+Besides, this module also provides API for building dictionary.
 """

 import paddle.v2.dataset.common
@@ -31,8 +34,11 @@ URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'


-# Read files that match pattern.  Tokenize and yield each file.
 def tokenize(pattern):
+    """
+    Read files that match the given pattern.  Tokenize and yield each file.
+    """
+
    with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
                                                        MD5)) as tarf:
        # Note that we should use tarfile.next(), which does
@@ -49,6 +55,10 @@ def tokenize(pattern):


 def build_dict(pattern, cutoff):
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
    word_freq = collections.defaultdict(int)
    for doc in tokenize(pattern):
        for word in doc:
@@ -110,18 +120,46 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):


 def train(word_idx):
+    """
+    IMDB training set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        re.compile("aclImdb/train/pos/.*\.txt$"),
        re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)


 def test(word_idx):
+    """
+    IMDB test set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Test reader creator
+    :rtype: callable
+    """
    return reader_creator(
        re.compile("aclImdb/test/pos/.*\.txt$"),
        re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)


 def word_dict():
+    """
+    Build a word dictionary from the corpus.
+
+    :return: Word dictionary
+    :rtype: dict
+    """
    return build_dict(
        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)


--- a/python/paddle/v2/dataset/imikolov.py
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
+imikolov's simple dataset.

-Complete comments.
+This module will download dataset from 
+http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
+into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import collections
@@ -40,6 +42,10 @@ def word_count(f, word_freq=None):


 def build_dict():
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
    train_filename = './simple-examples/data/ptb.train.txt'
    test_filename = './simple-examples/data/ptb.valid.txt'
    with tarfile.open(
@@ -84,10 +90,36 @@ def reader_creator(filename, word_idx, n):


 def train(word_idx, n):
+    """
+    imikolov training set creator.
+
+    It returns a reader creator, each sample in the reader is a word ID
+    tuple.
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)


 def test(word_idx, n):
+    """
+    imikolov test set creator.
+
+    It returns a reader creator, each sample in the reader is a word ID
+    tuple.
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :param n: sliding window size
+    :type n: int
+    :return: Test reader creator
+    :rtype: callable
+    """
    return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)



--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -15,7 +15,7 @@
 MNIST dataset.

 This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
-parse train set and test set into paddle reader creators.
+parse training set and test set into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import subprocess
@@ -76,12 +76,12 @@ def reader_creator(image_filename, label_filename, buffer_size):

 def train():
    """
-    MNIST train set creator.
+    MNIST training set creator.

    It returns a reader creator, each sample in the reader is image pixels in
    [0, 1] and label in [0, 9].

-    :return: Train reader creator
+    :return: Training reader creator
    :rtype: callable
    """
    return reader_creator(

--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/v2/dataset/movielens.py
@@ -14,7 +14,12 @@
 """
 Movielens 1-M dataset.

-TODO(yuyang18): Complete comments.
+Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
+movies, which was collected by GroupLens Research. This module will download
+Movielens 1-M dataset from 
+http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
+set and test set into paddle reader creators.
+
 """

 import zipfile
@@ -35,12 +40,19 @@ MD5 = 'c4d9eecfca2ab87c1945afe126590906'


 class MovieInfo(object):
+    """
+    Movie id, title and categories information are stored in MovieInfo.
+    """
+
    def __init__(self, index, categories, title):
        self.index = int(index)
        self.categories = categories
        self.title = title

    def value(self):
+        """
+        Get information from a movie.
+        """
        return [
            self.index, [CATEGORIES_DICT[c] for c in self.categories],
            [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
@@ -55,6 +67,10 @@ class MovieInfo(object):


 class UserInfo(object):
+    """
+    User id, gender, age, and job information are stored in UserInfo.
+    """
+
    def __init__(self, index, gender, age, job_id):
        self.index = int(index)
        self.is_male = gender == 'M'
@@ -62,6 +78,9 @@ class UserInfo(object):
        self.job_id = int(job_id)

    def value(self):
+        """
+        Get information from a user.
+        """
        return [self.index, 0 if self.is_male else 1, self.age, self.job_id]

    def __str__(self):
@@ -148,6 +167,9 @@ test = functools.partial(__reader_creator__, is_test=True)


 def get_movie_title_dict():
+    """
+    Get movie title dictionary.
+    """
    __initialize_meta_info__()
    return MOVIE_TITLE_DICT

@@ -160,11 +182,17 @@ def __max_index_info__(a, b):


 def max_movie_id():
+    """
+    Get the maximum value of movie id.
+    """
    __initialize_meta_info__()
    return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index


 def max_user_id():
+    """
+    Get the maximum value of user id.
+    """
    __initialize_meta_info__()
    return reduce(__max_index_info__, USER_INFO.viewvalues()).index

@@ -177,21 +205,33 @@ def __max_job_id_impl__(a, b):


 def max_job_id():
+    """
+    Get the maximum value of job id.
+    """
    __initialize_meta_info__()
    return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id


 def movie_categories():
+    """
+    Get movie categories dictionary.
+    """
    __initialize_meta_info__()
    return CATEGORIES_DICT


 def user_info():
+    """
+    Get user info dictionary.
+    """
    __initialize_meta_info__()
    return USER_INFO


 def movie_info():
+    """
+    Get movie info dictionary.
+    """
    __initialize_meta_info__()
    return MOVIE_INFO


--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -113,7 +113,7 @@ def reader_creator(data):

 def train():
    """
-    Default train set reader creator
+    Default training set reader creator
    """
    data_set = load_sentiment_data()
    return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])

--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -14,7 +14,9 @@
 """
 UCI Housing dataset.

-TODO(yuyang18): Complete comments.
+This module will download dataset from
+https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
+parse training set and test set into paddle reader creators.
 """

 import numpy as np
@@ -70,6 +72,15 @@ def load_data(filename, feature_num=14, ratio=0.8):


 def train():
+    """
+    UCI_HOUSING training set creator.
+
+    It returns a reader creator, each sample in the reader is features after
+    normalization and price number.
+
+    :return: Training reader creator
+    :rtype: callable
+    """
    global UCI_TRAIN_DATA
    load_data(download(URL, 'uci_housing', MD5))

@@ -81,6 +92,15 @@ def train():


 def test():
+    """
+    UCI_HOUSING test set creator.
+
+    It returns a reader creator, each sample in the reader is features after
+    normalization and price number.
+
+    :return: Test reader creator
+    :rtype: callable
+    """
    global UCI_TEST_DATA
    load_data(download(URL, 'uci_housing', MD5))


--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-wmt14 dataset
+WMT14 dataset.
+The original WMT14 dataset is too large, so a small subset of the data is
+provided. This module will download dataset from
+http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
+parse training set and test set into paddle reader creators.
+
 """
 import tarfile
 import gzip
@@ -99,11 +104,31 @@ def reader_creator(tar_file, file_name, dict_size):


 def train(dict_size):
+    """
+    WMT14 training set creator.
+
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+
+    :return: Training reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size)


 def test(dict_size):
+    """
+    WMT14 test set creator.
+
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+
+    :return: Test reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size)


--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
 """
-All training events.
+Testing and training events.

 There are:

+* TestResult
 * BeginIteration
 * EndIteration
 * BeginPass
 * EndPass
-
-TODO(yuyang18): Complete it!
 """
 import py_paddle.swig_paddle as api


--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -9,6 +9,17 @@ __all__ = ['infer']


 class Inference(object):
+    """
+    Inference combines neural network output and parameters together
+    to do inference.
+
+    :param output_layer: The neural network that should be inferenced.
+    :type output_layer: paddle.v2.config_base.Layer or the sequence
+                        of paddle.v2.config_base.Layer
+    :param parameters: The parameters dictionary.
+    :type parameters: paddle.v2.parameters.Parameters
+    """
+
    def __init__(self, output_layer, parameters):
        topo = topology.Topology(output_layer)
        gm = api.GradientMachine.createFromConfigProto(
@@ -49,7 +60,7 @@ class Inference(object):
        retv = None
        for result in self.iter_infer_field(field=field, **kwargs):
            if retv is None:
-                retv = [[]] * len(result)
+                retv = [[] for i in xrange(len(result))]
            for i, item in enumerate(result):
                retv[i].append(item)
        retv = [numpy.concatenate(out) for out in retv]

--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -47,6 +47,35 @@ class Optimizer(object):


 class Momentum(Optimizer):
+    """
+    SGD Optimizer.
+
+    SGD is an optimization method, trying to find a neural network that
+    minimize the "cost/error" of it by iteration. In paddle's implementation
+    SGD Optimizer is synchronized, which means it waits until all gradients
+    are calculated and reduced into one gradient, then does the optimization.
+
+    The neural network consider the learning problem of minimizing an objective
+    function, that has the form of a sum
+
+    ..  math::
+
+        Q(w) = \\sum_{i}^{n} Q_i(w)
+
+    The value of function Q sometimes is the cost of neural network (Mean
+    Square Error between prediction and label for example). The function Q is
+    parametrised by w, the weight/bias of neural network. The weights are what
+    is to be learned. The i is the i-th observation in the (training) data.
+
+    So, the SGD method will optimize the weight by
+
+    ..  math::
+
+        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
+
+    where :math:`\\eta` is learning rate. And :math:`n` is batch size.
+    """
+
    def __init__(self, momentum=None, sparse=False, **kwargs):
        learning_method = v1_optimizers.MomentumOptimizer(
            momentum=momentum, sparse=sparse)
@@ -55,6 +84,26 @@ class Momentum(Optimizer):


 class Adam(Optimizer):
+    """
+    Adam optimizer.
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_
+
+    ..  math::
+
+        m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
+        v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
+        w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
+
+    :param beta1: the :math:`\\beta_1` in equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in equation.
+    :type beta2: float
+    :param epsilon: the :math:`\\epsilon` in equation. It is used to prevent
+                        divided by zero.
+    :type epsilon: float
+    """
+
    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
        learning_method = v1_optimizers.AdamOptimizer(
            beta1=beta1, beta2=beta2, epsilon=epsilon)
@@ -62,6 +111,24 @@ class Adam(Optimizer):


 class Adamax(Optimizer):
+    """
+    Adamax optimizer.
+
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_
+
+    ..  math::
+
+        m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
+        u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
+        w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
+
+    :param beta1: the :math:`\\beta_1` in the equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in the equation.
+    :type beta2: float
+    """
+
    def __init__(self, beta1=0.9, beta2=0.999, **kwargs):
        learning_method = v1_optimizers.AdamaxOptimizer(
            beta1=beta1, beta2=beta2)
@@ -69,12 +136,40 @@ class Adamax(Optimizer):


 class AdaGrad(Optimizer):
+    """
+    Adagrad(for ADAptive GRAdient algorithm) optimizer.
+
+    For details, please refer to `Adaptive Subgradient Methods for
+    Online Learning and Stochastic Optimization
+    <http://www.magicbroom.info/Papers/DuchiHaSi10.pdf>`_.
+
+    ..  math::
+
+        G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
+        w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
+    """
+
    def __init__(self, **kwargs):
        learning_method = v1_optimizers.AdaGradOptimizer()
        super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)


 class DecayedAdaGrad(Optimizer):
+    """
+    AdaGrad method with decayed sum gradients. The equations of this method
+    are shown as follows.
+
+    ..  math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon )
+
+    :param rho: The :math:`\\rho` parameter in that equation
+    :type rho: float
+    :param epsilon: The :math:`\\epsilon` parameter in that equation.
+    :type epsilon: float
+    """
+
    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
        learning_method = v1_optimizers.DecayedAdaGradOptimizer(
            rho=rho, epsilon=epsilon)
@@ -83,6 +178,24 @@ class DecayedAdaGrad(Optimizer):


 class AdaDelta(Optimizer):
+    """
+    AdaDelta method. The details of adadelta please refer to this
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
+    <http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf>`_.
+
+    ..  math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
+                          E(g_t^2) + \\epsilon ) ) \\\\
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+
+    :param rho: :math:`\\rho` in equation
+    :type rho: float
+    :param epsilon: :math:`\\epsilon` in equation
+    :type epsilon: float
+    """
+
    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
        learning_method = v1_optimizers.AdaDeltaOptimizer(
            rho=rho, epsilon=epsilon)
@@ -91,6 +204,24 @@ class AdaDelta(Optimizer):


 class RMSProp(Optimizer):
+    """
+    RMSProp(for Root Mean Square Propagation) optimizer. For details please
+    refer to this `slide <http://www.cs.toronto.edu/~tijmen/csc321/slides/
+    lecture_slides_lec6.pdf>`_.
+
+    The equations of this method are as follows:
+
+    ..  math::
+
+        v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
+        w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
+
+    :param rho: the :math:`\\rho` in the equation. The forgetting factor.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` in the equation.
+    :type epsilon: float
+    """
+
    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
        learning_method = v1_optimizers.RMSPropOptimizer(
            rho=rho, epsilon=epsilon)

--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
+"""
+Module Trainer
+"""
 import collections

 import py_paddle.swig_paddle as api
@@ -9,10 +12,6 @@ from . import optimizer as v2_optimizer
 from . import parameters as v2_parameters

 __all__ = ['SGD']
-"""
-Trainer package
-TODO(yuyang18): Complete comments.
-"""


 def default_event_handler(event):
@@ -29,7 +28,8 @@ def default_event_handler(event):
 class SGD(object):
    """
    Simple SGD Trainer.
-    TODO(yuyang18): Complete comments
+    SGD Trainer combines data reader, network topology and update_equation together
+    to train/test a neural network.

    :param update_equation: The optimizer object.
    :type update_equation: paddle.v2.optimizer.Optimizer
@@ -74,7 +74,9 @@ class SGD(object):
        """
        Training method. Will train num_passes of input data.

-        :param reader:
+        :param reader: A reader that reads and yields data items. Usually we use a
+                       batched reader to do mini-batch training.
+        :type reader: collections.Iterable
        :param num_passes: The total train passes.
        :param event_handler: Event handler. A method will be invoked when event
                              occurred.
@@ -132,6 +134,16 @@ class SGD(object):
        self.__gradient_machine__.finish()

    def test(self, reader, feeding=None):
+        """
+        Testing method. Will test input data.
+
+        :param reader: A reader that reads and yields data items.
+        :type reader: collections.Iterable
+        :param feeding: Feeding is a map of neural network input name and array
+                        index that reader returns.
+        :type feeding: dict
+        :return:
+        """
        feeder = DataFeeder(self.__data_types__, feeding)
        evaluator = self.__gradient_machine__.makeEvaluator()
        out_args = api.Arguments.createArguments(0)