diff --git a/.travis.yml b/.travis.yml index 28d1f51be7107b57594eb8aacab2e82d2dec624a..5a7f45a748ac7e81f3f90c245bcf2cd84c4e9027 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,22 +4,14 @@ cache: - $HOME/third_party - $HOME/.ccache - $HOME/.cache/pip - - $HOME/Library/Caches/Homebrew sudo: required dist: trusty os: - linux - - osx env: - JOB=DOCS - JOB=BUILD_AND_TEST - JOB=PRE_COMMIT -matrix: - exclude: - - os: osx - env: JOB=DOCS # Only generate documentation in linux. - - os: osx - env: JOB=PRE_COMMIT # Only check pre-commit hook in linux addons: apt: @@ -53,7 +45,6 @@ before_install: fi fi fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake index d319442ef10b38b9edf5844e5540a92c7094c7ce..1c29cb22a31f1e41a6b5575837c6374175cfdea5 100644 --- a/cmake/FindSphinx.cmake +++ b/cmake/FindSphinx.cmake @@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination ) ${source} ${destination} COMMENT "Generating sphinx documentation: ${builder}" - COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html + COMMAND cd ${destination} && ln -s ./index_*.html index.html ) set_property( diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake index ae3530c3a0eeb79ddbcbf4f2e99be75aa7968a2f..ad9a10cb8616159b9e3aff445e698cb2edb92820 100644 --- a/cmake/coverallsGcovJsons.cmake +++ b/cmake/coverallsGcovJsons.cmake @@ -110,14 +110,13 @@ endmacro() # Get the coverage data. file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda") -message("GCDA files:") +message("Process GCDA files:") +message("===============================") # Get a list of all the object directories needed by gcov # (The directories the .gcda files and .o files are found in) # and run gcov on those. foreach(GCDA ${GCDA_FILES}) - message("Process: ${GCDA}") - message("------------------------------------------------------------------------------") get_filename_component(GCDA_DIR ${GCDA} PATH) # @@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES}) # If -p is not specified then the file is named only "the_file.c.gcov" # execute_process( - COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} + COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null" WORKING_DIRECTORY ${GCDA_DIR} ) endforeach() @@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING}) set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") # Generate the final JSON for this file. - message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...") string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") endforeach() diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 84f459033f06f89d3b150317793c7e62274468b2..26da7e8e384bafdcbcd1a358c39cc6eb167b067e 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -14,46 +14,50 @@ INCLUDE(ExternalProject) -SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) -SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) -SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." 
FORCE) +FIND_PACKAGE(Protobuf) -INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) +IF(NOT PROTOBUF_FOUND) + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) + SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE) + + IF(WIN32) + SET(PROTOBUF_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) + SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) + ELSE(WIN32) + SET(PROTOBUF_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) + SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) + ENDIF(WIN32) -IF(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) -ELSE(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." 
FORCE) -ENDIF(WIN32) + ExternalProject_Add( + protobuf + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PROTOBUF_SOURCES_DIR} + UPDATE_COMMAND "" + DEPENDS zlib + GIT_REPOSITORY "https://github.com/google/protobuf.git" + GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" + CONFIGURE_COMMAND + ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake + -Dprotobuf_BUILD_TESTS=OFF + -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=lib + ) -ExternalProject_Add( - protobuf - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${PROTOBUF_SOURCES_DIR} - UPDATE_COMMAND "" - DEPENDS zlib - GIT_REPOSITORY "https://github.com/google/protobuf.git" - GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" - CONFIGURE_COMMAND - ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake - -Dprotobuf_BUILD_TESTS=OFF - -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=lib -) + LIST(APPEND external_project_dependencies protobuf) +ENDIF(NOT PROTOBUF_FOUND) -LIST(APPEND external_project_dependencies protobuf) +INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 6372a9a768e580f74f837ccb6c57d4f4395eb779..0accf1a8dd83560324716f0f4936be56dd7a9f1b 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) - -MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}") -MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}") -MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}") diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..19d20540780becf504973a23b50445d4b65dc2ef --- /dev/null +++ b/demo/image_classification/api_v2_resnet.py @@ -0,0 +1,74 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.v2 as paddle + +__all__ = ['resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(ipt, n_in, n_out, stride): + if n_in != n_out: + return conv_bn_layer(ipt, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return ipt + + +def basicblock(ipt, ch_out, stride): + ch_in = ch_out * 2 + tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, ipt, features, count, stride): + tmp = block_func(ipt, features, stride) + for i in range(1, count): + tmp = block_func(tmp, features, 1) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py new file mode 100644 index 0000000000000000000000000000000000000000..e0fc0e04bbd21f691caa1ce3fb95c8a7065d1b3f --- /dev/null +++ b/demo/image_classification/api_v2_train.py @@ -0,0 +1,91 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +import sys +import paddle.v2 as paddle +from api_v2_vgg import vgg_bn_drop +from api_v2_resnet import resnet_cifar10 + + +def main(): + datadim = 3 * 32 * 32 + classdim = 10 + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(datadim)) + + # Add neural network config + # option 1. resnet + net = resnet_cifar10(image, depth=32) + # option 2. 
vgg + # net = vgg_bn_drop(image) + + out = paddle.layer.fc(input=net, + size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + # Create parameters + parameters = paddle.parameters.create(cost) + + # Create optimizer + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.batch( + paddle.dataset.cifar.test10(), batch_size=128), + reader_dict={'image': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + + # Create trainer + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=momentum_optimizer) + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=50000), + batch_size=128), + num_passes=5, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}) + + +if __name__ == '__main__': + main() diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..1e0e6b93adde30425f17aa9cd07542275f4fec37 --- /dev/null +++ b/demo/image_classification/api_v2_vgg.py @@ -0,0 +1,47 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.v2 as paddle + +__all__ = ['vgg_bn_drop'] + + +def vgg_bn_drop(input): + def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): + return paddle.networks.img_conv_group( + input=ipt, + num_channels=num_channels, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act=paddle.activation.Relu(), + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type=paddle.pooling.Max()) + + conv1 = conv_block(input, 64, 2, [0.3, 0], 3) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) + fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) + bn = paddle.layer.batch_norm( + input=fc1, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) + return fc2 diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..75dd65f9fc8cd8e7fab5bf30a6337574a645e89f --- /dev/null +++ b/demo/introduction/api_train_v2.py @@ -0,0 +1,58 @@ +import paddle.v2 as paddle +import paddle.v2.dataset.uci_housing as uci_housing + + +def main(): + # init + paddle.init(use_gpu=False, trainer_count=1) + + # network config + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, + param_attr=paddle.attr.Param(name='w'), + size=1, + act=paddle.activation.Linear(), + bias_attr=paddle.attr.Param(name='b')) + y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) + cost = paddle.layer.regression_cost(input=y_predict, label=y) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + optimizer = paddle.optimizer.Momentum(momentum=0) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + + # event_handler to print training and testing info + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + uci_housing.test(), batch_size=2), + reader_dict={'x': 0, + 'y': 1}) + if event.pass_id % 10 == 0: + print "Test %d, %s" % (event.pass_id, result.metrics) + + # training + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + uci_housing.train(), buf_size=500), + batch_size=2), + reader_dict={'x': 0, + 'y': 1}, + event_handler=event_handler, + num_passes=30) + + +if __name__ == '__main__': + main() diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 5b526e8a0f6054ab0ec240c1a5f42f8c6a241a7f..9a49274e9a0d7be79047c479c2437c65193c590b 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -2,6 +2,59 @@ import paddle.v2 as paddle import cPickle +def softmax_regression(img): + predict = paddle.layer.fc(input=img, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def multilayer_perceptron(img): + # The first fully-connected layer + hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) + # The second fully-connected layer and the 
according activation function + hidden2 = paddle.layer.fc(input=hidden1, + size=64, + act=paddle.activation.Relu()) + # The thrid fully-connected layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=hidden2, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def convolutional_neural_network(img): + # first conv layer + conv_pool_1 = paddle.networks.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + num_channel=1, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # second conv layer + conv_pool_2 = paddle.networks.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + num_channel=20, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # The first fully-connected layer + fc1 = paddle.layer.fc(input=conv_pool_2, + size=128, + act=paddle.activation.Tanh()) + # The softmax layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=fc1, + size=10, + act=paddle.activation.Softmax()) + return predict + + def main(): paddle.init(use_gpu=False, trainer_count=1) @@ -10,12 +63,14 @@ def main(): name='pixel', type=paddle.data_type.dense_vector(784)) label = paddle.layer.data( name='label', type=paddle.data_type.integer_value(10)) - hidden1 = paddle.layer.fc(input=images, size=200) - hidden2 = paddle.layer.fc(input=hidden1, size=200) - inference = paddle.layer.fc(input=hidden2, - size=10, - act=paddle.activation.Softmax()) - cost = paddle.layer.classification_cost(input=inference, label=label) + + # Here we can build the prediction network in different ways. Please + # choose one by uncomment corresponding line. + predict = softmax_regression(images) + #predict = multilayer_perceptron(images) + #predict = convolutional_neural_network(images) + + cost = paddle.layer.classification_cost(input=predict, label=label) try: with open('params.pkl', 'r') as f: @@ -23,43 +78,49 @@ def main(): except IOError: parameters = paddle.parameters.create(cost) - adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01) + optimizer = paddle.optimizer.Momentum( + learning_rate=0.1 / 128.0, + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, - update_equation=adam_optimizer) + update_equation=optimizer) + + lists = [] def event_handler(event): if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1000 == 0: - result = trainer.test(reader=paddle.reader.batched( - paddle.dataset.mnist.test(), batch_size=256)) - - print "Pass %d, Batch %d, Cost %.2f, %s, " \ - "Testing cost %.2f metrics %s" % ( - event.pass_id, event.batch_id, event.cost, - event.metrics, - result.cost, result.metrics) - - with open('params.pkl', 'w') as f: - cPickle.dump( - parameters, f, protocol=cPickle.HIGHEST_PROTOCOL) - else: - pass + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + if isinstance(event, paddle.event.EndPass): + result = trainer.test(reader=paddle.reader.batched( + paddle.dataset.mnist.test(), batch_size=128)) + print "Test with Pass %d, Cost %f, %s\n" % ( + event.pass_id, result.cost, result.metrics) + lists.append((event.pass_id, result.cost, + result.metrics['classification_error_evaluator'])) trainer.train( - reader=paddle.reader.batched( + reader=paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), 
buf_size=8192), - batch_size=32), - event_handler=event_handler) + batch_size=128), + event_handler=event_handler, + num_passes=100) + + # find the best pass + best = sorted(lists, key=lambda list: float(list[1]))[0] + print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) + print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) # output is a softmax layer. It returns probabilities. # Shape should be (100, 10) probs = paddle.infer( - output=inference, + output=predict, parameters=parameters, - reader=paddle.reader.batched( + reader=paddle.batch( paddle.reader.firstn( paddle.reader.map_readers(lambda item: (item[0], ), paddle.dataset.mnist.test()), diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..15db922b97abc5ae79f095edfd632604eec8ab94 --- /dev/null +++ b/demo/semantic_role_labeling/api_train_v2.py @@ -0,0 +1,190 @@ +import sys +import math +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 + + +def db_lstm(): + word_dict, verb_dict, label_dict = conll05.get_dict() + word_dict_len = len(word_dict) + label_dict_len = len(label_dict) + pred_len = len(verb_dict) + + mark_dict_len = 2 + word_dim = 32 + mark_dim = 5 + hidden_dim = 512 + depth = 8 + + #8 features + def d_type(size): + return paddle.data_type.integer_value_sequence(size) + + word = paddle.layer.data(name='word_data', type=d_type(word_dict_len)) + predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len)) + + ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len)) + ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len)) + ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len)) + ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len)) + ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len)) + mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len)) + + target = paddle.layer.data(name='target', type=d_type(label_dict_len)) + + default_std = 1 / math.sqrt(hidden_dim) / 3.0 + + emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.) + std_0 = paddle.attr.Param(initial_std=0.) 
+ std_default = paddle.attr.Param(initial_std=default_std) + + predicate_embedding = paddle.layer.embedding( + size=word_dim, + input=predicate, + param_attr=paddle.attr.Param( + name='vemb', initial_std=default_std)) + mark_embedding = paddle.layer.embedding( + size=mark_dim, input=mark, param_attr=std_0) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + paddle.layer.embedding( + size=word_dim, input=x, param_attr=emb_para) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0 = paddle.layer.mixed( + size=hidden_dim, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=emb, param_attr=std_default) for emb in emb_layers + ]) + + mix_hidden_lr = 1e-3 + lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) + hidden_para_attr = paddle.attr.Param( + initial_std=default_std, learning_rate=mix_hidden_lr) + + lstm_0 = paddle.layer.lstmemory( + input=hidden_0, + act=paddle.activation.Relu(), + gate_act=paddle.activation.Sigmoid(), + state_act=paddle.activation.Sigmoid(), + bias_attr=std_0, + param_attr=lstm_para_attr) + + #stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = paddle.layer.mixed( + size=hidden_dim, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=input_tmp[0], param_attr=hidden_para_attr), + paddle.layer.full_matrix_projection( + input=input_tmp[1], param_attr=lstm_para_attr) + ]) + + lstm = paddle.layer.lstmemory( + input=mix_hidden, + act=paddle.activation.Relu(), + gate_act=paddle.activation.Sigmoid(), + state_act=paddle.activation.Sigmoid(), + reverse=((i % 2) == 1), + bias_attr=std_0, + param_attr=lstm_para_attr) + + input_tmp = [mix_hidden, lstm] + + feature_out = paddle.layer.mixed( + size=label_dict_len, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=input_tmp[0], param_attr=hidden_para_attr), + paddle.layer.full_matrix_projection( + input=input_tmp[1], param_attr=lstm_para_attr) + ], ) + + crf_cost = paddle.layer.crf(size=label_dict_len, + input=feature_out, + label=target, + param_attr=paddle.attr.Param( + name='crfw', + initial_std=default_std, + learning_rate=mix_hidden_lr)) + + crf_dec = paddle.layer.crf_decoding( + name='crf_dec_l', + size=label_dict_len, + input=feature_out, + label=target, + param_attr=paddle.attr.Param(name='crfw')) + + return crf_cost, crf_dec + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. 
+ return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + crf_cost, crf_dec = db_lstm() + + # create parameters + parameters = paddle.parameters.create([crf_cost, crf_dec]) + + # create optimizer + optimizer = paddle.optimizer.Momentum( + momentum=0, + learning_rate=2e-2, + regularization=paddle.optimizer.L2Regularization(rate=8e-4), + model_average=paddle.optimizer.ModelAverage( + average_window=0.5, max_average_window=10000), ) + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + trainer = paddle.trainer.SGD(cost=crf_cost, + parameters=parameters, + update_equation=optimizer) + parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) + + trn_reader = paddle.reader.batched( + paddle.reader.shuffle( + conll05.test(), buf_size=8192), batch_size=10) + + reader_dict = { + 'word_data': 0, + 'ctx_n2_data': 1, + 'ctx_n1_data': 2, + 'ctx_0_data': 3, + 'ctx_p1_data': 4, + 'ctx_p2_data': 5, + 'verb_data': 6, + 'mark_data': 7, + 'target': 8 + } + + trainer.train( + reader=trn_reader, + event_handler=event_handler, + num_passes=10000, + reader_dict=reader_dict) + + +if __name__ == '__main__': + main() diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..3a266e74ea93068cad2757d0076a4ae664ad4cf8 --- /dev/null +++ b/demo/sentiment/train_v2.py @@ -0,0 +1,166 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import paddle.trainer_config_helpers.attrs as attrs +from paddle.trainer_config_helpers.poolings import MaxPooling +import paddle.v2 as paddle + + +def convolution_net(input_dim, + class_dim=2, + emb_dim=128, + hid_dim=128, + is_predict=False): + data = paddle.layer.data("word", + paddle.data_type.integer_value_sequence(input_dim)) + emb = paddle.layer.embedding(input=data, size=emb_dim) + conv_3 = paddle.networks.sequence_conv_pool( + input=emb, context_len=3, hidden_size=hid_dim) + conv_4 = paddle.networks.sequence_conv_pool( + input=emb, context_len=4, hidden_size=hid_dim) + output = paddle.layer.fc(input=[conv_3, conv_4], + size=class_dim, + act=paddle.activation.Softmax()) + lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) + cost = paddle.layer.classification_cost(input=output, label=lbl) + return cost + + +def stacked_lstm_net(input_dim, + class_dim=2, + emb_dim=128, + hid_dim=512, + stacked_num=3, + is_predict=False): + """ + A Wrapper for sentiment classification task. + This network uses bi-directional recurrent network, + consisting three LSTM layers. This configure is referred to + the paper as following url, but use fewer layrs. + http://www.aclweb.org/anthology/P15-1109 + + input_dim: here is word dictionary dimension. 
+ class_dim: number of categories. + emb_dim: dimension of word embedding. + hid_dim: dimension of hidden layer. + stacked_num: number of stacked lstm-hidden layer. + is_predict: is predicting or not. + Some layers is not needed in network when predicting. + """ + assert stacked_num % 2 == 1 + + layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5) + fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3) + lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.) + para_attr = [fc_para_attr, lstm_para_attr] + bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.) + relu = paddle.activation.Relu() + linear = paddle.activation.Linear() + + data = paddle.layer.data("word", + paddle.data_type.integer_value_sequence(input_dim)) + emb = paddle.layer.embedding(input=data, size=emb_dim) + + fc1 = paddle.layer.fc(input=emb, + size=hid_dim, + act=linear, + bias_attr=bias_attr) + lstm1 = paddle.layer.lstmemory( + input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) + + inputs = [fc1, lstm1] + for i in range(2, stacked_num + 1): + fc = paddle.layer.fc(input=inputs, + size=hid_dim, + act=linear, + param_attr=para_attr, + bias_attr=bias_attr) + lstm = paddle.layer.lstmemory( + input=fc, + reverse=(i % 2) == 0, + act=relu, + bias_attr=bias_attr, + layer_attr=layer_attr) + inputs = [fc, lstm] + + fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling()) + lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling()) + output = paddle.layer.fc(input=[fc_last, lstm_last], + size=class_dim, + act=paddle.activation.Softmax(), + bias_attr=bias_attr, + param_attr=para_attr) + + lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) + cost = paddle.layer.classification_cost(input=output, label=lbl) + return cost + + +if __name__ == '__main__': + # init + paddle.init(use_gpu=True, trainer_count=4) + + # network config + print 'load dictionary...' + word_dict = paddle.dataset.imdb.word_dict() + dict_dim = len(word_dict) + class_dim = 2 + + # Please choose the way to build the network + # by uncommenting the corresponding line. 
+ cost = convolution_net(dict_dim, class_dim=class_dim) + # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + adam_optimizer = paddle.optimizer.Adam( + learning_rate=2e-3, + regularization=paddle.optimizer.L2Regularization(rate=8e-4), + model_average=paddle.optimizer.ModelAverage(average_window=0.5)) + + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + lambda: paddle.dataset.imdb.test(word_dict), + batch_size=128), + reader_dict={'word': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + + # create trainer + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=adam_optimizer) + + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), + batch_size=100), + event_handler=event_handler, + reader_dict={'word': 0, + 'label': 1}, + num_passes=10) diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f59ec379738eb5bed3e7559739cae38582ed06 --- /dev/null +++ b/demo/seqToseq/api_train_v2.py @@ -0,0 +1,110 @@ +import os + +import paddle.v2 as paddle + +from seqToseq_net_v2 import seqToseq_net_v2 + +# Data Definiation. +# TODO:This code should be merged to dataset package. 
+data_dir = "./data/pre-wmt14" +src_lang_dict = os.path.join(data_dir, 'src.dict') +trg_lang_dict = os.path.join(data_dir, 'trg.dict') + +source_dict_dim = len(open(src_lang_dict, "r").readlines()) +target_dict_dim = len(open(trg_lang_dict, "r").readlines()) + + +def read_to_dict(dict_path): + with open(dict_path, "r") as fin: + out_dict = { + line.strip(): line_count + for line_count, line in enumerate(fin) + } + return out_dict + + +src_dict = read_to_dict(src_lang_dict) +trg_dict = read_to_dict(trg_lang_dict) + +train_list = os.path.join(data_dir, 'train.list') +test_list = os.path.join(data_dir, 'test.list') + +UNK_IDX = 2 +START = "" +END = "" + + +def _get_ids(s, dictionary): + words = s.strip().split() + return [dictionary[START]] + \ + [dictionary.get(w, UNK_IDX) for w in words] + \ + [dictionary[END]] + + +def train_reader(file_name): + def reader(): + with open(file_name, 'r') as f: + for line_count, line in enumerate(f): + line_split = line.strip().split('\t') + if len(line_split) != 2: + continue + src_seq = line_split[0] # one source sequence + src_ids = _get_ids(src_seq, src_dict) + + trg_seq = line_split[1] # one target sequence + trg_words = trg_seq.split() + trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words] + + # remove sequence whose length > 80 in training mode + if len(src_ids) > 80 or len(trg_ids) > 80: + continue + trg_ids_next = trg_ids + [trg_dict[END]] + trg_ids = [trg_dict[START]] + trg_ids + + yield src_ids, trg_ids, trg_ids_next + + return reader + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + cost = seqToseq_net_v2(source_dict_dim, target_dict_dim) + parameters = paddle.parameters.create(cost) + + # define optimize method and trainer + optimizer = paddle.optimizer.Adam(learning_rate=1e-4) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + + # define data reader + reader_dict = { + 'source_language_word': 0, + 'target_language_word': 1, + 'target_language_next_word': 2 + } + + wmt14_reader = paddle.reader.batched( + paddle.reader.shuffle( + train_reader("data/pre-wmt14/train/train"), buf_size=8192), + batch_size=5) + + # define event_handler callback + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 10 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + # start to train + trainer.train( + reader=wmt14_reader, + event_handler=event_handler, + num_passes=10000, + reader_dict=reader_dict) + + +if __name__ == '__main__': + main() diff --git a/demo/seqToseq/seqToseq_net_v2.py b/demo/seqToseq/seqToseq_net_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..058a6789d7094c71492ed9772ed5594c4c0c8f84 --- /dev/null +++ b/demo/seqToseq/seqToseq_net_v2.py @@ -0,0 +1,92 @@ +import paddle.v2 as paddle + + +def seqToseq_net_v2(source_dict_dim, target_dict_dim): + ### Network Architecture + word_vector_dim = 512 # dimension of word vector + decoder_size = 512 # dimension of hidden unit in GRU Decoder network + encoder_size = 512 # dimension of hidden unit in GRU Encoder network + + #### Encoder + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) + src_embedding = paddle.layer.embedding( + input=src_word_id, + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) + src_forward = paddle.networks.simple_gru( + input=src_embedding, 
size=encoder_size) + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) + + #### Decoder + with paddle.layer.mixed(size=decoder_size) as encoded_proj: + encoded_proj += paddle.layer.full_matrix_projection( + input=encoded_vector) + + backward_first = paddle.layer.first_seq(input=src_backward) + + with paddle.layer.mixed( + size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot: + decoder_boot += paddle.layer.full_matrix_projection( + input=backward_first) + + def gru_decoder_with_attention(enc_vec, enc_proj, current_word): + + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) + + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) + + with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: + decoder_inputs += paddle.layer.full_matrix_projection(input=context) + decoder_inputs += paddle.layer.full_matrix_projection( + input=current_word) + + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) + + with paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax()) as out: + out += paddle.layer.full_matrix_projection(input=gru_step) + return out + + decoder_group_name = "decoder_group" + group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) + + # For decoder equipped with attention mechanism, in training, + # target embeding (the groudtruth) is the data input, + # while encoded source sequence is accessed to as an unbounded memory. + # Here, the StaticInput defines a read-only memory + # for the recurrent_group. 
+ decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) + + lbl = paddle.layer.data( + name='target_language_next_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)) + cost = paddle.layer.classification_cost(input=decoder, label=lbl) + + return cost diff --git a/demo/word2vec/train_v2.py b/demo/word2vec/train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..7d952b446f9db432062fc3305a6b65b0ad66dd47 --- /dev/null +++ b/demo/word2vec/train_v2.py @@ -0,0 +1,80 @@ +import math + +import paddle.v2 as paddle + +dictsize = 1953 +embsize = 32 +hiddensize = 256 +N = 5 + + +def wordemb(inlayer): + wordemb = paddle.layer.table_projection( + input=inlayer, + size=embsize, + param_attr=paddle.attr.Param( + name="_proj", + initial_std=0.001, + learning_rate=1, + l2_rate=0, )) + return wordemb + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + firstword = paddle.layer.data( + name="firstw", type=paddle.data_type.integer_value(dict_size)) + secondword = paddle.layer.data( + name="secondw", type=paddle.data_type.integer_value(dict_size)) + thirdword = paddle.layer.data( + name="thirdw", type=paddle.data_type.integer_value(dict_size)) + fourthword = paddle.layer.data( + name="fourthw", type=paddle.data_type.integer_value(dict_size)) + nextword = paddle.layer.data( + name="fifthw", type=paddle.data_type.integer_value(dict_size)) + + Efirst = wordemb(firstword) + Esecond = wordemb(secondword) + Ethird = wordemb(thirdword) + Efourth = wordemb(fourthword) + + contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth]) + hidden1 = paddle.layer.fc(input=contextemb, + size=hiddensize, + act=paddle.activation.Sigmoid(), + layer_attr=paddle.attr.Extra(drop_rate=0.5), + bias_attr=paddle.attr.Param(learning_rate=2), + param_attr=paddle.attr.Param( + initial_std=1. / math.sqrt(embsize * 8), + learning_rate=1)) + predictword = paddle.layer.fc(input=hidden1, + size=dict_size, + bias_attr=paddle.attr.Param(learning_rate=2), + act=paddle.activation.Softmax()) + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + result = trainer.test( + paddle.batch( + paddle.dataset.imikolov.test(word_dict, N), 32)) + print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics, + result.metrics) + + cost = paddle.layer.classification_cost(input=predictword, label=nextword) + parameters = paddle.parameters.create(cost) + adam_optimizer = paddle.optimizer.Adam( + learning_rate=3e-3, + regularization=paddle.optimizer.L2Regularization(8e-4)) + trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer) + trainer.train( + paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32), + num_passes=30, + event_handler=event_handler) + + +if __name__ == '__main__': + main() diff --git a/doc/api/index_cn.rst b/doc/api/index_cn.rst index 3718cd73a2003b8ef6c406a9bd51dc68e76402dc..fca981221e490686e468ae8d385d844d49767883 100644 --- a/doc/api/index_cn.rst +++ b/doc/api/index_cn.rst @@ -1,37 +1,26 @@ -API中文手册 -============ +API +=== -DataProvider API ----------------- +模型配置 API +------------ .. toctree:: :maxdepth: 1 - data_provider/dataprovider_cn.rst - data_provider/pydataprovider2_cn.rst + v2/model_configs.rst -.. _api_trainer_config: - -Model Config API ----------------- +数据 API +-------- .. 
toctree:: :maxdepth: 1 - trainer_config_helpers/optimizers.rst - trainer_config_helpers/data_sources.rst - trainer_config_helpers/layers.rst - trainer_config_helpers/activations.rst - trainer_config_helpers/poolings.rst - trainer_config_helpers/networks.rst - trainer_config_helpers/evaluators.rst - trainer_config_helpers/attrs.rst - + v2/data.rst -Applications API ----------------- +训练 API +-------- -.. toctree:: - :maxdepth: 1 +.. toctree:: + :maxdepth: 1 - predict/swig_py_paddle_cn.rst + v2/run_logic.rst \ No newline at end of file diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst index 10c297a71d6988c002de868e804ed9ee2345fbd7..f0ad0fb2aee7345db1dd5f175a342598366f5e3c 100644 --- a/doc/api/index_en.rst +++ b/doc/api/index_en.rst @@ -1,37 +1,26 @@ API === -DataProvider API +Model Config API ---------------- .. toctree:: :maxdepth: 1 - data_provider/dataprovider_en.rst - data_provider/pydataprovider2_en.rst - -.. _api_trainer_config: + v2/model_configs.rst -Model Config API ----------------- +Data API +-------- .. toctree:: :maxdepth: 1 - trainer_config_helpers/optimizers.rst - trainer_config_helpers/data_sources.rst - trainer_config_helpers/layers.rst - trainer_config_helpers/activations.rst - trainer_config_helpers/poolings.rst - trainer_config_helpers/networks.rst - trainer_config_helpers/evaluators.rst - trainer_config_helpers/attrs.rst + v2/data.rst +Train API +--------- -Applications API ----------------- - -.. toctree:: - :maxdepth: 1 +.. toctree:: + :maxdepth: 1 - predict/swig_py_paddle_en.rst + v2/run_logic.rst \ No newline at end of file diff --git a/doc/api/data_provider/dataprovider_cn.rst b/doc/api/v1/data_provider/dataprovider_cn.rst similarity index 100% rename from doc/api/data_provider/dataprovider_cn.rst rename to doc/api/v1/data_provider/dataprovider_cn.rst diff --git a/doc/api/data_provider/dataprovider_en.rst b/doc/api/v1/data_provider/dataprovider_en.rst similarity index 100% rename from doc/api/data_provider/dataprovider_en.rst rename to doc/api/v1/data_provider/dataprovider_en.rst diff --git a/doc/api/data_provider/pydataprovider2_cn.rst b/doc/api/v1/data_provider/pydataprovider2_cn.rst similarity index 100% rename from doc/api/data_provider/pydataprovider2_cn.rst rename to doc/api/v1/data_provider/pydataprovider2_cn.rst diff --git a/doc/api/data_provider/pydataprovider2_en.rst b/doc/api/v1/data_provider/pydataprovider2_en.rst similarity index 100% rename from doc/api/data_provider/pydataprovider2_en.rst rename to doc/api/v1/data_provider/pydataprovider2_en.rst diff --git a/doc/api/data_provider/src/mnist_config.py b/doc/api/v1/data_provider/src/mnist_config.py similarity index 100% rename from doc/api/data_provider/src/mnist_config.py rename to doc/api/v1/data_provider/src/mnist_config.py diff --git a/doc/api/data_provider/src/mnist_provider.dict.py b/doc/api/v1/data_provider/src/mnist_provider.dict.py similarity index 100% rename from doc/api/data_provider/src/mnist_provider.dict.py rename to doc/api/v1/data_provider/src/mnist_provider.dict.py diff --git a/doc/api/data_provider/src/mnist_train.txt b/doc/api/v1/data_provider/src/mnist_train.txt similarity index 100% rename from doc/api/data_provider/src/mnist_train.txt rename to doc/api/v1/data_provider/src/mnist_train.txt diff --git a/doc/api/data_provider/src/sentimental_config.py b/doc/api/v1/data_provider/src/sentimental_config.py similarity index 100% rename from doc/api/data_provider/src/sentimental_config.py rename to doc/api/v1/data_provider/src/sentimental_config.py diff --git 
a/doc/api/data_provider/src/sentimental_provider.py b/doc/api/v1/data_provider/src/sentimental_provider.py similarity index 100% rename from doc/api/data_provider/src/sentimental_provider.py rename to doc/api/v1/data_provider/src/sentimental_provider.py diff --git a/doc/api/data_provider/src/sentimental_train.txt b/doc/api/v1/data_provider/src/sentimental_train.txt similarity index 100% rename from doc/api/data_provider/src/sentimental_train.txt rename to doc/api/v1/data_provider/src/sentimental_train.txt diff --git a/doc/api/data_provider/src/train.list b/doc/api/v1/data_provider/src/train.list similarity index 100% rename from doc/api/data_provider/src/train.list rename to doc/api/v1/data_provider/src/train.list diff --git a/doc/api/v1/index_cn.rst b/doc/api/v1/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..3718cd73a2003b8ef6c406a9bd51dc68e76402dc --- /dev/null +++ b/doc/api/v1/index_cn.rst @@ -0,0 +1,37 @@ +API中文手册 +============ + +DataProvider API +---------------- + +.. toctree:: + :maxdepth: 1 + + data_provider/dataprovider_cn.rst + data_provider/pydataprovider2_cn.rst + +.. _api_trainer_config: + +Model Config API +---------------- + +.. toctree:: + :maxdepth: 1 + + trainer_config_helpers/optimizers.rst + trainer_config_helpers/data_sources.rst + trainer_config_helpers/layers.rst + trainer_config_helpers/activations.rst + trainer_config_helpers/poolings.rst + trainer_config_helpers/networks.rst + trainer_config_helpers/evaluators.rst + trainer_config_helpers/attrs.rst + + +Applications API +---------------- + +.. toctree:: + :maxdepth: 1 + + predict/swig_py_paddle_cn.rst diff --git a/doc/api/v1/index_en.rst b/doc/api/v1/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..10c297a71d6988c002de868e804ed9ee2345fbd7 --- /dev/null +++ b/doc/api/v1/index_en.rst @@ -0,0 +1,37 @@ +API +=== + +DataProvider API +---------------- + +.. toctree:: + :maxdepth: 1 + + data_provider/dataprovider_en.rst + data_provider/pydataprovider2_en.rst + +.. _api_trainer_config: + +Model Config API +---------------- + +.. toctree:: + :maxdepth: 1 + + trainer_config_helpers/optimizers.rst + trainer_config_helpers/data_sources.rst + trainer_config_helpers/layers.rst + trainer_config_helpers/activations.rst + trainer_config_helpers/poolings.rst + trainer_config_helpers/networks.rst + trainer_config_helpers/evaluators.rst + trainer_config_helpers/attrs.rst + + +Applications API +---------------- + +.. 
toctree:: + :maxdepth: 1 + + predict/swig_py_paddle_en.rst diff --git a/doc/api/predict/src/predict_sample.py b/doc/api/v1/predict/src/predict_sample.py similarity index 100% rename from doc/api/predict/src/predict_sample.py rename to doc/api/v1/predict/src/predict_sample.py diff --git a/doc/api/predict/swig_py_paddle_cn.rst b/doc/api/v1/predict/swig_py_paddle_cn.rst similarity index 100% rename from doc/api/predict/swig_py_paddle_cn.rst rename to doc/api/v1/predict/swig_py_paddle_cn.rst diff --git a/doc/api/predict/swig_py_paddle_en.rst b/doc/api/v1/predict/swig_py_paddle_en.rst similarity index 100% rename from doc/api/predict/swig_py_paddle_en.rst rename to doc/api/v1/predict/swig_py_paddle_en.rst diff --git a/doc/api/trainer_config_helpers/activations.rst b/doc/api/v1/trainer_config_helpers/activations.rst similarity index 100% rename from doc/api/trainer_config_helpers/activations.rst rename to doc/api/v1/trainer_config_helpers/activations.rst diff --git a/doc/api/trainer_config_helpers/attrs.rst b/doc/api/v1/trainer_config_helpers/attrs.rst similarity index 100% rename from doc/api/trainer_config_helpers/attrs.rst rename to doc/api/v1/trainer_config_helpers/attrs.rst diff --git a/doc/api/trainer_config_helpers/data_sources.rst b/doc/api/v1/trainer_config_helpers/data_sources.rst similarity index 100% rename from doc/api/trainer_config_helpers/data_sources.rst rename to doc/api/v1/trainer_config_helpers/data_sources.rst diff --git a/doc/api/trainer_config_helpers/evaluators.rst b/doc/api/v1/trainer_config_helpers/evaluators.rst similarity index 100% rename from doc/api/trainer_config_helpers/evaluators.rst rename to doc/api/v1/trainer_config_helpers/evaluators.rst diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst similarity index 100% rename from doc/api/trainer_config_helpers/layers.rst rename to doc/api/v1/trainer_config_helpers/layers.rst diff --git a/doc/api/trainer_config_helpers/networks.rst b/doc/api/v1/trainer_config_helpers/networks.rst similarity index 100% rename from doc/api/trainer_config_helpers/networks.rst rename to doc/api/v1/trainer_config_helpers/networks.rst diff --git a/doc/api/trainer_config_helpers/optimizers.rst b/doc/api/v1/trainer_config_helpers/optimizers.rst similarity index 100% rename from doc/api/trainer_config_helpers/optimizers.rst rename to doc/api/v1/trainer_config_helpers/optimizers.rst diff --git a/doc/api/trainer_config_helpers/poolings.rst b/doc/api/v1/trainer_config_helpers/poolings.rst similarity index 100% rename from doc/api/trainer_config_helpers/poolings.rst rename to doc/api/v1/trainer_config_helpers/poolings.rst diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst new file mode 100644 index 0000000000000000000000000000000000000000..1c0a202a8c04322de3e9533c11fb5c74abac6c62 --- /dev/null +++ b/doc/api/v2/data.rst @@ -0,0 +1,93 @@ +================ +Data Related API +================ + + +######### +DataTypes +######### + +.. automodule:: paddle.v2.data_type + :members: + +########## +DataFeeder +########## + +.. automodule:: paddle.v2.data_feeder + :members: + +###### +Reader +###### + +.. automodule:: paddle.v2.reader + :members: + +.. automodule:: paddle.v2.reader.creator + :members: + +######### +minibatch +######### + +.. automodule:: paddle.v2.minibatch + :members: + +####### +Dataset +####### + +.. automodule:: paddle.v2.dataset + :members: + + +mnist ++++++ + +.. automodule:: paddle.v2.dataset.mnist + :members: + + +cifar ++++++ + +.. 
automodule:: paddle.v2.dataset.cifar + :members: + +conll05 ++++++++ + +.. automodule:: paddle.v2.dataset.conll05 + :members: + +imdb +++++ + +.. automodule:: paddle.v2.dataset.imdb + :members: + +imikolov +++++++++ + +.. automodule:: paddle.v2.dataset.imikolov + :members: + +movielens ++++++++++ + +.. automodule:: paddle.v2.dataset.movielens + :members: + +sentiment ++++++++++ + +.. automodule:: paddle.v2.dataset.sentiment + :members: + +uci_housing ++++++++++++ + +.. automodule:: paddle.v2.dataset.uci_housing + :members: + diff --git a/doc/api/v2/model_configs.rst b/doc/api/v2/model_configs.rst new file mode 100644 index 0000000000000000000000000000000000000000..e9cd3d5bf7b0e9e59c231bcabdb163a740909de1 --- /dev/null +++ b/doc/api/v2/model_configs.rst @@ -0,0 +1,46 @@ +######################### +Configuration Related API +######################### + +====== +Layers +====== + +.. automodule:: paddle.v2.layer + :members: + + +========== +Attributes +========== + +.. automodule:: paddle.v2.attr + :members: + +=========== +Activations +=========== + +.. automodule:: paddle.v2.activation + :members: + +======== +Poolings +======== + +.. automodule:: paddle.v2.pooling + :members: + +======== +Networks +======== + +.. automodule:: paddle.v2.networks + :members: + +========== +Optimizers +========== + +.. automodule:: paddle.v2.optimizer + :members: diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst new file mode 100644 index 0000000000000000000000000000000000000000..904d45966dfc16a474016ff48fd5a951988b0ab0 --- /dev/null +++ b/doc/api/v2/run_logic.rst @@ -0,0 +1,26 @@ +########### +Trainer API +########### + +========== +Parameters +========== + +.. automodule:: paddle.v2.parameters + :members: + + +======= +Trainer +======= + +.. automodule:: paddle.v2.trainer + :members: + + +===== +Event +===== + +.. automodule:: paddle.v2.event + :members: diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md index 17d52b9e20b8130688028092421f4b33f44763ac..f21f7af520df5171798326818ecb97c3bcd14a12 100644 --- a/doc/design/reader/README.md +++ b/doc/design/reader/README.md @@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t - A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items. - A *reader creator* is a function that returns a reader function. -- A *reader* decorator is a function, which accepts one or more readers, and returns a reader. +- A *reader decorator* is a function, which accepts one or more readers, and returns a reader. +- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items. -and provide frequently used reader creators and reader decorators. +and provide function which converts reader to batch reader, frequently used reader creators and reader decorators. 
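To see how these four terms fit together before the detailed sections below, here is a small orientation sketch (illustrative only; it uses `paddle.reader.shuffle` as an example of a reader decorator and `paddle.batch` as the reader-to-batch-reader conversion, both of which appear in the demos in this repository):

```python
import numpy
import paddle.v2 as paddle


def uniform_sample_creator(width, height):  # a reader creator
    def reader():  # the reader it returns
        while True:
            yield numpy.random.uniform(-1, 1, size=width * height)

    return reader


# a reader decorator wraps a reader and returns a new reader
shuffled = paddle.reader.shuffle(uniform_sample_creator(28, 28), buf_size=512)
# a batch reader yields lists of up to 128 items instead of single items
batch_reader = paddle.batch(shuffled, 128)
```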
## Data Reader Interface @@ -22,24 +23,69 @@ An example implementation for single item data reader creator: ```python def reader_creator_random_image(width, height): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height) - return reader + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader ``` An example implementation for multiple item data reader creator: ```python -def reader_creator_random_imageand_label(widht, height, label): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height), label - return reader +def reader_creator_random_image_and_label(width, height, label): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height), label + return reader +``` + +## Batch Reader Interface + +A *batch reader* can be any function with no parameters that creates an iterable (anything that can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple. + +Here are valid outputs: +```python +# a mini batch of three data items. Each data item consists of three columns of data, each of which is 1. +[(1, 1, 1), +(2, 2, 2), +(3, 3, 3)] + +# a mini batch of three data items, each data item is a list (single column). +[([1,1,1],), +([2,2,2],), +([3,3,3],)] +``` + +Please note that each item inside the list must be a tuple; below is an invalid output: +```python + # wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],). + # Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1], + # or three columns of data, each of which is 1. +[[1,1,1], +[2,2,2], +[3,3,3]] +``` + +It's easy to convert a reader into a batch reader: +```python +mnist_train = paddle.dataset.mnist.train() +mnist_train_batch_reader = paddle.batch(mnist_train, 128) +``` + +It's also easy to create a custom batch reader: +```python +def custom_batch_reader(): + while True: + batch = [] + for i in xrange(128): + batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended. + yield batch + +mnist_random_image_batch_reader = custom_batch_reader ``` ## Usage -data reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`: +The batch reader, the mapping from item(s) read to data layer, the batch size and the number of total passes will be passed into `paddle.train`: ```python # two data layer is created: @@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...) label_layer = paddle.layer.data("label", ...) # ... - -paddle.train(paddle.dataset.mnist, {"image":0, "label":1}, 128, 10, ...) +batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128) +paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...) ``` ## Data Reader Decorator @@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i Use `paddle.reader.buffered` to prefetch data: ```python -buffered_reader = paddle.reader.buffered(paddle.dataset.mnist, 100) +buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100) ``` `buffered_reader` will try to buffer (prefetch) `100` data entries.
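To make the decorator idea concrete, here is a minimal sketch of how such a prefetching decorator can be built with a background thread and a bounded queue (an illustration only, not PaddlePaddle's actual `paddle.reader.buffered` implementation):

```python
import Queue
import threading


def buffered(reader, size):
    # Wrap `reader` in a new reader that prefetches up to `size` entries
    # in a background thread.
    def buffered_reader():
        q = Queue.Queue(maxsize=size)
        end = object()  # sentinel: the wrapped reader is exhausted

        def fill():
            for item in reader():
                q.put(item)  # blocks once `size` entries are waiting
            q.put(end)

        t = threading.Thread(target=fill)
        t.daemon = True
        t.start()
        while True:
            item = q.get()
            if item is end:
                break
            yield item

    return buffered_reader
```

Because the queue is bounded, the producer thread stalls once `size` entries are buffered, so prefetching never runs unboundedly ahead of the consumer.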
@@ -77,24 +123,24 @@ We can do: ```python def reader_creator_random_image(width, height): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height) - return reader + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader def reader_creator_bool(t): - def reader: - while True: - yield t - return reader + def reader(): + while True: + yield t + return reader true_reader = reader_creator_bool(True) false_reader = reader_creator_bool(False) -reader = paddle.reader.compose(paddle.dataset.mnist, data_reader_creator_random_image(20, 20), true_reader, false_reader) -# Skipped 1 because paddle.dataset.mnist produces two items per data entry. +reader = paddle.reader.compose(paddle.dataset.mnist.train(), reader_creator_random_image(20, 20), true_reader, false_reader) +# Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry. # And we don't care second item at this time. -paddle.train(reader, {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) +paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) ``` ### Shuffle @@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader Example: ```python -reader = paddle.reader.shuffle(paddle.dataset.mnist, 512) +reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512) ``` ## Q & A -### Why return only a single entry, but not a mini batch? +### Why does a reader return only a single entry, but not a mini batch? + +Always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returns not a single entry but 3 entries, the training code will be more complex because it needs to handle cases like batch size 2). + +We provide the function `paddle.batch` to turn a (single entry) reader into a batch reader. -If a mini batch is returned, data reader need to take care of batch size. But batch size is a concept for training, it makes more sense for user to specify batch size as a parameter for `train`. +### Why do we need a batch reader? Isn't having train take reader and batch_size as arguments sufficient? -Practically, always return a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2). +In most cases, having train take reader and batch_size as arguments would be sufficient. However, sometimes users want to customize the order of data entries inside a mini batch, or even change the batch size dynamically. ### Why use a dictionary but not a list to provide mapping?
@@ -122,22 +172,22 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag ```python def image_reader_creator(image_path, label_path, n): - def reader(): - f = open(image_path) - l = open(label_path) - images = numpy.fromfile( - f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') - images = images / 255.0 * 2.0 - 1.0 - labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") - for i in xrange(n): - yield images[i, :], labels[i] # a single entry of data is created each time - f.close() - l.close() - return reader + def reader(): + f = open(image_path) + l = open(label_path) + images = numpy.fromfile( + f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') + images = images / 255.0 * 2.0 - 1.0 + labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") + for i in xrange(n): + yield images[i, :], labels[i] # a single entry of data is created each time + f.close() + l.close() + return reader # images_reader_creator creates a reader reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) -paddle.train(reader, {"image":0, "label":1}, ...) +paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...) ``` ### How is `paddle.train` implemented @@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...) An example implementation of paddle.train could be: ```python -def make_minibatch(reader, minibatch_size): - def ret(): - r = reader() - buf = [r.next() for x in xrange(minibatch_size)] - while len(buf) > 0: - yield buf - buf = [r.next() for x in xrange(minibatch_size)] - return ret - -def train(reader, mapping, batch_size, total_pass): - for pass_idx in range(total_pass): - for mini_batch in make_minibatch(reader): # this loop will never end in online learning. - do_forward_backward(mini_batch, mapping) +def train(batch_reader, mapping, batch_size, total_pass): + for pass_idx in range(total_pass): + for mini_batch in batch_reader(): # this loop will never end in online learning. + do_forward_backward(mini_batch, mapping) ``` diff --git a/doc/howto/usage/k8s/k8s_distributed_cn.md b/doc/howto/usage/k8s/k8s_distributed_cn.md index 7213a977b8a2a4241f4eae22b5bdd65f03c574ac..2a7a6c8c17882a6f2c95e933e051c4b8f1a8eeee 100644 --- a/doc/howto/usage/k8s/k8s_distributed_cn.md +++ b/doc/howto/usage/k8s/k8s_distributed_cn.md @@ -43,22 +43,55 @@ docker push [YOUR_REPO]/paddle:mypaddle 注意上述命令中`[YOUR_REPO]`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`[YOUR_REPO]/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。 -### 上传训练文件 +### 准备训练数据 -本文使用PaddlePaddle官方的[recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation)作为这次训练的内容,我们将训练文件与数据放在一个job name命名的目录中,上传到volume所在的共享存储(使用不同分布式存储会有不同的挂载方式,需要要先挂载这个目录,然后拷贝数据)。完成后volume中的文件内容大致如下: +这里我们通过在Kubernetes集群上启动一个Job来下载并切割数据,也可以通过修改[k8s_train](./src/k8s_train/README.md)的内容来定制image. -```bash -[root@paddle-kubernetes-node0 mfs]# tree -d +在启动Job之前,需要根据不同的分布式存储来绑定一个[persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/),生成的数据将会存储在这个volume下. 
+ +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: paddle-data +spec: + template: + metadata: + name: pi + spec: + hostNetwork: true + containers: + - name: paddle-data + image: paddledev/paddle-tutorial:k8s_data + imagePullPolicy: Always + volumeMounts: + - mountPath: "/mnt" + name: nfs + env: + - name: OUT_DIR + value: /home/work/mfs/paddle-cluster-job + - name: SPLIT_COUNT + value: "3" + volumes: + - name: nfs + persistentVolumeClaim: + claimName: mfs + restartPolicy: Never +``` + +完成后volume中的文件内容大致如下: +```base +[root@paddle-kubernetes-node0 nfsdir]$ tree -d . -└── paddle-cluster-job - ├── data - │   ├── 0 - │   │ - │   ├── 1 - │   │ - │   └── 2 - ├── output - └── recommendation +`-- paddle-cluster-job + |-- 0 + | `-- data + |-- 1 + | `-- data + |-- 2 + | `-- data + |-- output + |-- quick_start ``` 目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。 @@ -118,15 +151,16 @@ spec: `env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。 -`JOB_PATH`表示共享存储挂载的路径,`JOB_NAME`表示job名字,`TRAIN_CONFIG_DIR`表示本次训练文件所在目录,这三个变量组合就可以找到本次训练需要的文件路径。 - -`CONF_PADDLE_NIC`表示`paddle pserver`进程需要的`--nics`参数,即网卡名 - -`CONF_PADDLE_PORT`表示`paddle pserver`的`--port`参数,`CONF_PADDLE_PORTS_NUM`则表示稠密更新的端口数量,也就是`--ports_num`参数。 - -`CONF_PADDLE_PORTS_NUM_SPARSE`表示稀疏更新的端口数量,也就是`--ports_num_for_sparse`参数。 - -`CONF_PADDLE_GRADIENT_NUM`表示训练节点数量,即`--num_gradient_servers`参数 +环境变量 | 说明 +--- | --- +JOB_PATH | 共享存储挂在的路径 +JOB_NAME | Job的名字 +TRAIN_CONFIG_DIR | 本次训练文件所在目录,与JOB_PATH,JOB_NAME组合可以找到本次训练需要的文件路径 +CONF_PADDLE_NIC | `paddle pserver`进程需要的`--nics`参数,即网卡名 +CONF_PADDLE_PORT | `paddle paserver`的`--port`参数 +CONF_PADDLE_PORTS_NUM | 稠密更新的端口数量,即`--ports_num`参数 +CONF_PADDLE_PORTS_NUM_SPARSE | 稀疏更新的端口数量,即`--ports_num_for_sparse`参数 +CONF_PADDLE_GRADIENT_NUM | 训练节点数量,即`--num_gradient_servers参数` 这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication)。 diff --git a/doc/howto/usage/k8s/src/k8s_train/start_paddle.py b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py index f1a770ccb54fbd7d4c3cf6bf134d00d7bf5961ca..935c12bb67e1fe08bc135a7a2220fcd43c548482 100755 --- a/doc/howto/usage/k8s/src/k8s_train/start_paddle.py +++ b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py @@ -132,7 +132,8 @@ def startPaddle(idMap={}, train_args_dict=None): logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) if not os.path.exists(JOB_PATH_OUTPUT): os.makedirs(JOB_PATH_OUTPUT) - os.mkdir(logDir) + if not os.path.exists(logDir): + os.mkdir(logDir) copyCommand = 'cp -rf ' + JOB_PATH + \ "/" + str(trainerId) + "/data/*" + " ./data/" os.system(copyCommand) diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 418d718fbd9c61bff3acb9c2dab0638c0b650bab..6dc48704bc230bd1a573c4b4b2e7c07791e48ced 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -15,13 +15,19 @@ import sys import os, subprocess import shlex from recommonmark import parser, transform +try: + import py_paddle + import paddle + import paddle.v2 +except ImportError: + print("Must install paddle python package before generating documentation") + sys.exit(1) MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, '@PROJ_ROOT@/python') templates_path = ["@PROJ_ROOT@/doc_theme/templates"] # -- General configuration ------------------------------------------------ diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index e96c25cb75bee20d2e2949423d80ddab1d3450a1..b477f0120c4fa0544012080b7cfb8572d3c44b04 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -15,14 +15,20 @@ import sys import os, subprocess import shlex from recommonmark import parser, transform +try: + import py_paddle + import paddle + import paddle.v2 +except ImportError: + print("Must install paddle python package before generating documentation") + sys.exit(1) + MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, '@PROJ_ROOT@/python') - templates_path = ["@PROJ_ROOT@/doc_theme/templates"] # -- General configuration ------------------------------------------------ diff --git a/doc/tutorials/quick_start/index_en.md b/doc/tutorials/quick_start/index_en.md index 70dec2eb2a8c397bc56b1e6f52a624a3a6877905..ca110431cf921ae0480d3fb2b17c58f90a84cc0e 100644 --- a/doc/tutorials/quick_start/index_en.md +++ b/doc/tutorials/quick_start/index_en.md @@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list', obj="process", args={"dictionary": word_dict}) ``` -You can refer to the following link for more detailed examples and data formats: PyDataProvider2. +You can refer to the following link for more detailed examples and data formats: PyDataProvider2. ## Network Architecture We will describe four kinds of network architectures in this section.
![](./src/PipelineNetwork_en.jpg)
First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures. -For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory. +For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory. ### Logistic Regression The architecture is illustrated in the following picture: @@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
## Optimization Algorithm -Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. +Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. ```python settings(batch_size=128, @@ -407,7 +407,7 @@ paddle train \ --init_model_path=./output/pass-0000x ``` -We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation. +We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation. inference script (predict.sh): diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp index a3f4bfffc9f074900ebcc52876c04bbfc0e570b2..d49b189e253f7a0792fe3f1fe7c8fdbb7071acd4 100644 --- a/paddle/api/Arguments.cpp +++ b/paddle/api/Arguments.cpp @@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) { a.cpuSequenceDims = m->cast(vec->getSharedPtr()); } -float Arguments::sumCosts() const { - return paddle::Argument::sumCosts(m->outputs); -} +float Arguments::sum() const { return paddle::Argument::sum(m->outputs); } int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) { auto& a = m->getArg(idx); diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index 538ca2999f8f05afc45ac2d2f526133c8024f066..dcb5fe086fdccf8ec62ee52cbaaac4b7dbbe2f9d 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { } } +size_t GradientMachine::getNonStaticParameterSize() const { + return m->machine->getNonStaticParameters().size(); +} + +Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) { + auto params = m->machine->getNonStaticParameters(); + if (i < params.size()) { + return Parameter::createFromSharedPtr( + &m->machine->getNonStaticParameters()[i]); + } else { + throw RangeError(); + } +} + void GradientMachine::randParameters() { m->machine->randParameters(); } Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index d99e9a4ad48ea4764c7a1ea56c507d754d56853b..c4f5dca26cc6a5e9fdd23ee27b594ced29a25c7a 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -453,7 +453,7 @@ public: IVector* vec) throw(RangeError); void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError); - float sumCosts() const; + float sum() const; private: static Arguments* createByPaddleArgumentVector(void* ptr); @@ -771,6 +771,9 @@ public: size_t getParameterSize() const; Parameter* getParameter(size_t i) throw(RangeError); + size_t getNonStaticParameterSize() const; + Parameter* getNonStaticParameter(size_t i) throw(RangeError); + void randParameters(); Arguments* getLayerOutput(const std::string& layerName) const diff 
--git a/paddle/api/test/testArguments.py b/paddle/api/test/testArguments.py index a04a805d7a64ef906c8388f1241b9ef823e4d9e0..9fe44de94ea6ddb71d2dfbb2243fc86ede0d0531 100644 --- a/paddle/api/test/testArguments.py +++ b/paddle/api/test/testArguments.py @@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase): args = swig_paddle.Arguments.createArguments(1) args.setSlotValue(0, m) - self.assertAlmostEqual(27.0, args.sumCosts()) + self.assertAlmostEqual(27.0, args.sum()) mat = args.getSlotValue(0) assert isinstance(mat, swig_paddle.Matrix) diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index ae016e74eaa84f7c43a30c09c8c4577e25360c4e..7617af10ba719490d1b33dd297b070cd8c7c292c 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) { if (weights) { outArgs[0].value->dotMul(*outArgs[0].value, *weights); } - return Argument::sumCosts(outArgs); + return Argument::sum(outArgs); } real getDiffAndPrint(real newCost1, @@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer, std::vector args; args.push_back(out); - EXPECT_EQ(0, Argument::sumCosts(args)) << "testBatchState failed"; + EXPECT_EQ(0, Argument::sum(args)) << "testBatchState failed"; for (size_t seqId = 0; seqId < numSequences; ++seqId) { start[seqId] += seqLens[seqId]; } @@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf, outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights); } - real cost = Argument::sumCosts(outArgs); + real cost = Argument::sum(outArgs); LOG(INFO) << " cost " << cost; EXPECT_FALSE(std::isnan(cost)); diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 178c068b93ac5fc1e06200984f14da86069cf7e4..9ef44be0cb3b960db1e789f3f26bb66d1fe63c81 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -163,7 +163,7 @@ struct Argument { : sequenceStartPositions->getData(false); } - static inline real sumCosts(const std::vector& arguments) { + static inline real sum(const std::vector& arguments) { real cost = 0; for (auto& arg : arguments) { if (arg.value) { diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt index 64654f67d0c2c82f05a5038fb33b220f3cff0f39..6e8f9c37f64b70921e09241089a5a480fd8ca47f 100644 --- a/paddle/pserver/test/CMakeLists.txt +++ b/paddle/pserver/test/CMakeLists.txt @@ -10,9 +10,11 @@ add_test(NAME socket_test add_unittest_without_exec(test_ProtoServer test_ProtoServer.cpp) -add_test(NAME test_ProtoServer - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port - ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) +IF(NOT ON_TRAVIS) + add_test(NAME test_ProtoServer + COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) +ENDIF(NOT ON_TRAVIS) # TODO(yuyang18): Run test_ProtoServer when with rdma # add_test(NAME test_ProtoServerRDMA diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index a708def1d2d7f6da2998a5905f9473accc6db969..1c9455fab5f9c1179bddffb100cd53fe8adfb6b1 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__(): swig_paddle.GradientMachine.getParameters = getParameters + def getNonStaticParameters(self): + return (self.getNonStaticParameter(i) + for i in xrange(self.getNonStaticParameterSize())) + + swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters + def getLayerOutputs(self, layerNames): """ 
getLayerOutputs. get outputs of layers and return a numpy matrix dict. diff --git a/paddle/scripts/travis/before_install.osx.sh b/paddle/scripts/travis/before_install.osx.sh deleted file mode 100755 index 80f031a74e7052d183b5ef21d432476ff1cce722..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/before_install.osx.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -brew update -brew tap homebrew/science -brew install openblas swig md5sha1sum diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh index 5e6350b57458594163f23cca41a546d7bd9b1eda..7deb3e62e88de7e1306fcbfc5a28aa4372d678e6 100755 --- a/paddle/scripts/travis/build_and_test.sh +++ b/paddle/scripts/travis/build_and_test.sh @@ -2,18 +2,11 @@ source ./common.sh NPROC=1 -if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages - export PYTHONHOME=/opt/python/2.7.12 - export PATH=/opt/python/2.7.12/bin:${PATH} - cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} - NRPOC=`nproc` - make -j $NPROC - make coveralls - sudo make install -elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then - export PYTHONPATH=/usr/local/lib/python2.7/site-packages - cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} - NPROC=`sysctl -n hw.ncpu` - make -j $NPROC -fi +export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages +export PYTHONHOME=/opt/python/2.7.12 +export PATH=/opt/python/2.7.12/bin:${PATH} +cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} +NRPOC=`nproc` +make -j $NPROC +make coveralls +sudo make install diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/docs.sh index 6b43cad20b76e9abeb3cb10a726d3d8e3da5f8e2..53e998ef6c1b96d9e7d82b7effd12a27e6dc69f2 100755 --- a/paddle/scripts/travis/docs.sh +++ b/paddle/scripts/travis/docs.sh @@ -2,8 +2,12 @@ # Add set -e, cd to directory. source ./common.sh - # Compile Documentation only. +cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS} +mkdir output +make DESTDIR=./output install -j `nproc` +pip install ./output/usr/local/opt/paddle/share/wheels/* +rm -rf * cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} make paddle_docs paddle_docs_cn @@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages" # Only deploy master branch to build latest documentation. SOURCE_BRANCH="master" -# If is not a Github pull request, and in master branch. -if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then - exit 0 -fi - # Clone the repo to output directory git clone $REPO output cd output -# checkout github page branch -git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH +function deploy_docs() { + SOURCE_BRANCH=$1 + DIR=$2 + # If is not a Github pull request + if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then + exit 0 + fi + # If it is not watched branch. + if [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then + return + fi -# remove old docs. mv new docs. -rm -rf doc doc_cn -mv ../doc/cn/html doc_cn -mv ../doc/en/html doc + # checkout github page branch + git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH + + mkdir -p ${DIR} + # remove old docs. mv new docs. 
+ set +e + rm -rf ${DIR}/doc ${DIR}/doc_cn + set -e + mv ../doc/cn/html ${DIR}/doc_cn + mv ../doc/en/html ${DIR}/doc + git add . +} + +deploy_docs "master" "." +deploy_docs "develop" "./develop/" # Check is there anything changed. set +e -git diff --exit-code >/dev/null +git diff --cached --exit-code >/dev/null if [ $? -eq 0 ]; then echo "No changes to the output on this push; exiting." exit 0 @@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P git config user.name "Travis CI" git config user.email "paddle-dev@baidu.com" git commit -m "Deploy to GitHub Pages: ${SHA}" - # Set ssh private key openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d chmod 600 deploy_key diff --git a/paddle/setup.py.in b/paddle/setup.py.in index 38621af065913c9edd44958e9fb767c983c00dbb..382d5be6ecfc26b4a524bb6a775bd1a805a34d96 100644 --- a/paddle/setup.py.in +++ b/paddle/setup.py.in @@ -72,6 +72,7 @@ setup(name="py_paddle", packages=['py_paddle'], include_dirs = include_dirs, install_requires = [ + 'nltk>=3.2.2', 'numpy>=1.8.0', # The numpy is required. 'protobuf>=3.0.0' # The paddle protobuf version ], diff --git a/paddle/trainer/Tester.cpp b/paddle/trainer/Tester.cpp index 13aa28ae5d9699d267858d48e46797c756487ddd..80664fa877b324af73e3e3effa11e46eac6294e2 100644 --- a/paddle/trainer/Tester.cpp +++ b/paddle/trainer/Tester.cpp @@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch, return 0.0; // In this case, there is no meaning to calculate cost } - return Argument::sumCosts(outArgs); + return Argument::sum(outArgs); } void Tester::testOnePassBatch(int passId) { diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp index bd84545375117b178d4324f0ad03f5bc35ae925d..b68e29cd5ea223272151e7a8b52d998832f47103 100644 --- a/paddle/trainer/Trainer.cpp +++ b/paddle/trainer/Trainer.cpp @@ -310,7 +310,7 @@ real Trainer::checkGradient() { std::vector outArgs; trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); - real cost = Argument::sumCosts(outArgs); + real cost = Argument::sum(outArgs); LOG(INFO) << "original cost=" << cost; trainerInternal_.getGradientMachine()->backward(); @@ -340,7 +340,7 @@ real Trainer::checkGradient() { parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->setValueUpdated(); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); - real newCost1 = Argument::sumCosts(outArgs); + real newCost1 = Argument::sum(outArgs); for (size_t i = 0; i < dim; ++i) { newp[i] = oldp[i] - step * d[i]; @@ -349,7 +349,7 @@ real Trainer::checkGradient() { parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->setValueUpdated(); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); - real newCost2 = Argument::sumCosts(outArgs); + real newCost2 = Argument::sum(outArgs); real trueDelta = 0.5 * (newCost1 - newCost2); real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1; @@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch, trainerInternal_.getGradientMachine()->forwardBackward( inArgs, &outArgs, PASS_TRAIN); - real cost = Argument::sumCosts(outArgs); + real cost = Argument::sum(outArgs); offset = 0; for (auto& para : parameters) { diff --git a/paddle/trainer/TrainerInternal.cpp b/paddle/trainer/TrainerInternal.cpp index f3b465b444167d4624a5e99c30e1257eda53ca2c..4c5d4a0913aaf3a9932b3d67806378ece4245304 100644 --- a/paddle/trainer/TrainerInternal.cpp +++ 
b/paddle/trainer/TrainerInternal.cpp @@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId, real cost = 0; { REGISTER_TIMER("sumCost"); - cost = Argument::sumCosts(*outArgs); + cost = Argument::sum(*outArgs); } if (batchId % intconfig_->log_period == 0) { diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py index bd24c68b6fe88eab03c814f8cac70db3880316f4..0e752c117c1ecfab72e2da2f830380e9524236e7 100644 --- a/python/paddle/trainer/PyDataProvider2.py +++ b/python/paddle/trainer/PyDataProvider2.py @@ -45,6 +45,23 @@ class CacheType(object): class InputType(object): + """ + InputType is the base class for paddle input types. + + .. note:: + + this is a base class, and should never be used by user. + + :param dim: dimension of input. If the input is an integer, it means the + value range. Otherwise, it means the size of layer. + :type dim: int + :param seq_type: sequence type of input. 0 means it is not a sequence. 1 + means it is a variable length sequence. 2 means it is a + nested sequence. + :type seq_type: int + :param type: data type of input. + :type type: int + """ __slots__ = ['dim', 'seq_type', 'type'] def __init__(self, dim, seq_type, tp): @@ -54,19 +71,63 @@ class InputType(object): def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): + """ + Dense Vector. It means the input feature is dense float vector. For example, + if the input is an image with 28*28 pixels, the input of Paddle neural + network should be a dense vector with dimension 784. + + :param dim: dimension of this vector. + :type dim: int + :param seq_type: sequence type of input. + :type seq_type: int + :return: An input type object. + :rtype: InputType + """ return InputType(dim, seq_type, DataType.Dense) def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): + """ + Sparse binary vector. It means the input feature is a sparse vector and the + every element in this vector is either zero or one. + + :param dim: dimension of this vector. + :type dim: int + :param seq_type: sequence type of this input. + :type seq_type: int + :return: An input type object. + :rtype: InputType + """ return InputType(dim, seq_type, DataType.SparseNonValue) def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): + """ + Sparse vector. It means the input feature is a sparse vector. Most of the + elements in this vector are zero, others could be any float value. + + :param dim: dimension of this vector. + :type dim: int + :param seq_type: sequence type of this input. + :type seq_type: int + :return: An input type object. + :rtype: InputType + """ return InputType(dim, seq_type, DataType.SparseValue) -def index_slot(dim, seq_type=SequenceType.NO_SEQUENCE): - return InputType(dim, seq_type, DataType.Index) +def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE): + """ + Data type of integer. + + :param seq_type: sequence type of this input. + :type seq_type: int + :param value_range: range of this integer. + :type value_range: int + :return: An input type object + :rtype: InputType + """ + return InputType(value_range, seq_type, DataType.Index) dense_vector = dense_slot @@ -76,6 +137,14 @@ integer_value = index_slot def dense_vector_sequence(dim): + """ + Data type of a sequence of dense vector. + + :param dim: dimension of dense vector. 
+ :type dim: int + :return: An input type object + :rtype: InputType + """ return dense_vector(dim, seq_type=SequenceType.SEQUENCE) @@ -84,6 +153,15 @@ def dense_vector_sub_sequence(dim): def sparse_binary_vector_sequence(dim): + """ + Data type of a sequence of sparse vector, which every element is either zero + or one. + + :param dim: dimension of sparse vector. + :type dim: int + :return: An input type object + :rtype: InputType + """ return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE) @@ -92,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim): def sparse_vector_sequence(dim): + """ + Data type of a sequence of sparse vector, which most elements are zero, + others could be any float value. + + :param dim: dimension of sparse vector. + :type dim: int + :return: An input type object + :rtype: InputType + """ return sparse_vector(dim, seq_type=SequenceType.SEQUENCE) @@ -99,8 +186,14 @@ def sparse_vector_sub_sequence(dim): return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) -def integer_value_sequence(dim): - return integer_value(dim, seq_type=SequenceType.SEQUENCE) +def integer_value_sequence(value_range): + """ + Data type of a sequence of integer. + + :param value_range: range of each element. + :type value_range: int + """ + return integer_value(value_range, seq_type=SequenceType.SEQUENCE) def integer_value_sub_sequence(dim): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b68460b6a3ab621904f4dc4e48352044ab265a38..b94f8f9a783552519ca73e7cfc0937b302d3445b 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -795,17 +795,16 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): .. code-block:: python - data = data_layer(name="input", - size=1000) + data = data_layer(name="input", size=1000) :param name: Name of this data layer. :type name: basestring :param size: Size of this data layer. :type size: int :param height: Height of this data layer, used for image - :type size: int|None + :type height: int|None :param width: Width of this data layer, used for image - :type size: int|None + :type width: int|None :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index d548d1adaafacdb097dbe476fdc76651c9f46b6b..25526bf409cf82f26979a84700ce948ac969df0c 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,18 +20,20 @@ import event import data_type import topology import data_feeder +import networks from . import dataset from . 
import reader import attr import pooling -import inferencer +import inference import networks import py_paddle.swig_paddle as api +import minibatch __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', - 'topology', 'networks', 'inferencer', 'infer' + 'topology', 'networks', 'infer' ] @@ -43,4 +45,5 @@ def init(**kwargs): api.initPaddle(*args) -infer = inferencer.infer +infer = inference.infer +batch = minibatch.batch diff --git a/python/paddle/v2/activation.py b/python/paddle/v2/activation.py index 1f3aab9ef3c5f69e22d7e83250d0ff46c1ff718a..21261a178203b633ca6cf59a5fc89edc24a868b9 100644 --- a/python/paddle/v2/activation.py +++ b/python/paddle/v2/activation.py @@ -12,26 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.trainer_config_helpers.activations import * +import paddle.trainer_config_helpers.activations +import copy -__all__ = [ - "Base", "Tanh", "Sigmoid", "Softmax", "Identity", "Linear", - 'SequenceSoftmax', "Exp", "Relu", "BRelu", "SoftRelu", "STanh", "Abs", - "Square", "Log" -] +__all__ = [] -Base = BaseActivation -Tanh = TanhActivation -Sigmoid = SigmoidActivation -Softmax = SoftmaxActivation -SequenceSoftmax = SequenceSoftmaxActivation -Identity = IdentityActivation -Linear = Identity -Relu = ReluActivation -BRelu = BReluActivation -SoftRelu = SoftReluActivation -STanh = STanhActivation -Abs = AbsActivation -Square = SquareActivation -Exp = ExpActivation -Log = LogActivation +suffix = 'Activation' +for act in paddle.trainer_config_helpers.activations.__all__: + new_name = act[:-len(suffix)] + globals()[new_name] = copy.copy( + getattr(paddle.trainer_config_helpers.activations, act)) + globals()[new_name].__name__ = new_name + __all__.append(new_name) diff --git a/python/paddle/v2/attr.py b/python/paddle/v2/attr.py index 40c64f621b443d5613468b27d030ab08776641b2..32f78614e7f8abe7cffdc7a50a9fa77f1fc1a780 100644 --- a/python/paddle/v2/attr.py +++ b/python/paddle/v2/attr.py @@ -12,12 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.trainer_config_helpers.attrs import * +import paddle.trainer_config_helpers.attrs __all__ = [ "Param", "Extra", ] -Param = ParameterAttribute -Extra = ExtraLayerAttribute +Param = paddle.trainer_config_helpers.attrs.ParameterAttribute +Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute + +for each in paddle.trainer_config_helpers.attrs.__all__: + globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each) + __all__.append(each) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 035f96b0f2e978a413a1ebe0ec115f75ff07befc..1ec1d7bbdf912b940ca4b8e7b20eb11310f0e74f 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -13,15 +13,59 @@ # limitations under the License. 
import collections - +import re from paddle.trainer_config_helpers.default_decorators import wrap_name_default import paddle.trainer_config_helpers as conf_helps +class LayerType(type): + def __new__(cls, name, bases, attrs): + method_name = attrs.get('METHOD_NAME', None) + if method_name is not None: + method = getattr(conf_helps, method_name) + if method.__doc__ is not None: + mapper = attrs.get("__map_docstr__", None) + if mapper is not None: + attrs['__doc__'] = LayerType.__map_docstr__( + mapper(method.__doc__), + method_name=method_name, + name=name) + else: + attrs['__doc__'] = LayerType.__map_docstr__( + method.__doc__, method_name=method_name, name=name) + return super(LayerType, cls).__new__(cls, name, bases, attrs) + + @staticmethod + def __map_docstr__(doc, name, method_name): + assert isinstance(doc, basestring) + + # replace LayerOutput to paddle.v2.config_base.Layer + doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer") + + doc = doc.replace('ParameterAttribute', + 'paddle.v2.attr.ParameterAttribute') + + doc = re.sub(r'ExtraLayerAttribute[^\s]?', + 'paddle.v2.attr.ExtraAttribute', doc) + + # xxx_layer to xxx + doc = re.sub(r"(?P[a-z]+)_layer", r"\g", doc) + + # XxxxActivation to paddle.v2.Activation.Xxxx + doc = re.sub(r"(?P[A-Z][a-zA-Z]+)Activation", + r"paddle.v2.Activation.\g", doc) + + # TODO(yuyang18): Add more rules if needed. + return doc + + class Layer(object): + __metaclass__ = LayerType + def __init__(self, name=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.__contex__ = {} self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -39,16 +83,38 @@ class Layer(object): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name is None: + if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) - - return context[self.name] + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl(**kwargs) + self.__contex__ = context + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() + def context_name(self): + """ + Context name means the context which stores `to_proto_impl` result. + If multiple layer share same context_name, the `to_proto_impl` of them + will be invoked only once. + """ + return self.name + + def use_context_name(self): + return False + + def calculate_size(self): + """ + lazy calculate size of the layer, should be called when to_proto_impl of + this layer is called. + :return: + """ + return self.__contex__[self.context_name()].size + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -57,6 +123,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): wrapper = None class V2LayerImpl(Layer): + METHOD_NAME = method_name + def __init__(self, **kwargs): parent_layers = dict() other_kwargs = dict() diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 3b106e100cff7539611d95bb4123b4e0dfbfa6cb..b7465238be8138e47913b7bd3f1c669ed9653958 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from py_paddle import swig_paddle from py_paddle import DataProviderConverter + import data_type __all__ = ['DataFeeder'] @@ -29,7 +29,10 @@ class DataFeeder(DataProviderConverter): to feed it to C++ interface. The example usage: - + + + .. code-block:: python + data_types = [('image', paddle.data_type.dense_vector(784)), ('label', paddle.data_type.integer_value(10))] reader_dict = {'image':0, 'label':1} @@ -43,20 +46,24 @@ class DataFeeder(DataProviderConverter): # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample # ] arg = feeder(minibatch_data) + + .. note:: + + This module is for internal use only. Users should use the `reader` + interface. + + + + :param data_types: A list to specify data name and type. Each item is + a tuple of (data_name, data_type). + + :type data_types: list + :param reader_dict: A dictionary to specify the position of each data + in the input data. + :type reader_dict: dict """ def __init__(self, data_types, reader_dict): - """ - :param data_types: A list to specify data name and type. Each item is - a tuple of (data_name, data_type). For example: - [('image', paddle.data_type.dense_vector(784)), - ('label', paddle.data_type.integer_value(10))] - - :type data_types: A list of tuple - :param reader_dict: A dictionary to specify the position of each data - in the input data. - :type reader_dict: dict() - """ self.input_names = [] input_types = [] self.reader_dict = reader_dict @@ -70,22 +77,12 @@ class DataFeeder(DataProviderConverter): """ :param dat: A list of mini-batch data. Each sample is a list or tuple one feature or multiple features. - for example: - [ - ([0.2, 0.2], ), # first sample - ([0.8, 0.3], ), # second sample - ] - or, - [ - [[0.2, 0.2], ], # first sample - [[0.8, 0.3], ], # second sample - ] - - :type dat: List + + :type dat: list :param argument: An Arguments object contains this mini-batch data with one or multiple features. The Arguments definition is in the API. - :type argument: swig_paddle.Arguments + :type argument: py_paddle.swig_paddle.Arguments """ def reorder_data(data): diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py index 522ddfdaacce44be7cf27bdbfc1009d4a0c0bbe6..d582f76ddf01ed3430a1d075624bbb8e0bf3f2a9 100644 --- a/python/paddle/v2/data_type.py +++ b/python/paddle/v2/data_type.py @@ -12,11 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.trainer.PyDataProvider2 import \ - InputType, DataType, dense_vector, sparse_binary_vector,\ - sparse_vector, integer_value, integer_value_sequence +import paddle.trainer.PyDataProvider2 as pydp2 -__all__ = [ - 'InputType', 'DataType', 'dense_vector', 'sparse_binary_vector', - 'sparse_vector', 'integer_value', 'integer_value_sequence' +import_list = [ + nm for nm in dir(pydp2) + if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm) ] +import_list.extend(['InputType']) + +for nm in import_list: + globals()[nm] = getattr(pydp2, nm) + +__all__ = import_list diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index a947edd2cce649b74649cabc32321c62f5a1bd64..80ff6295c34e853d8f69b9e78719af23a56d1fbb 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,5 +1,31 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Dataset package. +""" + import mnist +import imikolov +import imdb import cifar import movielens +import conll05 +import uci_housing +import sentiment +import wmt14 -__all__ = ['mnist', 'cifar', 'movielens'] +__all__ = [ + 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' + 'uci_housing', 'wmt14' +] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 77c54bd268b5d988b0802a3edca91605e56f730e..d9f7a830ee60a331b55a1e218923e690103e1c5b 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -1,6 +1,22 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html + +TODO(yuyang18): Complete the comments. """ + import cPickle import itertools import numpy diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index fcf4437ffaf329f52cc5bc997eff45dee200873c..3021b68ddb02ecaa874e21681796c0912ad4cc06 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -1,7 +1,22 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import requests import hashlib import os import shutil +import sys __all__ = ['DATA_HOME', 'download', 'md5file'] @@ -27,9 +42,24 @@ def download(url, module_name, md5sum): filename = os.path.join(dirname, url.split('/')[-1]) if not (os.path.exists(filename) and md5file(filename) == md5sum): + print "Cache file %s not found, downloading %s" % (filename, url) r = requests.get(url, stream=True) - with open(filename, 'w') as f: - shutil.copyfileobj(r.raw, f) + total_length = r.headers.get('content-length') + + if total_length is None: + with open(filename, 'w') as f: + shutil.copyfileobj(r.raw, f) + else: + with open(filename, 'w') as f: + dl = 0 + total_length = int(total_length) + for data in r.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + done = int(50 * dl / total_length) + sys.stdout.write("\r[%s%s]" % ('=' * done, + ' ' * (50 - done))) + sys.stdout.flush() return filename diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py new file mode 100644 index 0000000000000000000000000000000000000000..9eab49ee39325c1c60fc511e0bd834e83aa987f0 --- /dev/null +++ b/python/paddle/v2/dataset/conll05.py @@ -0,0 +1,198 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tarfile +import gzip +import itertools +from common import download +""" +Conll 2005 dataset. Paddle semantic role labeling Book and demo use this +dataset as an example. Because Conll 2005 is not free in public, the default +downloaded URL is test set of Conll 2005 (which is public). Users can change +URL and MD5 to their Conll dataset. + +TODO(yuyang18): Complete comments. +""" + +__all__ = ['test, get_dict', 'get_embedding'] + +DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' +DATA_MD5 = '387719152ae52d60422c016e92a742fc' +WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' +WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' +VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt' +VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' +TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt' +TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' +EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb' +EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' + +UNK_IDX = 0 + + +def load_dict(filename): + d = dict() + with open(filename, 'r') as f: + for i, line in enumerate(f): + d[line.strip()] = i + return d + + +def corpus_reader(data_path, words_name, props_name): + """ + Read one corpus. It returns an iterator. Each element of + this iterator is a tuple including sentence and labels. The sentence is + consist of a list of word IDs. The labels include a list of label IDs. + :return: a iterator of data. 
+ :rtype: iterator + """ + + def reader(): + tf = tarfile.open(data_path) + wf = tf.extractfile(words_name) + pf = tf.extractfile(props_name) + with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile( + fileobj=pf) as props_file: + sentences = [] + labels = [] + one_seg = [] + for word, label in itertools.izip(words_file, props_file): + word = word.strip() + label = label.strip().split() + + if len(label) == 0: # end of sentence + for i in xrange(len(one_seg[0])): + a_kind_lable = [x[i] for x in one_seg] + labels.append(a_kind_lable) + + if len(labels) >= 1: + verb_list = [] + for x in labels[0]: + if x != '-': + verb_list.append(x) + + for i, lbl in enumerate(labels[1:]): + cur_tag = 'O' + is_in_bracket = False + lbl_seq = [] + verb_word = '' + for l in lbl: + if l == '*' and is_in_bracket == False: + lbl_seq.append('O') + elif l == '*' and is_in_bracket == True: + lbl_seq.append('I-' + cur_tag) + elif l == '*)': + lbl_seq.append('I-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') != -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') == -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = True + else: + raise RuntimeError('Unexpected label: %s' % + l) + + yield sentences, verb_list[i], lbl_seq + + sentences = [] + labels = [] + one_seg = [] + else: + sentences.append(word) + one_seg.append(label) + + pf.close() + wf.close() + tf.close() + + return reader + + +def reader_creator(corpus_reader, + word_dict=None, + predicate_dict=None, + label_dict=None): + def reader(): + for sentence, predicate, labels in corpus_reader(): + + sen_len = len(sentence) + + verb_index = labels.index('B-V') + mark = [0] * len(labels) + if verb_index > 0: + mark[verb_index - 1] = 1 + ctx_n1 = sentence[verb_index - 1] + else: + ctx_n1 = 'bos' + + if verb_index > 1: + mark[verb_index - 2] = 1 + ctx_n2 = sentence[verb_index - 2] + else: + ctx_n2 = 'bos' + + mark[verb_index] = 1 + ctx_0 = sentence[verb_index] + + if verb_index < len(labels) - 1: + mark[verb_index + 1] = 1 + ctx_p1 = sentence[verb_index + 1] + else: + ctx_p1 = 'eos' + + if verb_index < len(labels) - 2: + mark[verb_index + 2] = 1 + ctx_p2 = sentence[verb_index + 2] + else: + ctx_p2 = 'eos' + + word_idx = [word_dict.get(w, UNK_IDX) for w in sentence] + + ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len + ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len + ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len + ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len + ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len + + pred_idx = [predicate_dict.get(predicate)] * sen_len + label_idx = [label_dict.get(w) for w in labels] + + yield word_idx, ctx_n2_idx, ctx_n1_idx, \ + ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx + + return reader + + +def get_dict(): + word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) + verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) + label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + return word_dict, verb_dict, label_dict + + +def get_embedding(): + return download(EMB_URL, 'conll05st', EMB_MD5) + + +def test(): + word_dict, verb_dict, label_dict = get_dict() + reader = corpus_reader( + download(DATA_URL, 'conll05st', DATA_MD5), + words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', + props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') + return 
reader_creator(reader, word_dict, verb_dict, label_dict) diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 433e37380f840f5b7ff619a5f64b99d2ad724b17..76019d9f54020ff6f02c17eb6047cbd014a8ccf2 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -1,6 +1,3 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +13,10 @@ # limitations under the License. """ IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz + +TODO(yuyang18): Complete comments. """ + import paddle.v2.dataset.common import tarfile import Queue @@ -118,3 +118,8 @@ def test(word_idx): return reader_creator( re.compile("aclImdb/test/pos/.*\.txt$"), re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000) + + +def word_dict(): + return build_dict( + re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index b3791ddad66e588356338150fccadbcc8fa113ca..97c160f111d09d61eb860c7f02552e635f2400a7 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -1,10 +1,25 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ + +Complete comments. 
""" import paddle.v2.dataset.common import tarfile -__all__ = ['train', 'test'] +__all__ = ['train', 'test', 'build_dict'] URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' @@ -24,7 +39,9 @@ def word_count(f, word_freq=None): return word_freq -def build_dict(train_filename, test_filename): +def build_dict(): + train_filename = './simple-examples/data/ptb.train.txt' + test_filename = './simple-examples/data/ptb.valid.txt' with tarfile.open( paddle.v2.dataset.common.download( paddle.v2.dataset.imikolov.URL, 'imikolov', @@ -32,27 +49,22 @@ def build_dict(train_filename, test_filename): trainf = tf.extractfile(train_filename) testf = tf.extractfile(test_filename) word_freq = word_count(testf, word_count(trainf)) + if '' in word_freq: + # remove for now, since we will set it as last index + del word_freq[''] TYPO_FREQ = 50 word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) - dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) - words, _ = list(zip(*dictionary)) + word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*word_freq_sorted)) word_idx = dict(zip(words, xrange(len(words)))) word_idx[''] = len(words) return word_idx -word_idx = {} - - -def reader_creator(filename, n): - global word_idx - if len(word_idx) == 0: - word_idx = build_dict('./simple-examples/data/ptb.train.txt', - './simple-examples/data/ptb.valid.txt') - +def reader_creator(filename, word_idx, n): def reader(): with tarfile.open( paddle.v2.dataset.common.download( @@ -71,9 +83,9 @@ def reader_creator(filename, n): return reader -def train(n): - return reader_creator('./simple-examples/data/ptb.train.txt', n) +def train(word_idx, n): + return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n) -def test(n): - return reader_creator('./simple-examples/data/ptb.valid.txt', n) +def test(word_idx, n): + return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n) diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index ebcdff78b317ceb4811048ac78982e072962fa9c..16f2fcb99de4cb1971a7375a97b5daa209ee95ef 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -1,5 +1,21 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ MNIST dataset. + +This module will download dataset from http://yann.lecun.com/exdb/mnist/ and +parse train set and test set into paddle reader creators. """ import paddle.v2.dataset.common import subprocess @@ -59,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size): def train(): + """ + MNIST train set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. 
+ + :return: Train reader creator + :rtype: callable + """ return reader_creator( paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5), @@ -67,6 +92,15 @@ def train(): def test(): + """ + MNIST test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Test reader creator. + :rtype: callable + """ return reader_creator( paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5), diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index 5534af1145563365fe5ede7d21395e5b3ab88c36..25fd8227da2f219d75c6b830e65627ecf35be453 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -1,3 +1,22 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Movielens 1-M dataset. + +TODO(yuyang18): Complete comments. +""" + import zipfile from common import download import re diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py new file mode 100644 index 0000000000000000000000000000000000000000..71689fd61b6b14a7b5072caff4e2fd48a7f74072 --- /dev/null +++ b/python/paddle/v2/dataset/sentiment.py @@ -0,0 +1,127 @@ +# /usr/bin/env python +# -*- coding:utf-8 -*- + +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This script fetches and preprocesses the movie_reviews data set provided by NLTK + +TODO(yuyang18): Complete dataset. +""" + +import collections +from itertools import chain + +import nltk +from nltk.corpus import movie_reviews + +import common + +__all__ = ['train', 'test', 'get_word_dict'] +NUM_TRAINING_INSTANCES = 1600 +NUM_TOTAL_INSTANCES = 2000 + + +def download_data_if_not_yet(): + """ + Download the data set, if it has not been downloaded yet. + """ + try: + # make sure that nltk can find the data + if common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(common.DATA_HOME) + movie_reviews.categories() + except LookupError: + print "Downloading movie_reviews data set, please wait....." + nltk.download('movie_reviews', download_dir=common.DATA_HOME) + print "Download data set success....."
+ print "Path is " + nltk.data.find('corpora/movie_reviews').path + + +def get_word_dict(): + """ + Sorted the words by the frequency of words which occur in sample + :return: + words_freq_sorted + """ + words_freq_sorted = list() + word_freq_dict = collections.defaultdict(int) + download_data_if_not_yet() + + for category in movie_reviews.categories(): + for field in movie_reviews.fileids(category): + for words in movie_reviews.words(field): + word_freq_dict[words] += 1 + words_sort_list = word_freq_dict.items() + words_sort_list.sort(cmp=lambda a, b: b[1] - a[1]) + for index, word in enumerate(words_sort_list): + words_freq_sorted.append((word[0], index)) + return words_freq_sorted + + +def sort_files(): + """ + Sorted the sample for cross reading the sample + :return: + files_list + """ + files_list = list() + neg_file_list = movie_reviews.fileids('neg') + pos_file_list = movie_reviews.fileids('pos') + files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list))) + return files_list + + +def load_sentiment_data(): + """ + Load the data set + :return: + data_set + """ + data_set = list() + download_data_if_not_yet() + words_ids = dict(get_word_dict()) + for sample_file in sort_files(): + words_list = list() + category = 0 if 'neg' in sample_file else 1 + for word in movie_reviews.words(sample_file): + words_list.append(words_ids[word.lower()]) + data_set.append((words_list, category)) + return data_set + + +def reader_creator(data): + """ + Reader creator, generate an iterator for data set + :param data: + train data set or test data set + """ + for each in data: + yield each[0], each[1] + + +def train(): + """ + Default train set reader creator + """ + data_set = load_sentiment_data() + return reader_creator(data_set[0:NUM_TRAINING_INSTANCES]) + + +def test(): + """ + Default test set reader creator + """ + data_set = load_sentiment_data() + return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py index a2af45ecf508462fe4b596b5d8d6401c5b974eff..e0e18229da7818be5752ee592e094a00da286ad9 100644 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ b/python/paddle/v2/dataset/tests/cifar_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.cifar import unittest diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py index 7d8406171b8478e4a8331637c5e867c18d5eb3d8..5babcef0eb4345d243904877d323c37d4889a643 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/v2/dataset/tests/common_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.common import unittest import tempfile diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py index e887af16634d2db04b8cf5fa0269a69991d8baac..c4d82f26895d77d05c6e936bd636b1239e1a0cd8 100644 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ b/python/paddle/v2/dataset/tests/imdb_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.imdb import unittest import re diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/v2/dataset/tests/imikolov_test.py index 9b1748eaaa7f913a6b94f2087a8089fb998570aa..009e55243a594e5e235c36fb0223ec70754d17f3 100644 --- a/python/paddle/v2/dataset/tests/imikolov_test.py +++ b/python/paddle/v2/dataset/tests/imikolov_test.py @@ -1,6 +1,8 @@ import paddle.v2.dataset.imikolov import unittest +WORD_DICT = paddle.v2.dataset.imikolov.build_dict() + class TestMikolov(unittest.TestCase): def check_reader(self, reader, n): @@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase): def test_train(self): n = 5 - self.check_reader(paddle.v2.dataset.imikolov.train(n), n) + self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n) def test_test(self): n = 5 - self.check_reader(paddle.v2.dataset.imikolov.test(n), n) + self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n) + + def test_total(self): + _, idx = zip(*WORD_DICT.items()) + self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1) if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py index b4408cc2f590d4d8da4ce5e98213cf7b208cfc15..1d344cac3e7483a351033570fbec75a4d19f4a55 100644 --- a/python/paddle/v2/dataset/tests/mnist_test.py +++ b/python/paddle/v2/dataset/tests/mnist_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
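The updated imikolov test above also documents the new calling convention: the vocabulary is built once with build_dict() and then passed explicitly into the reader creators. A minimal usage sketch under that convention (it assumes the PTB archive can be fetched by paddle.v2.dataset.common.download):

```python
import paddle.v2.dataset.imikolov as imikolov

# Build the vocabulary once; '<unk>' is appended as the last index.
word_dict = imikolov.build_dict()

# train()/test() now take the dictionary plus the window size n.
train_reader = imikolov.train(word_dict, 5)
for sample in train_reader():
    # each sample is a fixed-length window of n word ids, as check_reader asserts
    assert len(sample) == 5
    break
```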
+ import paddle.v2.dataset.mnist import unittest diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py new file mode 100644 index 0000000000000000000000000000000000000000..407405290734609059c1767600748d530e8a13a6 --- /dev/null +++ b/python/paddle/v2/dataset/tests/test_sentiment.py @@ -0,0 +1,55 @@ +# /usr/bin/env python +# -*- coding:utf-8 -*- + +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import nltk +import paddle.v2.dataset.sentiment as st +from nltk.corpus import movie_reviews + + +class TestSentimentMethods(unittest.TestCase): + def test_get_word_dict(self): + word_dict = st.get_word_dict()[0:10] + test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3), + (u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7), + (u'is', 8), (u'in', 9)] + for idx, each in enumerate(word_dict): + self.assertEqual(each, test_word_list[idx]) + self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path) + + def test_sort_files(self): + last_label = '' + for sample_file in st.sort_files(): + current_label = sample_file.split("/")[0] + self.assertNotEqual(current_label, last_label) + last_label = current_label + + def test_data_set(self): + data_set = st.load_sentiment_data() + last_label = -1 + for each in st.test(): + self.assertNotEqual(each[1], last_label) + last_label = each[1] + self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES) + self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES) + self.assertEqual( + len(list(st.test())), + (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py new file mode 100644 index 0000000000000000000000000000000000000000..27f454b137e3a40febd19cf085e2f4034cc16b24 --- /dev/null +++ b/python/paddle/v2/dataset/uci_housing.py @@ -0,0 +1,91 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +UCI Housing dataset. + +TODO(yuyang18): Complete comments. 
+""" + +import numpy as np +import os +from common import download + +__all__ = ['train', 'test'] + +URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data' +MD5 = 'd4accdce7a25600298819f8e28e8d593' +feature_names = [ + 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', + 'PTRATIO', 'B', 'LSTAT' +] + +UCI_TRAIN_DATA = None +UCI_TEST_DATA = None + + +def feature_range(maximums, minimums): + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + fig, ax = plt.subplots() + feature_num = len(maximums) + ax.bar(range(feature_num), maximums - minimums, color='r', align='center') + ax.set_title('feature scale') + plt.xticks(range(feature_num), feature_names) + plt.xlim([-1, feature_num]) + fig.set_figheight(6) + fig.set_figwidth(10) + if not os.path.exists('./image'): + os.makedirs('./image') + fig.savefig('image/ranges.png', dpi=48) + plt.close(fig) + + +def load_data(filename, feature_num=14, ratio=0.8): + global UCI_TRAIN_DATA, UCI_TEST_DATA + if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None: + return + + data = np.fromfile(filename, sep=' ') + data = data.reshape(data.shape[0] / feature_num, feature_num) + maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( + axis=0) / data.shape[0] + feature_range(maximums[:-1], minimums[:-1]) + for i in xrange(feature_num - 1): + data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) + offset = int(data.shape[0] * ratio) + UCI_TRAIN_DATA = data[:offset] + UCI_TEST_DATA = data[offset:] + + +def train(): + global UCI_TRAIN_DATA + load_data(download(URL, 'uci_housing', MD5)) + + def reader(): + for d in UCI_TRAIN_DATA: + yield d[:-1], d[-1:] + + return reader + + +def test(): + global UCI_TEST_DATA + load_data(download(URL, 'uci_housing', MD5)) + + def reader(): + for d in UCI_TEST_DATA: + yield d[:-1], d[-1:] + + return reader diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py new file mode 100644 index 0000000000000000000000000000000000000000..9904848b5d3ef95dc331fc0ba1a98f29f8b1dfeb --- /dev/null +++ b/python/paddle/v2/dataset/wmt14.py @@ -0,0 +1,142 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +wmt14 dataset +""" +import paddle.v2.dataset.common +import tarfile +import os.path +import itertools + +__all__ = ['train', 'test', 'build_dict'] + +URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' +MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' +URL_TRAIN = 'http://localhost:8000/train.tgz' +MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7' + + +def word_count(f, word_freq=None): + add = paddle.v2.dataset.common.dict_add + if word_freq == None: + word_freq = {} + + for l in f: + for w in l.strip().split(): + add(word_freq, w) + add(word_freq, '') + add(word_freq, '') + + return word_freq + + +def get_word_dix(word_freq): + TYPO_FREQ = 50 + word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) + word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*word_freq_sorted)) + word_idx = dict(zip(words, xrange(len(words)))) + word_idx[''] = len(words) + return word_idx + + +def get_word_freq(train, dev): + word_freq = word_count(train, word_count(dev)) + if '' in word_freq: + # remove for now, since we will set it as last index + del word_freq[''] + return word_freq + + +def build_dict(): + base_dir = './wmt14-data' + train_en_filename = base_dir + '/train/train.en' + train_fr_filename = base_dir + '/train/train.fr' + dev_en_filename = base_dir + '/dev/ntst1213.en' + dev_fr_filename = base_dir + '/dev/ntst1213.fr' + + if not os.path.exists(train_en_filename) or not os.path.exists( + train_fr_filename): + with tarfile.open( + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', + MD5_TRAIN)) as tf: + tf.extractall(base_dir) + + if not os.path.exists(dev_en_filename) or not os.path.exists( + dev_fr_filename): + with tarfile.open( + paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14', + MD5_DEV_TEST)) as tf: + tf.extractall(base_dir) + + f_en = open(train_en_filename) + f_fr = open(train_fr_filename) + f_en_dev = open(dev_en_filename) + f_fr_dev = open(dev_fr_filename) + + word_freq_en = get_word_freq(f_en, f_en_dev) + word_freq_fr = get_word_freq(f_fr, f_fr_dev) + + f_en.close() + f_fr.close() + f_en_dev.close() + f_fr_dev.close() + + return get_word_dix(word_freq_en), get_word_dix(word_freq_fr) + + +def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr): + def reader(): + if not os.path.exists(path_en) or not os.path.exists(path_fr): + with tarfile.open( + paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf: + tf.extractall(directory) + + f_en = open(path_en) + f_fr = open(path_fr) + UNK_en = dict_en[''] + UNK_fr = dict_fr[''] + + for en, fr in itertools.izip(f_en, f_fr): + src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()] + tar_ids = [ + dict_fr.get(w, UNK_fr) + for w in [''] + fr.strip().split() + [''] + ] + + # remove sequence whose length > 80 in training mode + if len(src_ids) == 0 or len(tar_ids) <= 1 or len( + src_ids) > 80 or len(tar_ids) > 80: + continue + + yield src_ids, tar_ids[:-1], tar_ids[1:] + + f_en.close() + f_fr.close() + + return reader + + +def train(dict_en, dict_fr): + directory = './wmt14-data' + return reader_creator(directory, directory + '/train/train.en', + directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN, + dict_en, dict_fr) + + +def test(dict_en, dict_fr): + directory = './wmt14-data' + return reader_creator(directory, directory + '/dev/ntst1213.en', + directory + '/dev/ntst1213.fr', URL_DEV_TEST, + MD5_DEV_TEST, dict_en, dict_fr) diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index 
a429e36b63c9e812332673b66f4d8b99f3303cf8..1ad52b8baa411269d29732685871a875df5185cc 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -34,6 +34,10 @@ class WithMetric(object): class TestResult(WithMetric): + """ + Result that trainer.test return. + """ + def __init__(self, evaluator, cost): super(TestResult, self).__init__(evaluator) self.cost = cost diff --git a/python/paddle/v2/inferencer.py b/python/paddle/v2/inference.py similarity index 98% rename from python/paddle/v2/inferencer.py rename to python/paddle/v2/inference.py index ac03b016c9b8bfbc586072855402ed3a373e9b54..476fd3fa4523a77709f68c73c73e6851e04064aa 100644 --- a/python/paddle/v2/inferencer.py +++ b/python/paddle/v2/inference.py @@ -5,7 +5,7 @@ from data_feeder import DataFeeder import itertools import numpy -__all__ = ['Inference', 'infer'] +__all__ = ['infer'] class Inference(object): diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 67111f1315fbb0f55c1db0f6fe89fc988c8d83f6..1e4efedde363f20fde168941adcb6e8a594b533a 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -12,91 +12,67 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Before this new package paddle.v2.layer, users would need to use functions -in paddle.trainer_config_helpers.layers to configure networks. - -The Old Way: -========= -This old way requires that the creation of a network be defined in a Python -function, say network_config, and that this Python function being passed to -paddle.trainer_config_helpers.parse_network_config for the creation of -protobuf message description of this network. - -```python -def network_config(): - img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784) - inference = paddle.trainer_config_helpers.fc_layer( - input=img, - size=10, - act=paddle.trainer_config_helpers.SoftmaxActivation()) - cost = paddle.trainer_config_helpers.classification_cost( - input=inference, - label=paddle.trainer_config_helpers.data_layer(name="label", size=10)) - -proto_desc = parse_network_config(network_config) -``` - -When parse_network_config executes network_config, those layer definition -functions like data_layer and fc_layer would change some Python global variables, -so that after the execution, parse_network_config could collect information from -these global variables and generates the protobuf message. - - - -The New Way: -========= -In this PR, we define a function in paddle.v2.layer which creates a Python -class for each layer creation function in paddle.trainer_config_helpers.layers. -Users can use create a network as follows: - -```python -img = paddle.v2.layer.data(name="pixel", size=784) -inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax()) -cost = paddle.v2.layer.classification( - input=inference, - label=paddle.v2.layer.data(name="label", size=10)) - -parameters = paddle.v2.parameters.create(cost) -``` - -This new way doesn't require those invocations to layer definition functions -to be in a Python function but could be anywhere. - -Also, the creation of a protobuf message is hidden in the invocation of -paddle.v2.parameters.create, no longer exposed to users. +`paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2, +we want to make Paddle a plain Python package. The model config package defined +the way how to configure a neural network topology in Paddle Python code. + +The primary usage shows below. + +.. 
code-block:: python + + import paddle.v2 as paddle + + img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784)) + hidden = paddle.layer.fc(input=img, size=200) + prediction = paddle.layer.fc(input=hidden, size=10, + act=paddle.activation.Softmax()) + + # use prediction instance where needed. + parameters = paddle.parameters.create(cost) """ + +import collections +import inspect from config_base import Layer, __convert_to_v2__ import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ - -from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import \ wrap_bias_attr_default +from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.layers import layer_support +from paddle.trainer.config_parser import \ + RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \ + RecurrentLayerGroupEnd, model_type -import data_type import activation +import re +import data_type __all__ = ['parse_network', 'data'] -__projection_names__ = filter(lambda x: x.endswith('_projection'), - dir(conf_helps)) -__all__ += __projection_names__ - -__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps)) -__all__ += __operator_names__ - def parse_network(*outputs): """ - parse all output layers and then generate a model config proto. - :param outputs: - :return: + Parse all output layers and then generate a ModelConfig object. + + .. note:: + + This function is used internally in paddle.v2 module. User should never + invoke this method. + + :param outputs: Output layers. + :type outputs: Layer + :return: A ModelConfig object instance. + :rtype: ModelConfig """ def __real_func__(): + """ + __real_func__ is the function that config_parser.parse invoked. It is + the plain old paddle configuration function. + """ context = dict() real_output = [each.to_proto(context=context) for each in outputs] conf_helps.outputs(real_output) @@ -111,6 +87,8 @@ So we also need to implement some special LayerV2. 
class DataLayerV2(Layer): + METHOD_NAME = 'data_layer' + def __init__(self, name, type, **kwargs): assert isinstance(type, data_type.InputType) @@ -129,6 +107,148 @@ class DataLayerV2(Layer): args[each] = self.__kwargs__[each] return getattr(conf_helps, self.__method_name__)(name=self.name, **args) + def __map_docstr__(doc): + doc = re.sub(r'(data = [^\)]+)\).*', + "data = paddle.layer.data(name=\"input\", " + "type=paddle.data_type.dense_vector(1000))", doc) + + doc = re.sub(r':param size:.*', + ':param type: Data type of this data layer', doc) + doc = re.sub(r':type size:.*', + ":type size: paddle.v2.data_type.InputType", doc) + return doc + + +class WithExtraParent(Layer): + def extra_parent(self): + return self.__extra_parent__ + + def __init__(self, name=None, parent_layers=None): + self.__extra_parent__ = [] + super(WithExtraParent, self).__init__( + name=name, parent_layers=parent_layers) + + def append_extra_parent(self, parent): + self.__extra_parent__.append(parent) + + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for p in self.__extra_parent__: + p.to_proto(context=context) + + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.context_name() is None: + return self.to_proto_impl(context=context, **kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl( + context=context, **kwargs) + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] + + +class MemoryV2(WithExtraParent): + def __init__(self, name, **kwargs): + self.name = name + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + self.__kwargs__ = kwargs + self.__boot_layer_name__ = None + if 'boot_layer' in kwargs: + begin_of_current_rnn = [] + # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a + # function inside step. 
+ st = inspect.stack() + for i in xrange(len(st)): + locs = inspect.stack()[i][0].f_locals + keys = locs.keys() + for key in keys: + val = locs[key] + if isinstance(val, RecurrentLayerInput): + begin_of_current_rnn.append(val) + elif isinstance(val, collections.Sequence): + for v in val: + if isinstance(v, RecurrentLayerInput): + begin_of_current_rnn.append(v) + + if begin_of_current_rnn: + break + assert begin_of_current_rnn is not None + for extra in begin_of_current_rnn: + self.append_extra_parent(extra) + assert isinstance(extra, WithExtraParent) + extra.append_extra_parent(kwargs['boot_layer']) + self.__boot_layer_name__ = kwargs['boot_layer'].name + + def to_proto_impl(self, context, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + + if self.__boot_layer_name__ is not None: + args['boot_layer'] = context[self.__boot_layer_name__] + + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size + return conf_helps.memory(name=self.name, **args) + + def context_name(self): + return self.name + "#memory" + + def use_context_name(self): + """ + memory layer will have the same name with some layer + :return: + """ + return True + + +class LayerOutputV2(Layer): + """ + LayerOutputV2 is used to store the result of LayerOutput in v1 api. + It will not store it's parents because layer_output has been parsed already. + """ + + def __init__(self, layer_output): + assert isinstance(layer_output, conf_helps.LayerOutput) + self.layer_output = layer_output + super(LayerOutputV2, self).__init__( + name=layer_output.name, parent_layers=dict()) + + def to_proto_impl(self): + return self.layer_output + + +class StaticInputV2(object): + def __init__(self, input, is_seq=False, size=None): + assert isinstance(input, LayerV2) + self.name = input.name + self.input = input + self.is_seq = is_seq + self.size = size + # TODO(add size check) + # assert input.size is not None or size is not None + class MixedLayerV2(Layer): """ @@ -161,7 +281,6 @@ class MixedLayerV2(Layer): other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - parent_layers = {"input": self.__inputs__} super(MixedLayerV2, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs @@ -171,7 +290,7 @@ class MixedLayerV2(Layer): self.__inputs__.append(other) return self else: - raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + raise MixedLayerV2.AddToSealedMixedLayerExceptionV2() def __enter__(self): assert len(self.__inputs__) == 0 @@ -186,6 +305,13 @@ class MixedLayerV2(Layer): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size return getattr(conf_helps, self.__method_name__)(**args) @@ -202,14 +328,52 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) +class RecurrentLayerInput(WithExtraParent): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerInput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".begin" 
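For reference, the MixedLayerV2 wrapper introduced a little earlier keeps the v1 mixed-layer idiom: projections are accumulated with += while the layer is open, and the layer is sealed when the with block exits; recurrent_group below relies on exactly this pattern for its static-input memories. A short, hypothetical sketch of the user-facing form (it assumes the projection wrappers generated by the __projection_names__ conversion further down behave like their v1 counterparts):

```python
import paddle.v2 as paddle

x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(128))

# Projections are added with += while the mixed layer is open; __exit__ seals it.
with paddle.layer.mixed(size=64) as m:
    m += paddle.layer.full_matrix_projection(input=x)
```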
+ + def to_proto_impl(self, context, **kwargs): + model_type('recurrent_nn') + RecurrentLayerGroupWithoutOutLinksBegin( + name=self.__recurrent_name__, + in_links=map(lambda x: x.name, self.__parents__)) + return self + + +class RecurrentLayerOutput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerOutput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".end" + + def to_proto_impl(self, **kwargs): + for l in self.__parents__: + RecurrentLayerGroupSetOutLink(l.name) + RecurrentLayerGroupEnd(name=self.__recurrent_name__) + + LayerV2 = Layer data = DataLayerV2 +data.__name__ = 'data' AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel +memory = MemoryV2 def __layer_name_mapping__(inname): - if inname in ['data_layer', 'memory', 'mixed_layer']: + if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']: # Do Not handle these layers return elif inname == 'maxid_layer': @@ -231,8 +395,10 @@ def __layer_name_mapping__(inname): def __layer_name_mapping_parent_names__(inname): all_args = getattr(conf_helps, inname).argspec.args return filter( - lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as', - 'weights', 'vectors', 'weight', 'score', 'left', 'right'], + lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', + 'expand_as', + 'weights', 'vectors', 'weight', 'score', 'left', + 'right', 'output_mem'], all_args) @@ -240,6 +406,7 @@ def __convert_layer__(_new_name_, _old_name_, _parent_names_): global __all__ __all__.append(_new_name_) globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_) + globals()[new_name].__name__ = new_name for each_layer_name in dir(conf_helps): @@ -253,10 +420,71 @@ del parent_names del new_name del each_layer_name + +@wrap_name_default() +def recurrent_group(step, input, name=None): + if not isinstance(input, collections.Sequence): + input = [input] + + non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2), + input) + actual_input = [ + RecurrentLayerInput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_inputs': non_static_inputs}) + for i in xrange(len(non_static_inputs)) + ] + + def __real_step__(*args): + rnn_input = list(args) + static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) + for static_input in static_inputs: + mem_name = "__%s_memory__" % static_input.input.name + mem = memory( + name=mem_name, + is_seq=static_input.is_seq, + size=static_input.input.calculate_size, + boot_layer=static_input.input) + with mixed( + name=mem_name, + size=static_input.input.calculate_size, + act=activation.Identity()) as mix: + mix += identity_projection(input=mem) + rnn_input.insert(input.index(static_input), mix) + return step(*rnn_input) + + actual_output = __real_step__(*actual_input) + + if not isinstance(actual_output, collections.Sequence): + actual_output = [actual_output] + + retv = [ + RecurrentLayerOutput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_outputs': actual_output}) + for i in xrange(len(actual_output)) + ] + if len(retv) == 1: + return retv[0] + else: + return retv + + +__projection_names__ = filter(lambda x: x.endswith('_projection'), + dir(conf_helps)) + +__all__ += __projection_names__ + +__operator_names__ = filter(lambda x: 
x.endswith('_operator'), dir(conf_helps)) +__all__ += __operator_names__ + # convert projection for prj in __projection_names__: globals()[prj] = __convert_to_v2__( prj, parent_names=['input'], is_default_name=False) + globals()[prj].__name__ = prj # convert operator operator_list = [ @@ -267,3 +495,4 @@ operator_list = [ for op in operator_list: globals()[op[0]] = __convert_to_v2__( op[0], parent_names=op[1], is_default_name=False) + globals()[op[0]].__name__ = op[0] diff --git a/python/paddle/v2/minibatch.py b/python/paddle/v2/minibatch.py new file mode 100644 index 0000000000000000000000000000000000000000..317cf037c69f8639e3760fbfce20565127794fcb --- /dev/null +++ b/python/paddle/v2/minibatch.py @@ -0,0 +1,41 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['batch'] + + +def batch(reader, batch_size): + """ + Create a batched reader. + + :param reader: the data reader to read from. + :type reader: callable + :param batch_size: size of each mini-batch + :type batch_size: int + :return: the batched reader. + :rtype: callable + """ + + def batch_reader(): + r = reader() + b = [] + for instance in r: + b.append(instance) + if len(b) == batch_size: + yield b + b = [] + if b: + yield b + + return batch_reader diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py index 74d91593d8551bc1592a78efb704ab3b89f0e0d9..9e6644196c8242cc3fed7a4fb1503697e5b59ffb 100644 --- a/python/paddle/v2/networks.py +++ b/python/paddle/v2/networks.py @@ -38,6 +38,7 @@ def __initialize__(): parent_names=parents, is_default_name='name' in argspec.args) globals()[each_subnetwork] = v2_subnet + globals()[each_subnetwork].__name__ = each_subnetwork global __all__ __all__.append(each_subnetwork) diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 10e255dc945efb8b20f09dc1806d2ba7ef856c55..1a01d95c205c0626374e1814a170ce2d58f23a60 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,7 +1,12 @@ import py_paddle.swig_paddle as swig_api -import paddle.trainer_config_helpers.optimizers as v1_optimizers + import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.v2 +import paddle.trainer_config_helpers.optimizers as v1_optimizers +""" +Optimizers(update equation) for SGD method. + +TODO(yuyang18): Complete comments. 
+""" __all__ = [ 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', @@ -44,7 +49,7 @@ class Optimizer(object): class Momentum(Optimizer): def __init__(self, momentum=None, sparse=False, **kwargs): learning_method = v1_optimizers.MomentumOptimizer( - momentum=None, sparse=False) + momentum=momentum, sparse=sparse) super(Momentum, self).__init__( learning_method=learning_method, **kwargs) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index d8c3a73b0ea9e5df11dd0933a1c87cf13e0b5ded..f01185c14e2c47876ab8d16365372ceb2f44d69a 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -10,6 +10,7 @@ __all__ = ['Parameters', 'create'] def create(layers): """ Create parameter pool by topology. + :param layers: :return: """ @@ -67,6 +68,7 @@ class Parameters(object): def keys(self): """ keys are the names of each parameter. + :return: list of parameter name :rtype: list """ @@ -75,6 +77,7 @@ class Parameters(object): def names(self): """ names of each parameter. + :return: list of parameter name :rtype: list """ @@ -83,6 +86,7 @@ class Parameters(object): def has_key(self, key): """ has_key return true if there are such parameter name == key + :param key: Parameter name :type key: basestring :return: True if contains such key @@ -136,6 +140,7 @@ class Parameters(object): def get_shape(self, key): """ get shape of the parameter. + :param key: parameter name :type key: basestring :return: parameter's shape @@ -190,6 +195,7 @@ class Parameters(object): def set(self, parameter_name, value): """ Set parameter by parameter name & matrix. + :param parameter_name: parameter name :type parameter_name: basestring :param value: parameter matrix diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py index 9076a159bb4f2c58bf52cba1657c58510637f4f8..4881c27d1d6d3d926f12aab096f377164debf1ef 100644 --- a/python/paddle/v2/pooling.py +++ b/python/paddle/v2/pooling.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.trainer_config_helpers.poolings import * +import paddle.trainer_config_helpers.poolings +import copy -__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"] +__all__ = [] +suffix = 'Pooling' -Max = MaxPooling -CudnnMax = CudnnMaxPooling -Avg = AvgPooling -CudnnAvg = CudnnAvgPooling -Sum = SumPooling -SquareRootN = SquareRootNPooling +for name in paddle.trainer_config_helpers.poolings.__all__: + new_name = name[:-len(suffix)] + globals()[new_name] = copy.copy( + getattr(paddle.trainer_config_helpers.poolings, name)) + globals()[new_name].__name__ = new_name + __all__.append(new_name) diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/v2/reader/__init__.py index 7373dc461b1d3115c03b37c5102a469a52aa7441..3b059735a924d58714cd88a761eb83143f1192d6 100644 --- a/python/paddle/v2/reader/__init__.py +++ b/python/paddle/v2/reader/__init__.py @@ -11,15 +11,64 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +At training and testing time, PaddlePaddle programs need to read data. To ease +the users' work to write data reading code, we define that -# It would be too lengthy to require our users to prefix decorators with `decorator`. 
-# For example, we want the following line -# -# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt")) -# -# to be a shorter version: -# -# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt")) +- A *reader* is a function that reads data (from file, network, random number + generator, etc) and yields data items. +- A *reader creator* is a function that returns a reader function. +- A *reader decorator* is a function, which accepts one or more readers, and + returns a reader. +- A *batch reader* is a function that reads data (from *reader*, file, network, + random number generator, etc) and yields a batch of data items. + +##################### +Data Reader Interface +##################### + +Indeed, *data reader* doesn't have to be a function that reads and yields data +items. It can be any function with no parameter that creates a iterable +(anything can be used in :code:`for x in iterable`)\: + +.. code-block:: python + + iterable = data_reader() + +Element produced from the iterable should be a **single** entry of data, +**not** a mini batch. That entry of data could be a single item, or a tuple of +items. +Item should be of `supported type `_ (e.g., numpy 1d +array of float32, int, list of int) + +An example implementation for single item data reader creator: + +.. code-block:: python + + def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader + +An example implementation for multiple item data reader creator: + +.. code-block:: python + + def reader_creator_random_image_and_label(width, height, label): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height), label + return reader + + +TODO(yuyang18): Should we add whole design doc here? +""" + +import decorator from decorator import * import creator + +__all__ = decorator.__all__ + ['creator'] diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 5a91bb0b8ef6d1874737386897f6c555eaec18d4..07142056f872db5113acdd296b17c52b343c1be6 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +Creator package contains some simple reader creator, which could be used in user +program. +""" __all__ = ['np_array', 'text_file'] @@ -38,7 +42,7 @@ def np_array(x): def text_file(path): """ Creates a data reader that outputs text line by line from given text file. - Trailing new line ('\n') of each line will be removed. + Trailing new line ('\\\\n') of each line will be removed. :path: path of the text file. :returns: data reader of text file diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py index b7657e27764f099334ba3030c493a7607f323abe..104ce9a0411413bb8fc65eedf5821f98d6acdba3 100644 --- a/python/paddle/v2/reader/decorator.py +++ b/python/paddle/v2/reader/decorator.py @@ -14,7 +14,7 @@ __all__ = [ 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', - 'ComposeNotAligned', 'batched', 'firstn' + 'ComposeNotAligned', 'firstn' ] import itertools @@ -28,9 +28,11 @@ def map_readers(func, *readers): Creates a data reader that outputs return value of function using output of each data readers as arguments. - :param func: function to use. 
- :param *readers: readers whose outputs will be used as arguments of func. - :returns: the created data reader. + :param func: function to use. The type of func should be (Sample) => Sample + :type: callable + :param readers: readers whose outputs will be used as arguments of func. + :return: the created data reader. + :rtype: callable """ def reader(): @@ -45,16 +47,19 @@ def map_readers(func, *readers): def shuffle(reader, buf_size): """ - Creates a data reader whose data output is suffled. + Creates a data reader whose data output is shuffled. Output from the iterator that created by original reader will be buffered into shuffle buffer, and then shuffled. The size of shuffle buffer is determined by argument buf_size. :param reader: the original reader whose output will be shuffled. + :type reader: callable :param buf_size: shuffle buffer size. + :type buf_size: int - :returns:the new reader whose output is shuffled. + :return: the new reader whose output is shuffled. + :rtype: callable """ def data_reader(): @@ -88,7 +93,8 @@ def chain(*readers): [0, 0, 0, 1, 1, 1, 2, 2, 2] :param readers: input readers. - :returns: the new data reader. + :return: the new data reader. + :rtype: callable """ def reader(): @@ -115,12 +121,13 @@ def compose(*readers, **kwargs): The composed reader will output: (1, 2, 3, 4, 5) - :*readers: readers that will be composed together. - :check_alignment: if True, will check if input readers are aligned + :param readers: readers that will be composed together. + :param check_alignment: if True, will check if input readers are aligned correctly. If False, will not check alignment and trailing outputs will be discarded. Defaults to True. + :type check_alignment: bool - :returns: the new data reader. + :return: the new data reader. :raises ComposeNotAligned: outputs of readers are not aligned. Will not raise when check_alignment is set to False. @@ -161,7 +168,9 @@ def buffered(reader, size): as the buffer is not empty. :param reader: the data reader to read from. + :type reader: callable :param size: max buffer size. + :type size: int :returns: the buffered data reader. """ @@ -193,31 +202,16 @@ def buffered(reader, size): return data_reader -def batched(reader, batch_size): - """ - Create a batched reader. - :param reader: the data reader to read from. - :param batch_size: batch_size - :return: the batched reader. - """ - - def batched_reader(): - r = reader() - batch = [] - for instance in r: - batch.append(instance) - if len(batch) == batch_size: - yield batch - batch = [] - if batch: - yield batch - - return batched_reader - - def firstn(reader, n): """ Limit the max number of samples that reader could return. + + :param reader: the data reader to read from. + :type reader: callable + :param n: the max number of samples that return. + :type n: int + :return: the decorated reader. 
+ :rtype: callable """ # TODO(yuyang18): Check if just drop the reader, could clean the opened diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index 46b5d08b8761ea58530f5aefe5d1947408727f85..572deaff356712cac23cd7911cdf289db100564c 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -1,12 +1,16 @@ +add_test(NAME test_v2_api + COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) + add_test(NAME test_v2_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) -add_test(NAME test_v2_api - COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) +add_test(NAME test_v2_rnn_layer + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) -add_test(NAME topology_test +add_test(NAME test_topology COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py index ab2bc5df76cd839b5b0184e9559f0c2e03baf38b..71eb3bf31425c22b47accc11c9550042e077ef12 100644 --- a/python/paddle/v2/tests/test_data_feeder.py +++ b/python/paddle/v2/tests/test_data_feeder.py @@ -110,14 +110,14 @@ class DataFeederTest(unittest.TestCase): self.assertAlmostEqual(value.all(), w[i].all()) def test_integer(self): - dim = 100 + value_range = 100 batch_size = 32 index = [] for i in xrange(batch_size): each_sample = [] - each_sample.append(np.random.randint(dim)) + each_sample.append(np.random.randint(value_range)) index.append(each_sample) - feeder = DataFeeder([('input', data_type.integer_value(dim))], + feeder = DataFeeder([('input', data_type.integer_value(value_range))], {'input': 0}) arg = feeder(index) output = arg.getSlotIds(0).copyToNumpyArray() @@ -125,7 +125,7 @@ class DataFeederTest(unittest.TestCase): self.assertEqual(output.all(), index.flatten().all()) def test_integer_sequence(self): - dim = 10000 + value_range = 10000 batch_size = 32 start = [0] data = [] @@ -133,11 +133,12 @@ class DataFeederTest(unittest.TestCase): each_sample = [] each_sample.append( self.sparse_binary_reader( - dim, 30, non_empty=True)) + value_range, 30, non_empty=True)) data.append(each_sample) start.append(len(each_sample[0]) + start[-1]) - feeder = DataFeeder([('input', data_type.integer_value_sequence(dim))], - {'input': 0}) + feeder = DataFeeder( + [('input', data_type.integer_value_sequence(value_range))], + {'input': 0}) arg = feeder(data) output_data = arg.getSlotIds(0).copyToNumpyArray() output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray() diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..5fbbd20eb76bb9daab2bcf98c4adad989106a377 --- /dev/null +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -0,0 +1,155 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def parse_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def parse_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + data = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data, size=word_dim) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + def test_sequence_rnn_multi_input(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + label_dim = 3 + + def parse_old_rnn(): + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + label = conf_helps.data_layer(name="label", size=label_dim) + emb = conf_helps.embedding_layer(input=data, size=word_dim) + boot_layer = conf_helps.data_layer(name="boot", size=10) + boot_layer = conf_helps.fc_layer( + name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory( + name="rnn_state", + size=hidden_dim, + boot_layer=boot_layer) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + + out = conf_helps.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = conf_helps.last_seq(input=out) + prob = conf_helps.fc_layer( + size=label_dim, + input=rep, + act=conf_helps.SoftmaxActivation(), + bias_attr=True) + + conf_helps.outputs( + conf_helps.classification_cost( + input=prob, label=label)) + + return str(parse_network(test)) + + def parse_new_rnn(): + data = layer.data( + name="word", type=data_type.dense_vector(dict_dim)) + label = layer.data( + name="label", type=data_type.dense_vector(label_dim)) + emb = layer.embedding(input=data, size=word_dim) + boot_layer = layer.data( + name="boot", type=data_type.dense_vector(10)) + boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = layer.embedding(input=wid, size=word_dim) + mem = layer.memory( + 
name="rnn_state", size=hidden_dim, boot_layer=boot_layer) + out = layer.fc(input=[y, z, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + out = layer.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = layer.last_seq(input=out) + prob = layer.fc(size=label_dim, + input=rep, + act=activation.Softmax(), + bias_attr=True) + + cost = layer.classification_cost(input=prob, label=label) + + return str(layer.parse_network(cost)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/tests/test_topology.py b/python/paddle/v2/tests/test_topology.py index 1bf55a5bc68dfdb837773b3120e5b55d304f644d..5c6dbcdb4f49b960fb8b71aecbad4f013d2cd283 100644 --- a/python/paddle/v2/tests/test_topology.py +++ b/python/paddle/v2/tests/test_topology.py @@ -16,6 +16,7 @@ import paddle.v2.layer as layer import paddle.v2.topology as topology import paddle.v2.data_type as data_type import paddle.trainer_config_helpers as conf_helps +import paddle.trainer.PyDataProvider2 as pydp2 class TestTopology(unittest.TestCase): @@ -35,13 +36,13 @@ class TestTopology(unittest.TestCase): pixel_data_type = filter(lambda type: type[0] == "pixel", data_types) self.assertEqual(len(pixel_data_type), 1) pixel_data_type = pixel_data_type[0] - self.assertEqual(pixel_data_type[1].type, data_type.DataType.Dense) + self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense) self.assertEqual(pixel_data_type[1].dim, 784) label_data_type = filter(lambda type: type[0] == "label", data_types) self.assertEqual(len(label_data_type), 1) label_data_type = label_data_type[0] - self.assertEqual(label_data_type[1].type, data_type.DataType.Index) + self.assertEqual(label_data_type[1].type, pydp2.DataType.Index) self.assertEqual(label_data_type[1].dim, 10) def test_get_layer(self): diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py index 4c211254319bbdf46b02a2cee56b6a98b01819a2..f0679c5675b0c0f24f28f3df22efd4eb51ccbb3a 100644 --- a/python/paddle/v2/topology.py +++ b/python/paddle/v2/topology.py @@ -17,6 +17,7 @@ import collections from paddle.proto.ModelConfig_pb2 import ModelConfig import layer as v2_layer +from layer import WithExtraParent __all__ = ['Topology'] @@ -40,7 +41,10 @@ def __bfs_travel__(callback, *layers): __break__ = callback(each_layer) if __break__: return - __bfs_travel__(callback, *each_layer.__parent_layers__.values()) + __layers__ = each_layer.__parent_layers__.values() + if isinstance(each_layer, WithExtraParent): + __layers__ = __layers__ + each_layer.extra_parent() + __bfs_travel__(callback, *__layers__) class Topology(object): diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 6719a268270da7bacd1b77cf8848e4a1591a7b79..187abaf9a34f7b7901c5ff71b15a6db05756c7c4 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -8,7 +8,11 @@ from . import event as v2_event from . import optimizer as v2_optimizer from . import parameters as v2_parameters -__all__ = ['ITrainer', 'SGD'] +__all__ = ['SGD'] +""" +Trainer package +TODO(yuyang18): Complete comments. +""" def default_event_handler(event): @@ -22,33 +26,20 @@ def default_event_handler(event): pass -class ITrainer(object): +class SGD(object): """ - The interface of Trainer. The only exposed method is `train`. + Simple SGD Trainer. 
+ TODO(yuyang18): Complete comments + + :param update_equation: The optimizer object. + :type update_equation: paddle.v2.optimizer.Optimizer + :param cost: Target cost that neural network should be optimized. + :type cost: paddle.v2.config_base.Layer + :param parameters: The parameters dictionary. + :type parameters: paddle.v2.parameters.Parameters """ - def train(self, reader, topology, parameters, event_handler=None): - """ - train method. - - :param reader: - :param topology: - :param parameters: - :param event_handler: - :return: - """ - - raise NotImplementedError() - - -class SGD(ITrainer): def __init__(self, cost, parameters, update_equation): - """ - Simple SGD Trainer. - - :param update_equation: The optimizer object. - :type update_equation: v2_optimizer.Optimizer - """ if not isinstance(parameters, v2_parameters.Parameters): raise TypeError('parameters should be parameters') @@ -75,8 +66,6 @@ class SGD(ITrainer): Training method. Will train num_passes of input data. :param reader: - :param topology: Network Topology, use one or more Layers to represent it. - :param parameters: The parameter pools. :param num_passes: The total train passes. :param event_handler: Event handler. A method will be invoked when event occurred. @@ -117,10 +106,10 @@ class SGD(ITrainer): feeder(data_batch), out_args, pass_type) self.__gradient_machine__.eval(pass_evaluator) self.__gradient_machine__.eval(batch_evaluator) - for each_param in self.__gradient_machine__.getParameters(): + for each_param in self.__gradient_machine__.getNonStaticParameters( + ): updater.update(each_param) - # Get cost. We use numpy to calculate total cost for this batch. - cost_sum = out_args.sumCosts() + cost_sum = out_args.sum() cost = cost_sum / len(data_batch) updater.finishBatch(cost) batch_evaluator.finish() @@ -156,7 +145,7 @@ class SGD(ITrainer): num_samples += len(data_batch) self.__gradient_machine__.forward( feeder(data_batch), out_args, api.PASS_TEST) - total_cost += out_args.sumCosts() + total_cost += out_args.sum() self.__gradient_machine__.eval(evaluator) evaluator.finish()
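Taken together, the pieces touched in this change (the dataset reader creators, paddle.v2.minibatch.batch, the reader decorators, and the slimmed-down SGD trainer) compose into a short v2 training loop. The sketch below is illustrative rather than part of the patch; it follows the layer-package docstring and the MNIST reader added above, and it assumes the usual paddle.v2 bootstrap (paddle.init) and a learning_rate keyword on the optimizer:

```python
import paddle.v2 as paddle
import paddle.v2.dataset.mnist as mnist
import paddle.v2.minibatch as minibatch
import paddle.v2.reader.decorator as reader_decorator

paddle.init(use_gpu=False, trainer_count=1)  # assumed bootstrap, not part of this diff

# Topology: a softmax regression over raw MNIST pixels.
img = paddle.layer.data(name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(name='label', type=paddle.data_type.integer_value(10))
prediction = paddle.layer.fc(input=img, size=10, act=paddle.activation.Softmax())
cost = paddle.layer.classification_cost(input=prediction, label=label)

parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(momentum=0.9, learning_rate=0.01)
trainer = paddle.trainer.SGD(cost=cost, parameters=parameters,
                             update_equation=optimizer)

# Readers: dataset creator -> shuffle -> fixed-size mini-batches.
train_reader = minibatch.batch(
    reader_decorator.shuffle(mnist.train(), buf_size=8192), batch_size=32)

trainer.train(reader=train_reader, num_passes=1)
```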