diff --git a/.travis.yml b/.travis.yml index bf0e0b7bbddd4c1f69e287e0f5ad471a54a75600..7812ac02837895a32fcad36158814268e93a4da8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,8 @@ addons: - libgoogle-glog-dev - libgflags-dev - libgtest-dev + - curl + - lcov - graphviz before_install: - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index 527064e31000aa7d8c99212561d06493ca135841..282e3e199ef440092550deec906019bc44bc73bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATC set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") set(PROJ_ROOT ${CMAKE_SOURCE_DIR}) include(package) -include(swig) +find_package(SWIG 2.0) find_package(CUDA QUIET) find_package(Protobuf REQUIRED) find_package(PythonLibs 2.7 REQUIRED) @@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND}) option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND}) option(ON_TRAVIS "Running test on travis-ci or not." OFF) +option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF) +option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON) + if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" @@ -49,11 +52,16 @@ endif() include(enableCXX11) include(cpplint) include(ccache) +if(WITH_RDMA) + include(rdma) +endif() include(util) include(flags) include(cudnn) include(FindPythonModule) include(check_packages) +include(swig) +include(coveralls) # add PaddlePaddle version if(DEFINED ENV{PADDLE_VERSION}) @@ -129,9 +137,11 @@ else(WITH_PYTHON) add_definitions(-DPADDLE_NO_PYTHON) endif(WITH_PYTHON) -if(NOT WITH_RDMA) - add_definitions(-DPADDLE_DISABLE_RDMA) -endif() +if(WITH_RDMA) + include_directories("${RDMA_INC_DIR}") +else(WITH_RDMA) + add_definitions(-DPADDLE_DISABLE_RDMA) +endif(WITH_RDMA) if(WITH_GLOG) add_definitions(-DPADDLE_USE_GLOG) diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..6b2614b1011081a5e0e03a53fec2012bc7b81333 --- /dev/null +++ b/ISSUE_TEMPLATE.md @@ -0,0 +1,14 @@ +Thank you for contributing to PaddlePaddle. Submitting an issue is a great help to us. +Both Chinese and English issues are welcome. + +It's hard to solve a problem when important details are missing. +Before submitting your issue, please check it against the following criteria. + +- [ ] Was a similar issue submitted or resolved before? You can search the existing issues on GitHub. +- [ ] Did you look for your issue in widespread search engines? +- [ ] Is my description of the issue clear enough to reproduce the problem? + * If some errors occurred, we need details about `how do you run your code?`, `what system do you use?`, `are you using GPU or not?`, etc. + * If you use [asciinema](https://asciinema.org/) to record what you are doing to make it happen, that's awesome! We can help you solve the problem more quickly. +- [ ] Does my description of the issue use GitHub Markdown correctly? + * Please use the proper markdown syntax for styling all forms of writing, e.g., source code, error information, etc.
+ * Check out [this page](https://guides.github.com/features/mastering-markdown/) to learn more about Markdown. diff --git a/README.md b/README.md index 1cc0444c0617af3da0ec1d9beaf2ae73e31bd7b2..66767d7ff8e4acf8ef246f7e0129a66e64486727 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # PaddlePaddle -| **`Linux`** | **`License`** | **`Chat Room`** | -|----------------|---------------|-----------------| -|[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)|[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)|[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)| + +[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle) +[![Coverage Status](https://coveralls.io/repos/github/baidu/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/baidu/Paddle?branch=develop) +[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE) Welcome to the PaddlePaddle GitHub. diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 57c32a54cd727e3acb181eeb19f811fab4dc82fd..685334c6585060c0344e552c6f3fda2c7324de03 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -1,4 +1,4 @@ -# Find the CBlas libraries +# Find the CBlas and LAPACK libraries # # It will search MKL, atlas, OpenBlas, reference-cblas in order. # @@ -19,6 +19,8 @@ set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include) +find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS + ${MKL_ROOT}/include) find_library(MKL_CORE_LIB NAMES mkl_core PATHS ${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64) @@ -37,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) ${MKL_SEQUENTIAL_LIB} ${MKL_CORE_LIB}) add_definitions(-DPADDLE_USE_MKL) + message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") return() # return from this cmake file.
endif() @@ -55,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS ) find_path(ATLAS_INC_DIR NAMES cblas.h PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) +find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h + PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 PATHS ${ATLAS_LIB_SEARCH_PATHS}) -find_library(ATLAS_LIB NAMES atlas libatlas.so.3 +find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3 PATHS ${ATLAS_LIB_SEARCH_PATHS}) if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB) set(CBLAS_PROVIDER ATLAS) - set(CBLAS_INC_DIR ${ATLAS_INC_DIR}) + set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR}) set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB}) + add_definitions(-DPADDLE_USE_ATLAS) + message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") return() endif() @@ -83,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS find_path(OPENBLAS_INC_DIR NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) +find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h + PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) find_library(OPENBLAS_LIB NAMES openblas PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) @@ -90,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) set(CBLAS_PROVIDER OPENBLAS) set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR}) set(CBLAS_LIBS ${OPENBLAS_LIB}) + message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") return() endif() diff --git a/cmake/coveralls.cmake b/cmake/coveralls.cmake new file mode 100644 index 0000000000000000000000000000000000000000..9be7643819efdde3f42e4d39b2849ecc17e0d9fb --- /dev/null +++ b/cmake/coveralls.cmake @@ -0,0 +1,103 @@ +# CMake script for code coverage. +# If _COVERALLS_UPLOAD is ON, it will upload the generated JSON file to coveralls.io automatically. + +# Param _COVERAGE_SRCS A list of coverage source files. +# Param _COVERALLS_UPLOAD Upload the result to coveralls. +# Param _CMAKE_SCRIPT_PATH CMake script path. +function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH) + # clean previous gcov data. + file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda) + + # find curl for uploading the JSON later. + if (_COVERALLS_UPLOAD) + find_program(CURL_EXECUTABLE curl) + if (NOT CURL_EXECUTABLE) + message(FATAL_ERROR "Coveralls: curl not found!") + endif() + endif() + + # When passing a CMake list to an external process, the list + # would be converted from the format "1;2;3" to "1 2 3", so we + # join the entries with "*" here and split them again in + # coverallsGcovJsons.cmake. + set(COVERAGE_SRCS "") + foreach (SINGLE_SRC ${_COVERAGE_SRCS}) + set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}") + endforeach() + + # query the number of logical cores + cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES) + # coveralls json file. + set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json) + add_custom_target(coveralls_generate + # Run regression tests. + COMMAND ${CMAKE_CTEST_COMMAND} + -j ${core_size} + --output-on-failure + # Generate Gcov and translate it into coveralls JSON. + COMMAND ${CMAKE_COMMAND} + -DCOVERAGE_SRCS="${COVERAGE_SRCS}" + -DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}" + -DCOV_PATH="${PROJECT_BINARY_DIR}" + -DPROJECT_ROOT="${PROJECT_SOURCE_DIR}" + -P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake" + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMENT "Coveralls: generating coveralls output..." + ) + + if (_COVERALLS_UPLOAD) + message("COVERALLS UPLOAD: ON") + # Upload the JSON to coveralls.
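+ # (The curl command below POSTs the generated JSON to the documented + # coveralls.io jobs endpoint; the build is identified by the + # service_name/service_job_id fields that coverallsGcovJsons.cmake + # embeds in the JSON itself.)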
+ add_custom_target(coveralls_upload + COMMAND ${CURL_EXECUTABLE} + -S -F json_file=@${COVERALLS_FILE} + https://coveralls.io/api/v1/jobs + DEPENDS coveralls_generate + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMENT "Coveralls: uploading coveralls output...") + + add_custom_target(coveralls DEPENDS coveralls_upload) + else() + message("COVERALLS UPLOAD: OFF") + add_custom_target(coveralls DEPENDS coveralls_generate) + endif() +endfunction() + +if(ON_COVERALLS) + set(CMAKE_BUILD_TYPE "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") + + set(EXCLUDE_DIRS + "demo/" + "build/" + "tests/" + ".test_env/" + ) + + if(WITH_GPU) + file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c" "*.cu") + else() + file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c") + endif() + + # exclude files under EXCLUDE_DIRS from PADDLE_SOURCES + foreach(EXCLUDE_DIR ${EXCLUDE_DIRS}) + foreach(TMP_PATH ${PADDLE_SOURCES}) + string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) + if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1) + list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH}) + endif() + endforeach(TMP_PATH) + endforeach() + + # convert to absolute path + set(PADDLE_SRCS "") + foreach(PADDLE_SRC ${PADDLE_SOURCES}) + set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}") + endforeach() + + code_coverage( + "${PADDLE_SRCS}" + ${COVERALLS_UPLOAD} + "${PROJECT_SOURCE_DIR}/cmake" + ) +endif() diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ae3530c3a0eeb79ddbcbf4f2e99be75aa7968a2f --- /dev/null +++ b/cmake/coverallsGcovJsons.cmake @@ -0,0 +1,403 @@ +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Copyright (C) 2014 Joakim Söderberg +# +# This is intended to be run by a custom target in a CMake project like this: +# 0. Compile the program with coverage support. +# 1. Clear coverage data. (Recursively delete *.gcda in build dir) +# 2. Run the unit tests. +# 3. Run this script specifying which source files the coverage should be performed on. +# +# This script will then use gcov to generate .gcov files in the directory specified +# via the COV_PATH var. This should probably be the same as your cmake build dir.
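+# +# As a rough sketch, the whole flow can be driven from a clean build +# directory like this (the paths and job count are illustrative, not +# part of this script; the options and target are the ones defined in +# CMakeLists.txt and coveralls.cmake above): +# +# cmake -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=OFF .. +# make -j4 +# make coveralls # runs ctest, gcov, and this script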
+# +# It then parses the .gcov files to convert them into the Coveralls JSON format: +# https://coveralls.io/docs/api +# + +CMAKE_MINIMUM_REQUIRED(VERSION 2.8) + +# Since it's not possible to pass a CMake list properly in the +# "1;2;3" format to an external process, we have replaced the +# ";" with "*", so reverse that here so we get it back into the +# CMake list format. +string(REGEX REPLACE "\\*" ";" COVERAGE_SRCS ${COVERAGE_SRCS}) + +find_program(GCOV_EXECUTABLE gcov) +if (NOT GCOV_EXECUTABLE) + message(FATAL_ERROR "gcov not found! Aborting...") +endif() + +find_package(Git) + +# TODO: Add these git things to the coveralls json. +if (GIT_FOUND) + # Branch. + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + macro (git_log_format FORMAT_CHARS VAR_NAME) + execute_process( + COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%${FORMAT_CHARS} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE ${VAR_NAME} + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endmacro() + + git_log_format(an GIT_AUTHOR_NAME) + git_log_format(ae GIT_AUTHOR_EMAIL) + git_log_format(cn GIT_COMMITTER_NAME) + git_log_format(ce GIT_COMMITTER_EMAIL) + git_log_format(B GIT_COMMIT_MESSAGE) + + message("Git exe: ${GIT_EXECUTABLE}") + message("Git branch: ${GIT_BRANCH}") + message("Git author: ${GIT_AUTHOR_NAME}") + message("Git e-mail: ${GIT_AUTHOR_EMAIL}") + message("Git committer name: ${GIT_COMMITTER_NAME}") + message("Git committer e-mail: ${GIT_COMMITTER_EMAIL}") + message("Git commit message: ${GIT_COMMIT_MESSAGE}") + +endif() + +############################# Macros ######################################### + +# +# This macro converts from the full path format gcov outputs: +# +# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov +# +# to the original source file path the .gcov is for: +# +# /path/to/project/root/subdir/the_file.c +# +macro(get_source_path_from_gcov_filename _SRC_FILENAME _GCOV_FILENAME) + + # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov + # -> + # #path#to#project#root#subdir#the_file.c.gcov + get_filename_component(_GCOV_FILENAME_WEXT ${_GCOV_FILENAME} NAME) + + # #path#to#project#root#subdir#the_file.c.gcov -> /path/to/project/root/subdir/the_file.c + string(REGEX REPLACE "\\.gcov$" "" SRC_FILENAME_TMP ${_GCOV_FILENAME_WEXT}) + string(REGEX REPLACE "\#" "/" SRC_FILENAME_TMP ${SRC_FILENAME_TMP}) + set(${_SRC_FILENAME} "${SRC_FILENAME_TMP}") +endmacro() + +############################################################################## + +# Get the coverage data. +file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda") +message("GCDA files:") + +# Get a list of all the object directories needed by gcov +# (The directories the .gcda files and .o files are found in) +# and run gcov on those. +foreach(GCDA ${GCDA_FILES}) + message("Process: ${GCDA}") + message("------------------------------------------------------------------------------") + get_filename_component(GCDA_DIR ${GCDA} PATH) + + # + # The -p below refers to "Preserve path components". + # This means that the generated gcov filename of a source file will + # keep the original file's entire filepath, but / is replaced with #.
+ # Example: + # + # /path/to/project/root/build/CMakeFiles/the_file.dir/subdir/the_file.c.gcda + # ------------------------------------------------------------------------------ + # File '/path/to/project/root/subdir/the_file.c' + # Lines executed:68.34% of 199 + # /path/to/project/root/subdir/the_file.c:creating '#path#to#project#root#subdir#the_file.c.gcov' + # + # If -p is not specified then the file is named only "the_file.c.gcov" + # + execute_process( + COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} + WORKING_DIRECTORY ${GCDA_DIR} + ) +endforeach() + +# TODO: Make these absolute paths +file(GLOB_RECURSE ALL_GCOV_FILES "${COV_PATH}" "*.gcov") + +# Get only the filenames to use for filtering. +#set(COVERAGE_SRCS_NAMES "") +#foreach (COVSRC ${COVERAGE_SRCS}) +# get_filename_component(COVSRC_NAME ${COVSRC} NAME) +# message("${COVSRC} -> ${COVSRC_NAME}") +# list(APPEND COVERAGE_SRCS_NAMES "${COVSRC_NAME}") +#endforeach() + +# +# Filter out all but the gcov files we want. +# +# We do this by comparing the list of COVERAGE_SRCS filepaths that the +# user wants the coverage data for with the paths of the generated .gcov files, +# so that we only keep the relevant gcov files. +# +# Example: +# COVERAGE_SRCS = +# /path/to/project/root/subdir/the_file.c +# +# ALL_GCOV_FILES = +# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov +# /path/to/project/root/build/#path#to#project#root#subdir#other_file.c.gcov +# +# Result should be: +# GCOV_FILES = +# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov +# +set(GCOV_FILES "") +#message("Look in coverage sources: ${COVERAGE_SRCS}") +message("\nFilter out unwanted GCOV files:") +message("===============================") + +set(COVERAGE_SRCS_REMAINING ${COVERAGE_SRCS}) + +foreach (GCOV_FILE ${ALL_GCOV_FILES}) + + # + # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov + # -> + # /path/to/project/root/subdir/the_file.c + get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE}) + + # Is this in the list of source files? + # TODO: We want to match against relative path filenames from the source file root... + list(FIND COVERAGE_SRCS ${GCOV_SRC_PATH} WAS_FOUND) + + if (NOT WAS_FOUND EQUAL -1) + message("YES: ${GCOV_FILE}") + list(APPEND GCOV_FILES ${GCOV_FILE}) + + # We remove it from the list, so we don't bother searching for it again. + # Files left in COVERAGE_SRCS_REMAINING after this loop ends have no + # coverage data, so JSON with 0% coverage is generated for them below. + list(REMOVE_ITEM COVERAGE_SRCS_REMAINING ${GCOV_SRC_PATH}) + else() + message("NO: ${GCOV_FILE}") + endif() +endforeach() + +# TODO: Enable setting these +set(JSON_SERVICE_NAME "travis-ci") +set(JSON_SERVICE_JOB_ID $ENV{TRAVIS_JOB_ID}) + +set(JSON_TEMPLATE +"{ + \"service_name\": \"\@JSON_SERVICE_NAME\@\", + \"service_job_id\": \"\@JSON_SERVICE_JOB_ID\@\", + \"source_files\": \@JSON_GCOV_FILES\@ +}" +) + +set(SRC_FILE_TEMPLATE +"{ + \"name\": \"\@GCOV_SRC_REL_PATH\@\", + \"source_digest\": \"\@GCOV_CONTENTS_MD5\@\", + \"coverage\": \@GCOV_FILE_COVERAGE\@ + }" +) + +message("\nGenerate JSON for files:") +message("=========================") + +set(JSON_GCOV_FILES "[") + +# Read the GCOV files line by line and get the coverage data. +foreach (GCOV_FILE ${GCOV_FILES}) + + get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE}) + file(RELATIVE_PATH GCOV_SRC_REL_PATH "${PROJECT_ROOT}" "${GCOV_SRC_PATH}") + + # The new coveralls API doesn't need the entire source (Yay!)
+ # However, still keeping that part for now. Will clean up in the future. + file(MD5 "${GCOV_SRC_PATH}" GCOV_CONTENTS_MD5) + message("MD5: ${GCOV_SRC_PATH} = ${GCOV_CONTENTS_MD5}") + + # Loads the gcov file as a list of lines. + # (We first open the file and replace all occurrences of [] with _ + # because CMake will fail to parse a line containing unmatched brackets... + # also the \ used to escape \n in macros screws things up.) + # https://public.kitware.com/Bug/view.php?id=15369 + file(READ ${GCOV_FILE} GCOV_CONTENTS) + string(REPLACE "[" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") + string(REPLACE "]" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") + string(REPLACE "\\" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") + file(WRITE ${GCOV_FILE}_tmp "${GCOV_CONTENTS}") + + file(STRINGS ${GCOV_FILE}_tmp GCOV_LINES) + list(LENGTH GCOV_LINES LINE_COUNT) + + # Instead of trying to parse the source from the + # gcov file, simply read the file contents from the source file. + # (Parsing it from the gcov is hard because C-code uses ; in many places + # which also happens to be the same as the CMake list delimiter). + file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE) + + string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REGEX REPLACE "\"" "\\\\\"" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REPLACE "\t" "\\\\t" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REPLACE "\r" "\\\\r" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REPLACE "\n" "\\\\n" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + # According to http://json.org/ these should be escaped as well. + # Don't know how to do that in CMake however... + #string(REPLACE "\b" "\\\\b" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + #string(REPLACE "\f" "\\\\f" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + #string(REGEX REPLACE "\u([a-fA-F0-9]{4})" "\\\\u\\1" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + + # We want a JSON array of coverage data as a single string, + # so start building it from the contents of the .gcov. + set(GCOV_FILE_COVERAGE "[") + + set(GCOV_LINE_COUNT 1) # Line number for the .gcov. + set(DO_SKIP 0) + foreach (GCOV_LINE ${GCOV_LINES}) + #message("${GCOV_LINE}") + # Example of what we're parsing: + # Hitcount |Line | Source + # " 8: 26: if (!allowed || (strlen(allowed) == 0))" + string(REGEX REPLACE + "^([^:]*):([^:]*):(.*)$" + "\\1;\\2;\\3" + RES + "${GCOV_LINE}") + + # Check if we should exclude lines using the Lcov syntax. + string(REGEX MATCH "LCOV_EXCL_START" START_SKIP "${GCOV_LINE}") + string(REGEX MATCH "LCOV_EXCL_END" END_SKIP "${GCOV_LINE}") + string(REGEX MATCH "LCOV_EXCL_LINE" LINE_SKIP "${GCOV_LINE}") + + set(RESET_SKIP 0) + if (LINE_SKIP AND NOT DO_SKIP) + set(DO_SKIP 1) + set(RESET_SKIP 1) + endif() + + if (START_SKIP) + set(DO_SKIP 1) + message("${GCOV_LINE_COUNT}: Start skip") + endif() + + if (END_SKIP) + set(DO_SKIP 0) + endif() + + list(LENGTH RES RES_COUNT) + + if (RES_COUNT GREATER 2) + list(GET RES 0 HITCOUNT) + list(GET RES 1 LINE) + list(GET RES 2 SOURCE) + + string(STRIP ${HITCOUNT} HITCOUNT) + string(STRIP ${LINE} LINE) + + # Lines with 0 line numbers are metadata and can be ignored. + if (NOT ${LINE} EQUAL 0) + + if (DO_SKIP) + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ") + else() + # Translate the hitcount into valid JSON values.
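+ # (gcov prints "#####" for executable lines that were never + # run, "-" for lines that generate no code, and an execution + # count otherwise; coveralls expects 0, null, or the count, + # respectively, which is what the branches below emit.)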
+ if (${HITCOUNT} STREQUAL "#####") + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ") + elseif (${HITCOUNT} STREQUAL "-") + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ") + else() + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}${HITCOUNT}, ") + endif() + endif() + endif() + else() + message(WARNING "Failed to properly parse line (RES_COUNT = ${RES_COUNT}) ${GCOV_FILE}:${GCOV_LINE_COUNT}\n-->${GCOV_LINE}") + endif() + + if (RESET_SKIP) + set(DO_SKIP 0) + endif() + math(EXPR GCOV_LINE_COUNT "${GCOV_LINE_COUNT}+1") + endforeach() + + message("${GCOV_LINE_COUNT} of ${LINE_COUNT} lines read!") + + # Advanced way of removing the trailing comma in the JSON array. + # "[1, 2, 3, " -> "[1, 2, 3" + string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE}) + + # Append the trailing ] to complete the JSON array. + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") + + # Generate the final JSON for this file. + message("Generate JSON for file: ${GCOV_SRC_REL_PATH}...") + string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) + + set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") +endforeach() + +# Loop through all the files we couldn't find any coverage for, +# and generate JSON for those too, with 0% coverage. +foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING}) + + # Loads the source file as a list of lines. + file(STRINGS ${NOT_COVERED_SRC} SRC_LINES) + + set(GCOV_FILE_COVERAGE "[") + set(GCOV_FILE_SOURCE "") + + foreach (SOURCE ${SRC_LINES}) + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ") + + string(REPLACE "\\" "\\\\" SOURCE "${SOURCE}") + string(REGEX REPLACE "\"" "\\\\\"" SOURCE "${SOURCE}") + string(REPLACE "\t" "\\\\t" SOURCE "${SOURCE}") + string(REPLACE "\r" "\\\\r" SOURCE "${SOURCE}") + set(GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}${SOURCE}\\n") + endforeach() + + # Remove trailing comma, and complete JSON array with ] + string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE}) + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") + + # Generate the final JSON for this file. + message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...") + string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) + set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") +endforeach() + +# Get rid of trailing comma. +string(REGEX REPLACE ",[ ]*$" "" JSON_GCOV_FILES ${JSON_GCOV_FILES}) +set(JSON_GCOV_FILES "${JSON_GCOV_FILES}]") + +# Generate the final complete JSON! +message("Generate final JSON...") +string(CONFIGURE ${JSON_TEMPLATE} JSON) + +file(WRITE "${COVERALLS_OUTPUT_FILE}" "${JSON}") +message("###########################################################################") +message("Generated coveralls JSON containing coverage data:") +message("${COVERALLS_OUTPUT_FILE}") +message("###########################################################################") diff --git a/cmake/rdma.cmake b/cmake/rdma.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e9a4da79aa92a92aa7e5d21bb795ab9aaf60ab8b --- /dev/null +++ b/cmake/rdma.cmake @@ -0,0 +1,76 @@ +# users should first download rdma from the subversion repository + +# execute the following instructions to download it from svn manually: +# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/ +# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/ +# we use the static outputs in the svn repositories to avoid implicit bugs from a non-standard runtime env.
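+# +# as a rough sketch, an RDMA build is configured like this (the flag and +# cache variable are the ones used in this repo; the path is illustrative): +# cmake -DWITH_RDMA=ON -DRDMA_ROOT=/path/to/rdma ..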
+ +set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library") + +function(generate_rdma_links) + #redirect to the current dir to isolate the pollution from the system runtime environment; + #this benefits unified control across different gcc environments. + #e.g., by default gcc48 does not refer to /usr/lib64, which could contain low-version + #runtime libraries that would crash the process while loading them. This redirect trick + #fixes that. + execute_process( + COMMAND mkdir -p librdma + COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1 + COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so + COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1 + COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) +endfunction(generate_rdma_links) + + +#check and set headers +find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include) +find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio) +find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent) +find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma) + +#check and set libs +find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output) +find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio) +find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent) +find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent) +find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent) +find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent) +find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma) + +if( + RDMA_INC_SXISOCK AND + RDMA_INC_XIO AND + RDMA_INC_EVENT AND + RDMA_INC_NUMA AND + RDMA_LIB_SXISOCK AND + RDMA_LIB_XIO AND + RDMA_LIB_EVENT AND + RDMA_LIB_EVENT_CORE AND + RDMA_LIB_EVENT_EXTRA AND + RDMA_LIB_EVENT_PTHREADS AND + RDMA_LIB_NUMA + ) + + set(RDMA_INC_DIR + ${RDMA_INC_SXISOCK} + ${RDMA_INC_XIO} + ${RDMA_INC_EVENT} + ${RDMA_INC_NUMA}) + set(RDMA_LIBS + ${RDMA_LIB_SXISOCK} + ${RDMA_LIB_XIO} + ${RDMA_LIB_EVENT} + ${RDMA_LIB_EVENT_CORE} + ${RDMA_LIB_EVENT_EXTRA} + ${RDMA_LIB_EVENT_PTHREADS} + ${RDMA_LIB_NUMA} + ) + set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma") + return() +endif() + +#if this module is not called, RDMA_INC_DIR and RDMA_LIBS will be null, so the top module can always refer to these variables + +message(FATAL_ERROR "RDMA libraries are not found; try to set RDMA_ROOT or check all related libraries.") diff --git a/cmake/swig.cmake b/cmake/swig.cmake index 160d7ee56a9c6f2aa2afedf7883540aab684a025..97e87aa947791e2c5a88e7e554dec43bcd661664 100644 --- a/cmake/swig.cmake +++ b/cmake/swig.cmake @@ -1,25 +1,3 @@ -find_program( - SWIG_BINARY_PATH - swig) - -if(${SWIG_BINARY_PATH} STREQUAL "SWIG_BINARY_PATH-NOTFOUND") - set(SWIG_FOUND OFF) -else() - set(SWIG_FOUND ON) -endif() - -set(MIN_SWIG_VERSION 2) -if(SWIG_FOUND) - execute_process(COMMAND sh -c "${SWIG_BINARY_PATH} -version | grep Version | cut -f3 -d' '" - OUTPUT_VARIABLE _SWIG_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION}) - message("swig version ${MIN_SWIG_VERSION} or greater is needed for generating python api. " - "Only version ${_SWIG_VERSION} is found.
Set SWIG_FOUND to FALSE") - set(SWIG_FOUND FALSE) - endif(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION}) -endif(SWIG_FOUND) - function(generate_python_api target_name) add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py ${PROJ_ROOT}/paddle/Paddle_wrap.cxx diff --git a/cmake/util.cmake b/cmake/util.cmake index 0fa36f070cc11be543efe9573b93173ec771b9be..3f78cd08c390550790b7145c412de32351873e4e 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -67,6 +67,10 @@ endmacro() # # It will handle WITH_PYTHON/WITH_GLOG etc. function(link_paddle_exe TARGET_NAME) + if(WITH_RDMA) + generate_rdma_links() + endif() + if(WITH_METRIC) if(WITH_GPU) set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu) @@ -109,6 +113,12 @@ function(link_paddle_exe TARGET_NAME) ${ZLIB_LIBRARIES} ${INTERAL_LIBS} ${CMAKE_DL_LIBS}) + + if(WITH_RDMA) + target_link_libraries(${TARGET_NAME} + ${RDMA_LD_FLAGS} + ${RDMA_LIBS}) + endif() if(WITH_PYTHON) target_link_libraries(${TARGET_NAME} diff --git a/demo/semantic_role_labeling/.gitignore b/demo/semantic_role_labeling/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cd90ca7bbe9be46f54cb656a8067c794a55d8cfc --- /dev/null +++ b/demo/semantic_role_labeling/.gitignore @@ -0,0 +1,10 @@ +*.pyc +train.log +data/feature +data/conll05st-release/ +data/src.dict +data/test.wsj.props +data/test.wsj.seq_pair +data/test.wsj.words +data/tgt.dict +output diff --git a/doc/build/build_from_source.md b/doc/build/build_from_source.md index 7727c8c3788b9defb34430bda9aab9cc858920ba..b8f26f431eb7a04147fe791a8c805427c827fe09 100644 --- a/doc/build/build_from_source.md +++ b/doc/build/build_from_source.md @@ -4,7 +4,6 @@ Installing from Sources * [1. Download and Setup](#download) * [2. Requirements](#requirements) * [3. Build on Ubuntu](#ubuntu) -* [4. Build on Mac OS X](#mac) ## Download and Setup You can download PaddlePaddle from the [github source](https://github.com/gangliao/Paddle). @@ -191,122 +190,3 @@ sudo pip install /opt/paddle/share/wheels/*.whl # or just run sudo paddle version ``` - -## Building on Mac OS X - -### Prerequisites -This guide is based on Mac OS X 10.11 (El Capitan). Note that if you are running an up to date version of OS X, -you will already have Python 2.7.10 and Numpy 1.8 installed. - -The best option is to use the package manager homebrew to handle installations and upgrades for you. -To install [homebrew](http://brew.sh/), first open a terminal window (you can find Terminal in the Utilities folder in Applications), and issue the command: - -```bash -# install brew -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" -# install pip -easy_install pip -``` - -### Install Dependencies - -- **CPU Dependencies** - - ```bash - # Install fundamental dependents - brew install glog gflags cmake protobuf openblas - - # Install google test on Mac OS X - # Download gtest 1.7.0 - wget https://github.com/google/googletest/archive/release-1.7.0.tar.gz - tar -xvf googletest-release-1.7.0.tar.gz && cd googletest-release-1.7.0 - # Build gtest - mkdir build && cmake .. - make - # Install gtest library - sudo cp -r ../include/gtest /usr/local/include/ - sudo cp lib*.a /usr/local/lib - ``` - -- **GPU Dependencies(optional)** - - To build GPU version, you will need the following installed: - - 1. a CUDA-capable GPU - 2. Mac OS X 10.11 or later - 2. the Clang compiler and toolchain installed using Xcode - 3. 
NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) - 4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn) - - The CUDA development environment relies on tight integration with the host development environment, - including the host compiler and C runtime libraries, and is therefore only supported on - distribution versions that have been qualified for this CUDA Toolkit release. - - 1. After downloading cuDNN library, issue the following commands: - - ```bash - sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local - sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib/libcudnn* - ``` - 2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc. - - ```bash - export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - ``` - -### Build and Install - -As usual, the best option is to create build folder under paddle project directory. - -```bash -mkdir build && cd build -cmake .. -``` - -CMake first check PaddlePaddle's dependencies in system default path. After installing some optional -libraries, corresponding build option will be set automatically (for instance, glog, gtest and gflags). -If still not found, you can manually set it based on CMake error information from your screen. - -As a simple example, consider the following: - -- **Only CPU** - - ```bash - cmake .. -DWITH_GPU=OFF - ``` -- **GPU** - - ```bash - cmake .. -DWITH_GPU=ON - ``` - -- **GPU with doc and swig** - - ```bash - cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON - ``` - -Finally, you can build PaddlePaddle: - -```bash -# you can add build option here, such as: -cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX= -# please use sudo make install, if you want to install PaddlePaddle into the system -make -j `sysctl -n hw.ncpu` && make install -# set PaddlePaddle installation path in ~/.bashrc -export PATH=/bin:$PATH -``` -**Note:** - -If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed. -Otherwise, PaddlePaddle will automatically install python dependencies -at first time when user run paddle commands, such as `paddle version`, `paddle train`. -It may require sudo privileges: - -```bash -# you can run -sudo pip install /opt/paddle/share/wheels/*.whl -# or just run -sudo paddle version -``` diff --git a/doc/ui/api/trainer_config_helpers/activations.rst b/doc/ui/api/trainer_config_helpers/activations.rst index c4e14ed779efb6f6601d2c5fa41764f318c82848..070ed03ab6cc938f735667701bd46eec33ea77b4 100644 --- a/doc/ui/api/trainer_config_helpers/activations.rst +++ b/doc/ui/api/trainer_config_helpers/activations.rst @@ -32,6 +32,13 @@ LinearActivation .. automodule:: paddle.trainer_config_helpers.activations :members: LinearActivation :noindex: + +LogActivation +================== + +.. automodule:: paddle.trainer_config_helpers.activations + :members: LogActivation + :noindex: SquareActivation ================ diff --git a/paddle/cuda/include/hl_cuda_cublas.h b/paddle/cuda/include/hl_cuda_cublas.h index 0ffbed18b5f9e57f22d1bbe1a98a0d899f2fa88d..d757317eb4a97559feef22d4fd8edf7c10ca6745 100644 --- a/paddle/cuda/include/hl_cuda_cublas.h +++ b/paddle/cuda/include/hl_cuda_cublas.h @@ -21,8 +21,8 @@ limitations under the License. */ /** * @brief Matrix transpose: C_d = T(A_d) * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (N x M). + * @param[in] A_d input matrix (dimM x dimN). + * @param[out] C_d output matrix (dimN x dimM). 
* @param[in] dimM matrix height. * @param[in] dimN matrix width. * @param[in] lda the first dimension of A_d. @@ -39,8 +39,8 @@ extern void hl_matrix_transpose(real *A_d, /* * @brief Matrix transpose, while lda = dimN, ldc = dimM. * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (N x M). + * @param[in] A_d input matrix (dimM x dimN). + * @param[out] C_d output matrix (dimN x dimM). * @param[in] dimM matrix height. * @param[in] dimN matrix width. * @@ -50,6 +50,22 @@ extern void hl_matrix_transpose(real *A_d, int dimM, int dimN); +/* + * @brief Matrix inverse + * + * @param[in] A_d input matrix (dimN x dimN). + * @param[out] C_d output matrix (dimN x dimN). + * @param[in] dimN matrix height = matrix width + * @param[in] lda the first dimension of A_d + * @param[in] ldc the first dimension of C_d + * + */ +extern void hl_matrix_inverse(real *A_d, + real *C_d, + int dimN, + int lda, + int ldc); + /** * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d * diff --git a/paddle/cuda/include/stub/hl_cuda_cublas_stub.h b/paddle/cuda/include/stub/hl_cuda_cublas_stub.h index 4a5e2a25a71b38b2c38688820cbffdb10251bcac..903dcbe8355d6f593d96bc1f9e686d54035a9366 100644 --- a/paddle/cuda/include/stub/hl_cuda_cublas_stub.h +++ b/paddle/cuda/include/stub/hl_cuda_cublas_stub.h @@ -30,6 +30,12 @@ inline void hl_matrix_transpose(real *A_d, int dimM, int dimN) {} +inline void hl_matrix_inverse(real *A_d, + real *C_d, + int dimN, + int lda, + int ldc) {} + inline void hl_matrix_mul(real *A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc index b3c9001ba397361376ee191081a71863b2e5a578..f16376ec937d3a397d9e7117de528c304f8403ee 100644 --- a/paddle/cuda/src/hl_cuda_cublas.cc +++ b/paddle/cuda/src/hl_cuda_cublas.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include #include +#include "hl_cuda.h" #include "hl_cuda_cublas.h" #include "hl_thread.ph" #include "hl_dso_loader.h" @@ -75,6 +76,10 @@ DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgemmBatched) DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgemmBatched) DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched) DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetriBatched) CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) #undef DYNAMIC_LOAD_CUBLAS_WRAP @@ -88,10 +93,14 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) #define CUBLAS_GEAM dynload::cublasSgeam #define CUBLAS_GEMV dynload::cublasSgemv #define CUBLAS_GEMM dynload::cublasSgemm +#define CUBLAS_GETRF dynload::cublasSgetrfBatched +#define CUBLAS_GETRI dynload::cublasSgetriBatched #else #define CUBLAS_GEAM dynload::cublasDgeam #define CUBLAS_GEMV dynload::cublasDgemv #define CUBLAS_GEMM dynload::cublasDgemm +#define CUBLAS_GETRF dynload::cublasDgetrfBatched +#define CUBLAS_GETRI dynload::cublasDgetriBatched #endif const char* hl_cublas_get_error_string(cublasStatus_t status) { @@ -162,6 +171,54 @@ void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN) { hl_matrix_transpose(A_d, C_d, dimM, dimN, dimN, dimM); } +void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) { + /* Solve Ax = I */ + CHECK_NOTNULL(A_d); + CHECK_NOTNULL(C_d); + + /* Step 1: Compute the LU decomposition of matrix A */ + real **inout_h = &A_d; + real **inout_d = (real **)hl_malloc_device(sizeof(real *)); + hl_memcpy(inout_d, inout_h, sizeof(real *)); + + int *pivot_d = (int *)hl_malloc_device(dimN*sizeof(int)); + int *info_d = (int *)t_resource.gpu_mem; + + /* Note: cublasSgetrfBatched is used to calculate a number of + small-sized matrices. There may be a better way to reconstruct + the API for better performance. + */ + CHECK_CUBLAS(CUBLAS_GETRF(t_resource.handle, + dimN, inout_d, lda, pivot_d, + info_d, 1)); + + int info_h; + hl_memcpy(&info_h, info_d, sizeof(int)); + if (info_h != 0) { + LOG(FATAL) << "Factorization of matrix failed: matrix may be singular.\n"; + } + + /* Step 2: Compute the inverse of the matrix given its LU decomposition */ + real **out_h = &C_d; + real **out_d = (real **)hl_malloc_device(sizeof(real *)); + hl_memcpy(out_d, out_h, sizeof(real *)); + + CHECK_CUBLAS(CUBLAS_GETRI(t_resource.handle, + dimN, (const real **)inout_d, lda, pivot_d, + out_d, ldc, info_d, 1)); + + hl_memcpy(&info_h, info_d, sizeof(int)); + if (info_h != 0) { + LOG(FATAL) << "Inversion of matrix failed: matrix may be singular.\n"; + } + + hl_free_mem_device(inout_d); + hl_free_mem_device(pivot_d); + hl_free_mem_device(out_d); + + CHECK_SYNC("hl_matrix_inverse failed"); +} + void hl_matrix_mul(real *A_d, hl_trans_op_t transa, real *B_d, hl_trans_op_t transb, real *C_d, diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 7810d0d10053d3a838788d029a2dfa39248f8933..92b28e4345c3d4d306e6ee2a7f9f50189454f951 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -41,65 +41,28 @@ void* cudnn_dso_handle = nullptr; #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template <typename... Args> \ - cudnnStatus_t operator()(Args...
args) { \ - typedef cudnnStatus_t (*cudnnFunc)(Args...); \ - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, \ - &cudnn_dso_handle); \ - void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ - return reinterpret_cast<cudnnFunc>(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template <typename... Args> \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using cudnn_func = decltype(__name(args...))(*)(Args...); \ + std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, \ + &cudnn_dso_handle); \ + void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ + return reinterpret_cast<cudnn_func>(p_##__name)(args...); \ + } \ } __name; /* struct DynLoad__##__name */ -struct DynLoad__cudnnGetVersion { - template <typename... Args> - size_t operator()(Args... args) { - typedef size_t (*cudnnFunc)(Args...); - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, - &cudnn_dso_handle); - void* p_name = dlsym(cudnn_dso_handle, "cudnnGetVersion"); - return reinterpret_cast<cudnnFunc>(p_name)(args...); - } -} cudnnGetVersion; /* struct DynLoad__##__name */ - -struct DynLoad__cudnnGetErrorString { - template <typename... Args> - const char* operator()(Args... args) { - typedef const char* (*cudnnFunc)(Args...); - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, - &cudnn_dso_handle); - void* p_name = dlsym(cudnn_dso_handle, "cudnnGetErrorString"); - return reinterpret_cast<cudnnFunc>(p_name)(args...); - } -} cudnnGetErrorString; /* struct DynLoad__##__name */ - - #else -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template <typename... Args> \ - cudnnStatus_t operator()(Args... args) { \ - return __name(args...); \ - } \ +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template <typename... Args> \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + return __name(args...); \ + } \ } __name; /* struct DynLoad__##__name */ -struct DynLoad__cudnnGetVersion { - template <typename... Args> - size_t operator()(Args... args) { - return cudnnGetVersion(args...); - } -} cudnnGetVersion; /* struct DynLoad__##__name */ - -struct DynLoad__cudnnGetErrorString { - template <typename... Args> - const char* operator()(Args... args) { - return cudnnGetErrorString(args...); - } -} cudnnGetErrorString; /* struct DynLoad__##__name */ - #endif /** @@ -133,7 +96,9 @@ struct DynLoad__cudnnGetErrorString { __macro(cudnnPoolingForward) \ __macro(cudnnPoolingBackward) \ __macro(cudnnSoftmaxBackward) \ - __macro(cudnnSoftmaxForward) + __macro(cudnnSoftmaxForward) \ + __macro(cudnnGetVersion) \ + __macro(cudnnGetErrorString) CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP) #define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \ diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc index e9fe9f1c117a0573643c81f061bb36399523b38d..3ea2c91bd5a41e0cd6ece0605a25e645676faa40 100644 --- a/paddle/cuda/src/hl_cuda_device.cc +++ b/paddle/cuda/src/hl_cuda_device.cc @@ -85,44 +85,24 @@ void* cudart_dso_handle = nullptr; #define DYNAMIC_LOAD_CUDART_WRAP(__name) \ struct DynLoad__##__name { \ template <typename... Args> \ - cudaError_t operator()(Args...
args) -> decltype(__name(args...)) { \ + using cudart_func = decltype(__name(args...))(*)(Args...); \ std::call_once(cudart_dso_flag, GetCudartDsoHandle, \ &cudart_dso_handle); \ void* p_##__name = dlsym(cudart_dso_handle, #__name); \ - return reinterpret_cast<cudartFunc>(p_##__name)(args...); \ + return reinterpret_cast<cudart_func>(p_##__name)(args...); \ } \ } __name; /* struct DynLoad__##__name */ #else #define DYNAMIC_LOAD_CUDART_WRAP(__name) \ struct DynLoad__##__name { \ template <typename... Args> \ - cudaError_t operator()(Args... args) { \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ return __name(args...); \ } \ } __name; /* struct DynLoad__##__name */ #endif -#ifdef PADDLE_USE_DSO - struct DynLoad__cudaGetErrorString { - template <typename... Args> - const char* operator()(Args... args) { - typedef const char* (*cudaFunc)(Args...); - std::call_once(cudart_dso_flag, GetCudartDsoHandle, - &cudart_dso_handle); - void* p_func = dlsym(cudart_dso_handle, "cudaGetErrorString"); - return reinterpret_cast<cudaFunc>(p_func)(args...); - } - } cudaGetErrorString; /* struct DynLoad__cudaGetErrorString */ -#else -struct DynLoad__cudaGetErrorString { - template <typename... Args> - const char* operator()(Args... args) { - return cudaGetErrorString(args...); - } -} cudaGetErrorString; /* struct DynLoad__cudaGetErrorString */ -#endif - /* include all needed cuda functions in HPPL */ #define CUDA_ROUTINE_EACH(__macro) \ __macro(cudaMalloc) \ @@ -152,7 +132,8 @@ struct DynLoad__cudaGetErrorString { __macro(cudaSetDeviceFlags) \ __macro(cudaGetLastError) \ __macro(cudaFuncSetCacheConfig) \ - __macro(cudaRuntimeGetVersion) + __macro(cudaRuntimeGetVersion) \ + __macro(cudaGetErrorString) CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP) diff --git a/paddle/cuda/src/hl_dso_loader.cc b/paddle/cuda/src/hl_dso_loader.cc index 91c60d85a1e416b06e30fd04763540d41f21c82a..c0b5d6e357fc70ed17180ab38458164918b13878 100644 --- a/paddle/cuda/src/hl_dso_loader.cc +++ b/paddle/cuda/src/hl_dso_loader.cc @@ -49,14 +49,14 @@ static inline std::string join(const std::string& part1, const std::string& part static inline void GetDsoHandleFromDefaultPath( std::string& dso_path, void** dso_handle, int dynload_flags) { LOG(INFO) << "Try to find cuda library: " << dso_path - << "from default system path."; + << " from default system path."; // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH *dso_handle = dlopen(dso_path.c_str(), dynload_flags); // DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to // bring System Integrity Projection (SIP), if dso_handle // is null, search from default package path in Mac OS. - #if defined(__APPLE__) or defined(__OSX__) + #if defined(__APPLE__) || defined(__OSX__) if (nullptr == *dso_handle) { dso_path = join("/usr/local/cuda/lib/", dso_path); *dso_handle = dlopen(dso_path.c_str(), dynload_flags); diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 9918d20d9082ae6c07684ce05eba68c4989dd5d5..27eed75d4d76c351e381a3b71dc44a3254fb1a4d 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -295,6 +295,7 @@ void forward(Argument& act) { void backward(Argument& act) { act.grad->squareDerivative(*act.in); } END_DEFINE_ACTIVATION(square) + /** * @brief Exponential Activation. * \f[ @@ -307,8 +308,36 @@ void forward(Argument& act) { act.value->exp(*act.value); } void backward(Argument& act) { act.grad->expDerivative(*act.value); } END_DEFINE_ACTIVATION(exponential) +/** + * @brief Logarithm Activation.
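+ * (The forward pass below caches the input in act.in so that the + * backward pass can apply f'(z) = 1/z, i.e. grad = grad / z via dotDiv.)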
+ * \f[ + * f(z) = log(z) + * \f] + */ +BEGIN_DEFINE_ACTIVATION(log) +void forward(Argument& act) { + SetDevice device(act.deviceId); + Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(), + /* trans */ false, useGpu(act.deviceId)); + + act.in->copyFrom(*act.value); + act.value->log(*act.value); +} + +void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); } +END_DEFINE_ACTIVATION(log) + ActivationFunction* ActivationFunction::create(const std::string& type) { return gActivationRegistrar.createByType(type); } +std::vector<std::string> ActivationFunction::getAllRegisteredTypes() { + std::vector<std::string> types; + gActivationRegistrar.forEachType([&](const std::string& type) { + types.push_back(type); + }); + return types; +} + + } // namespace paddle diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h index 29860b4a736c37dee70c56731820a4197ea4cdbe..c483372256c035e39bfdbcaa4193a1a2e7fd80b8 100644 --- a/paddle/gserver/activations/ActivationFunction.h +++ b/paddle/gserver/activations/ActivationFunction.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include <string> +#include <vector> namespace paddle { @@ -32,6 +33,7 @@ struct Argument; class ActivationFunction { public: static ActivationFunction* create(const std::string& type); + static std::vector<std::string> getAllRegisteredTypes(); ActivationFunction() {} diff --git a/paddle/gserver/dataproviders/DataProvider.cpp b/paddle/gserver/dataproviders/DataProvider.cpp index 8cefbb30ada46d1ff1b0a4952dde0aeafb5419b1..2cfb5a3a18c8a63d69bf0598eeee2807376340bc 100644 --- a/paddle/gserver/dataproviders/DataProvider.cpp +++ b/paddle/gserver/dataproviders/DataProvider.cpp @@ -131,9 +131,10 @@ void DoubleBuffer::asyncLoadBatch() { taskReadySem_.wait(); if (stopping_) break; - while (batchSize_ == 0) { + while (batchSize_ == 0 && !stopping_) { usleep(5); } + if (stopping_) break; do { DataBatch newBatch; diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp index ca8b07af49ca071940960336be6cc652fcd62a44..90391a7c307d8dff7e289d445cafd27dc5008547 100644 --- a/paddle/gserver/dataproviders/PyDataProvider2.cpp +++ b/paddle/gserver/dataproviders/PyDataProvider2.cpp @@ -433,26 +433,34 @@ private: inline void resetImpl(bool startNewThread) { DBG << "Reseting " << startNewThread; + exit_.store(true); if (loadThread_) { // is loading. - exit_.store(true); loadThread_->join(); loadThread_.reset(); } { PyGuard g; callingContexts_.clear(); + this->pullCV_.notify_one(); + } + + std::lock_guard<std::mutex> guard(mutexForReset_); + { + PyGuard g; dataPool_.clear(); } poolActualSize_ = 0; - exit_ = false; + if (startNewThread && cache_->reset()) { DBG << "Start new thread."; loadThread_.reset(new std::thread([this] { + exit_ = false; loadThread(); })); callingContextCreated_.wait(); } DBG << "Reset done"; + exit_ = false; } private: @@ -465,6 +473,8 @@ private: std::condition_variable pullCV_; std::mutex mtx_; + std::mutex mutexForReset_; + ThreadBarrier callingContextCreated_; std::unique_ptr cache_; @@ -529,6 +539,7 @@ public: * Loading a batch of data. */ int64_t getNextBatchInternal(int64_t size_, DataBatch *batch) { + std::lock_guard<std::mutex> guard(mutexForReset_); REGISTER_TIMER("PyDP2.getNextBatchInternal") CHECK_GE(size_, 0); size_t size = (size_t) size_; @@ -554,6 +565,10 @@ public: } else { // loading from cache. poolPtr = this->cache_->load(); } + if (exit_) { + // PyDataProvider is destructing.
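+ // (resetImpl() stores exit_ and notifies pullCV_ above, so a + // reader blocked in this method wakes up here and bails out + // with a zero-sized batch instead of touching the cleared pool.)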
+ return 0; + } CHECK(poolPtr != nullptr); std::deque& pool = *poolPtr; diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp index 952df60a7d78666c84d5fd9176c3113fdbdacdc9..22698f586701774d884e6eeca943f6bf75fe7a96 100644 --- a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp @@ -28,6 +28,12 @@ void ParallelNeuralNetwork::init( const std::vector& parameterTypes, bool useGpu) { NeuralNetwork::init(config, callback, parameterTypes, useGpu); + if (config.type() == "recurrent_nn") { + LOG(FATAL) + << "You can not add `--parallel_nn=true` on the command line, " + << "parallel_nn training mode does not support the recurrent_nn model."; + } + useGpu_ = useGpu; numDevices_ = 0; if (useGpu_) { diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 040510b7ad2116c1c624141185124556fc8fd7de..42ff0b70d86f788d58d56854a778d61e2af53e06 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/utils/Logging.h" #include "ConvBaseLayer.h" namespace paddle { @@ -78,10 +77,10 @@ size_t ConvBaseLayer::calOutputSize() { imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); if (imgSizeW_[i] == 0) imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); - outputH_.push_back( - outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i])); - outputW_.push_back( - outputSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i])); + outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], + strideY_[i], caffeMode_)); + outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i], + stride_[i], caffeMode_)); CHECK_EQ(outputH_[i], outputH_[0]); CHECK_EQ(outputW_[i], outputW_[0]); } diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 316514acf1a0d15e60f918220241271db2b11133..e660a6d6f50acf8286dfd6fc795e8a03ce3ba604 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -16,6 +16,7 @@ limitations under the License. */ #pragma once #include "Layer.h" +#include "paddle/math/MathUtils.h" namespace paddle { /** @@ -87,31 +88,6 @@ public: virtual size_t calOutputSize(); Weight& getWeight(int idx) { return *weights_[idx]; } - - /** - * Calculate output size based on caffeMode_. 
- * - input(+padding): 0123456789 - * - imageSize(+padding) = 10; - * - filterSize = 3; - * - stride = 2; - * - caffeMode_ is true: - - output: (012), (234), (456), (678) - - outputSize = 4; - * - caffeMode_ is false: - * - output: (012), (234), (456), (678), (9) - * - outputSize = 5; - */ - int outputSize(int imageSize, int filterSize, int padding, int stride) { - int outputSize; - if (!caffeMode_) { - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - CHECK_GE(outputSize, 1); - return outputSize; - } }; } // namespace paddle diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/gserver/layers/ConvOperator.cpp index 8c72c1778451dfddbaa740921cd08cf73fe56785..2d9c892fe595f2f4dcdc9dcc3cd392a6c29fac01 100644 --- a/paddle/gserver/layers/ConvOperator.cpp +++ b/paddle/gserver/layers/ConvOperator.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" #include "Operator.h" namespace paddle { @@ -35,8 +35,8 @@ public: */ virtual ~ConvOperator() { if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); - workSpaceInBytes_ = 0; + hl_free_mem_device(workSpace_); + workSpaceInBytes_ = 0; } hl_destroy_tensor_descriptor(inputDesc_); @@ -83,33 +83,6 @@ private: filterSize_ * filterSizeY_ * channels_ * numFilters_); } - /** - * Calculate output size. - */ - int outputSize(int imageSize, int filterSize, int padding, int stride) { - int outputSize; - if (!caffeMode_) { - /* input(+padding): 0123456789 - * imageSize(+padding) = 10; - * filterSize = 3; - * stride = 2; - * output: (012), (234), (456), (678), (9) - * outputSize = 5; - */ - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - /* input(+padding): 0123456789 - * imageSize(+padding) = 10; - * filterSize = 3; - * stride = 2; - * output: (012), (234), (456), (678) - * outputSize = 4; - */ - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - return outputSize; - } /// Most of member variables are same with CudnnConvLayer. /// There is no explanation here. 
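/// (The removed per-class outputSize() code above appears to have been consolidated into a shared helper: judging by the added paddle/math/MathUtils.h includes, both ConvBaseLayer and ConvOperator now call outputSize(imageSize, filterSize, padding, stride, caffeMode) from there.)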
int imageH_, imageW_, outputH_, outputW_; @@ -129,7 +102,7 @@ private: int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_; size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_; size_t workSpaceInBytes_; - void* workSpace_; + void *workSpace_; bool isSelectAlgo_; }; @@ -160,7 +133,7 @@ ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu) void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { if (maxWorkSpace > workSpaceInBytes_) { if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); + hl_free_mem_device(workSpace_); } // total amount of storage needed workSpace_ = hl_malloc_device(maxWorkSpace); @@ -168,14 +141,13 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { } } - void ConvOperator::reshape(int batchSize) { imageH_ = ins_[0]->getFrameHeight(); imageW_ = ins_[0]->getFrameWidth(); if (imageH_ == 0) imageH_ = imgSize_; if (imageW_ == 0) imageW_ = imgSize_; - outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_); - outputW_ = outputSize(imageW_, filterSize_, padding_, stride_); + outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_); + outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_); out_->setFrameHeight(outputH_); out_->setFrameWidth(outputW_); @@ -183,10 +155,10 @@ void ConvOperator::reshape(int batchSize) { reshapeImageDescriptors(); if (!isSelectAlgo_) { - hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, - convDesc_, &fwdAlgo_, &fwdLimitBytes_, - &bwdDataAlgo_, &bwdDataLimitBytes_, - &bwdFilterAlgo_, &bwdFilterLimitBytes_); + hl_conv_workspace(inputDesc_, outputDesc_, filterDesc_, convDesc_, + &fwdAlgo_, &fwdLimitBytes_, &bwdDataAlgo_, + &bwdDataLimitBytes_, &bwdFilterAlgo_, + &bwdFilterLimitBytes_); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); @@ -202,7 +174,8 @@ void ConvOperator::computeConvSizes() { hl_create_filter_descriptor(&filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_); hl_create_tensor_descriptor(&inputDesc_); - int outputX = outputSize(imgSize_, filterSize_, padding_, stride_); + int outputX = + outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_); CHECK_EQ(outputX, outputX_); hl_create_tensor_descriptor(&outputDesc_); hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_, @@ -211,13 +184,13 @@ void ConvOperator::computeConvSizes() { void ConvOperator::reshapeImageDescriptors() { hl_tensor_reshape(inputDesc_, 1, channels_, imageH_, imageW_, - channels_ * imageH_ * imageW_, imageH_ * imageW_, - imageW_, 1); + channels_ * imageH_ * imageW_, imageH_ * imageW_, imageW_, + 1); hl_tensor_reshape(outputDesc_, 1, numFilters_, outputH_, outputW_, numFilters_ * outputH_ * outputW_, outputH_ * outputW_, outputW_, 1); - hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, - paddingY_, padding_, strideY_, stride_); + hl_reset_convolution_descriptor(convDesc_, inputDesc_, filterDesc_, paddingY_, + padding_, strideY_, stride_); inputOffset_ = channels_ * imageH_ * imageW_; outputOffset_ = numFilters_ * outputH_ * outputW_; weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_; @@ -273,18 +246,17 @@ void ConvOperator::backward() { real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; hl_convolution_backward_filter(inputDesc_, inputData, outputDesc_, outGrad, filterDesc_, weightGrad, - convDesc_, workSpace_, - workSpaceInBytes_, bwdFilterAlgo_); + convDesc_, workSpace_, workSpaceInBytes_, + bwdFilterAlgo_); } MatrixPtr preGrad = ins_[0]->grad; 
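+ // preGrad is NULL when the input layer does not need its gradient; in that case the backward-data convolution below is skipped.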
if (NULL != preGrad) { real *inputGrad = preGrad->getData() + inputOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - hl_convolution_backward_data(inputDesc_, inputGrad, outputDesc_, - outGrad, filterDesc_, wgtData, - convDesc_, workSpace_, - workSpaceInBytes_, bwdDataAlgo_); + hl_convolution_backward_data( + inputDesc_, inputGrad, outputDesc_, outGrad, filterDesc_, wgtData, + convDesc_, workSpace_, workSpaceInBytes_, bwdDataAlgo_); } } } diff --git a/paddle/gserver/layers/ConvProjection.h b/paddle/gserver/layers/ConvProjection.h index 41a100ac3c50fe0180440b20a0b8dfa359e2848a..d0bfe9a6edba05230202da065ca42741439ce190 100644 --- a/paddle/gserver/layers/ConvProjection.h +++ b/paddle/gserver/layers/ConvProjection.h @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once #include "Projection.h" +#include "paddle/math/MathUtils.h" namespace paddle { @@ -42,17 +42,15 @@ protected: void reshapeTensorDesc(int batchSize); void reshape(int batchSize); - int outputSize(int imageSize, int filterSize, int padding, int stride) { - return (imageSize - filterSize + 2 * padding) / stride + 1; - } - size_t calOutputSize() { imageH_ = in_->getFrameHeight(); imageW_ = in_->getFrameWidth(); if (imageH_ == 0) imageH_ = configImgH_; if (imageW_ == 0) imageW_ = configImgW_; - outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_); - outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_); + outputH_ = outputSize(imageH_, filterH_, paddingH_, strideH_, + /* caffeMode */ true); + outputW_ = outputSize(imageW_, filterW_, paddingW_, strideW_, + /* caffeMode */ true); const_cast<Argument*>(out_)->setFrameHeight(outputH_); const_cast<Argument*>(out_)->setFrameWidth(outputW_); diff --git a/paddle/gserver/layers/CudnnPoolLayer.cpp b/paddle/gserver/layers/CudnnPoolLayer.cpp index 4c733591b3779f2502c308a965cb731466b464f0..24adb50a985ff4020f1716a053aba325fcf076f3 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.cpp +++ b/paddle/gserver/layers/CudnnPoolLayer.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/ - #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" #include "paddle/math/Matrix.h" @@ -62,9 +61,9 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap, strideHeight = strideY_; strideWidth = stride_; - hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, - windowWidth, heightPadding, widthPadding, - strideHeight, strideWidth); + hl_create_pooling_descriptor(&poolingDesc_, mode_, windowHeight, windowWidth, + heightPadding, widthPadding, strideHeight, + strideWidth); return true; } @@ -80,8 +79,10 @@ void CudnnPoolLayer::reshape(int batchSize) { } CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(), channels_ * imageH_ * imageW_); - outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_); - outputW_ = outputSize(imageW_, sizeX_, confPadding_, stride_); + outputH_ = outputSize(imageH_, sizeY_, confPaddingY_, strideY_, + /* caffeMode */ false); + outputW_ = + outputSize(imageW_, sizeX_, confPadding_, stride_, /* caffeMode */ false); getOutput().setFrameHeight(outputH_); getOutput().setFrameWidth(outputW_); @@ -99,8 +100,7 @@ void CudnnPoolLayer::forward(PassType passType) { real *inputData = getInputValue(0)->getData(); real *outData = getOutputValue()->getData(); - hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, - poolingDesc_); + hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, poolingDesc_); } void CudnnPoolLayer::backward(const UpdateCallback &callback) { @@ -113,8 +113,8 @@ void CudnnPoolLayer::backward(const UpdateCallback &callback) { real *inputGrad = getInputGrad(0)->getData(); real *outData = getOutputValue()->getData(); real *outGrad = getOutputGrad()->getData(); - hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, - outData, outGrad, poolingDesc_); + hl_pooling_backward(inputDesc_, inputData, inputGrad, outputDesc_, outData, + outGrad, poolingDesc_); } CudnnPoolLayer::~CudnnPoolLayer() { diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h index bde1f5b8dcbfdc4301266fa758278486fe930daf..e87ad08251dd40214f61857251a03e56867a675e 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/gserver/layers/PoolLayer.h @@ -17,6 +17,7 @@ limitations under the License. */ #include "Layer.h" #include "paddle/math/Matrix.h" +#include "paddle/math/MathUtils.h" #include namespace paddle { @@ -47,16 +48,6 @@ public: static Layer* create(const LayerConfig& config); virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - /** - * Calculate output size according window size and padding size. - */ - int outputSize(int imageSize, int windowSize, int padding, int stride) { - int outputSize; - outputSize = - (imageSize - windowSize + 2 * padding + stride - 1) / stride + 1; - return outputSize; - } }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.cpp b/paddle/gserver/layers/PoolProjectionLayer.cpp index 3a54c51cfc1ac5cbb40e37cb3aa9d56ac6f538cf..fbef55e4d49a08092cec32b7b47f40695171b5b9 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.cpp +++ b/paddle/gserver/layers/PoolProjectionLayer.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" #include "PoolProjectionLayer.h" @@ -32,8 +31,10 @@ size_t PoolProjectionLayer::getSize() { imgSizeW_ = imgSize_; } - outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_); - outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_); + outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_, + /* caffeMode */ false); + outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_, + /* caffeMode */ false); layerSize = outputH_ * outputW_ * channels_; diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp index 83334a59882ac0ffb4d449cd54477de8617f7275..7ec761364043b6218d9342dd83f63b60d4b97ec5 100644 --- a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp @@ -34,14 +34,12 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW, int numBins = std::pow(2, pyramidLevel); int sizeH = std::ceil(imgSizeH / static_cast(numBins)); - int remainderH = sizeH * numBins - imgSizeH; - int paddingH = (remainderH + 1) / 2; - int outSizeH = outputSize(imgSizeH, sizeH, paddingH, sizeH); + int paddingH = (sizeH * numBins - imgSizeH + 1) / 2; + int outSizeH = outputSize(imgSizeH, sizeH, paddingH, sizeH, true); int sizeW = std::ceil(imgSizeW / static_cast(numBins)); - int remainderW = sizeW * numBins - imgSizeW; - int paddingW = (remainderW + 1) / 2; - int outSizeW = outputSize(imgSizeW, sizeW, paddingW, sizeW); + int paddingW = (sizeW * numBins - imgSizeW + 1) / 2; + int outSizeW = outputSize(imgSizeW, sizeW, paddingW, sizeW, true); conf->set_stride(sizeW); conf->set_stride_y(sizeH); @@ -55,16 +53,23 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW, return config; } -void SpatialPyramidPoolLayer::splitInput(Argument& input, size_t height, - size_t width, bool useGpu) { - input.value = getInput(0).value; - if (passType_ != PASS_TEST && needGradient()) { - Matrix::resizeOrCreate(input.grad, height, width, /* trans */ false, - useGpu); - input.grad->zeroMem(); - } +size_t SpatialPyramidPoolLayer::getSize() { + CHECK_EQ(inputLayers_.size(), 1UL); + size_t layerSize = 0; + imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); + imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); + + size_t outputH = 1; + size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1); + + layerSize = outputH * outputW * channels_; + + getOutput().setFrameHeight(outputH); + getOutput().setFrameWidth(outputW); + return layerSize; } + bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { Layer::init(layerMap, parameterMap); @@ -79,7 +84,6 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap, imgSizeH_ = sppConf.has_img_size_y() ? 
sppConf.img_size_y() : imgSizeW_; poolProjections_.reserve(pyramidHeight_); projCol_.reserve(pyramidHeight_); - projInput_.reserve(pyramidHeight_); projOutput_.resize(pyramidHeight_); size_t startCol = 0; @@ -91,7 +95,6 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap, endCol += poolProjections_[i]->getOutputSize(); projCol_.push_back(std::make_pair(startCol, endCol)); startCol = endCol; - projInput_.emplace_back(Argument()); } CHECK_EQ(endCol, getSize()); return true; @@ -107,11 +110,9 @@ void SpatialPyramidPoolLayer::forward(PassType passType) { size_t endCol = projCol_[i].second; projOutput_[i].value = output_.value->subColMatrix(startCol, endCol); projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol); - splitInput(projInput_[i], getInput(0).value->getHeight(), - getInput(0).value->getWidth(), useGpu_); } for (size_t i = 0; i < pyramidHeight_; i++) { - poolProjections_[i]->forward(&projInput_[i], &projOutput_[i], passType); + poolProjections_[i]->forward(&getInput(0), &projOutput_[i], passType); } } @@ -119,7 +120,6 @@ void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) { for (size_t i = 0; i < pyramidHeight_; i++) { if (poolProjections_[i]) { poolProjections_[i]->backward(callback); - getInput(0).grad->add(*projInput_[i].grad); } } } diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/gserver/layers/SpatialPyramidPoolLayer.h index 156581530a1bc738e360f8690ffa7cf41151b489..64f3fda8a0adffecba82b5be54666a25d89a86a3 100644 --- a/paddle/gserver/layers/SpatialPyramidPoolLayer.h +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "Layer.h" #include "PoolProjection.h" #include "paddle/utils/Logging.h" +#include "paddle/math/MathUtils.h" namespace paddle { @@ -30,7 +31,6 @@ protected: std::string poolType_; std::vector<std::unique_ptr<PoolProjection>> poolProjections_; - std::vector<Argument> projInput_; std::vector<Argument> projOutput_; std::vector<std::pair<size_t, size_t>> projCol_; @@ -41,13 +41,8 @@ public: virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); ProjectionConfig getConfig(size_t sizeX_, size_t sizeY_, size_t channels, size_t pyamidLevel_, std::string& poolType_); - - int outputSize(int imageSize, int windowSize, int padding, int stride) { - return (imageSize - windowSize + 2 * padding) / stride + 1; - } - + size_t getSize(); virtual void forward(PassType passType); virtual void backward(const UpdateCallback& callback = nullptr); - void splitInput(Argument& input, size_t height, size_t width, bool useGpu); }; } // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index ff2abf76973174ac2a437830b234f4c9937c08ed..26ee2b3aae64abfce69b543f13ab0f4254757fd8 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -20,6 +20,13 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +add_unittest_without_exec(test_ActivationGrad + test_ActivationGrad.cpp + LayerGradUtil.cpp + TestUtil.cpp) +add_test(NAME test_ActivationGrad + COMMAND test_ActivationGrad) + ################## test_Evaluator ####################### add_unittest(test_Evaluator test_Evaluator.cpp diff --git a/paddle/gserver/tests/test_ActivationGrad.cpp b/paddle/gserver/tests/test_ActivationGrad.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2c5d17090dfc7772c84477cb721b084b7a03c835 --- /dev/null +++ b/paddle/gserver/tests/test_ActivationGrad.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2016 Baidu,
Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <gtest/gtest.h> +#include <string> +#include <vector> +#include "paddle/gserver/layers/DataLayer.h" +#include "ModelConfig.pb.h" +#include "paddle/trainer/Trainer.h" + +#include "TestUtil.h" +#include "LayerGradUtil.h" + +using namespace paddle;  // NOLINT +using namespace std;     // NOLINT + +P_DECLARE_bool(use_gpu); +P_DECLARE_bool(thread_local_rand_use_global_seed); + +void testActivation(const string& act) { + LOG(INFO) << "test activation: " << act; + size_t size = 10; + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("addto"); + config.layerConfig.set_size(size); + config.layerConfig.set_active_type(act); + config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + testLayerGrad(config, + act + "_activation", + 100, + /* trans= */false, + useGpu, + /* useWeight */true); + } +} + +TEST(Activation, activation) { + auto types = ActivationFunction::getAllRegisteredTypes(); + std::set<std::string> excluded{"sequence_softmax"}; + for (auto type : types) { + if (excluded.count(type)) continue; + testActivation(type); + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 595e20354ad5bca6cd9dfd60bc4a1915a17b29c2..35701c69c423d8b88aefd9182a6417a1e889a54d 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -18,6 +18,7 @@ limitations under the License.
*/ #include "ModelConfig.pb.h" #include "paddle/gserver/layers/DataLayer.h" #include "paddle/trainer/Trainer.h" +#include "paddle/math/MathUtils.h" #include "LayerGradUtil.h" #include "TestUtil.h" @@ -134,7 +135,6 @@ TEST(Projection, identity) { } } - #ifndef PADDLE_ONLY_CPU TEST(Projection, conv) { const int NUM_FILTERS = 16; @@ -158,21 +158,23 @@ TEST(Projection, conv) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(IMAGE_SIZE); - int outputSize = (2 * conv->padding() + conv->img_size() - - conv->filter_size()) / conv->stride() + 1; - int outputSizeY = (2 * conv->padding_y() + conv->img_size() - - conv->filter_size_y()) / conv->stride_y() + 1; - conv->set_output_x(outputSize); + int output_x = + outputSize(conv->img_size(), conv->filter_size(), conv->padding(), + conv->stride(), /* caffeMode */ true); + int output_y = + outputSize(conv->img_size(), conv->filter_size_y(), conv->padding_y(), + conv->stride_y(), /* caffeMode */ true); + conv->set_output_x(output_x); conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); - conf.set_output_size(outputSize * outputSizeY * NUM_FILTERS); + conf.set_output_size(output_x * output_y * NUM_FILTERS); - testProjectionGrad(conf, INPUT_DATA, + testProjectionGrad( + conf, INPUT_DATA, /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * FILTER_SIZE_Y, /* batchSize */ 100, true, false, NUM_FILTERS, true); } #endif - TEST(Layer, concat) { TestConfig config; config.biasSize = 0; @@ -293,10 +295,9 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(16); - conv->set_output_x( - (2 * conv->padding() + conv->img_size() - conv->filter_size()) / - ((float)conv->stride()) + - 1.5); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); config.layerConfig.set_size(conv->output_x() * conv->output_x() * config.layerConfig.num_filters()); @@ -329,15 +330,13 @@ TEST(Layer, blockExpandLayer) { blockExpand->set_stride_x(2); blockExpand->set_stride_y(2); blockExpand->set_output_x( - 1 + - (2 * blockExpand->padding_x() + blockExpand->img_size_x() - - blockExpand->block_x() + blockExpand->stride_x() - 1) / - blockExpand->stride_x()); + outputSize(blockExpand->img_size_x(), blockExpand->block_x(), + blockExpand->padding_x(), blockExpand->stride_x(), + /* caffeMode */ false)); blockExpand->set_output_y( - 1 + - (2 * blockExpand->padding_y() + blockExpand->img_size_y() - - blockExpand->block_y() + blockExpand->stride_y() - 1) / - blockExpand->stride_y()); + outputSize(blockExpand->img_size_y(), blockExpand->block_y(), + blockExpand->padding_y(), blockExpand->stride_y(), + /* caffeMode */ false)); config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() * blockExpand->channels()); @@ -862,8 +861,8 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool, pool->set_stride(sw); pool->set_stride_y(sh); - int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1; - int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1; + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); pool->set_output_x(ow); pool->set_output_y(oh); } @@ -1281,12 +1280,11 @@ TEST(Operator, conv) { conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(IMAGE_SIZE); - int outputSize = 
- int(1.0 * (2 * conv->padding() + conv->img_size() - conv->filter_size()) / - conv->stride()) + - 1; - conv->set_output_x(outputSize); - config.layerConfig.set_size(outputSize * outputSize * + int output_x = + outputSize(conv->img_size(), conv->filter_size(), conv->padding(), + conv->stride(), /* caffeMode */ true); + conv->set_output_x(output_x); + config.layerConfig.set_size(output_x * output_x * config.layerConfig.num_filters()); config.layerConfig.set_size(conv->output_x() * conv->output_x() * NUM_FILTERS); diff --git a/paddle/gserver/tests/test_PyDataProvider2.cpp b/paddle/gserver/tests/test_PyDataProvider2.cpp index 6bf1e329251219fcbf68b95f2d80a3235cb7037f..b9867a728d9b4cc8d318578ab3e45021f87daa4c 100644 --- a/paddle/gserver/tests/test_PyDataProvider2.cpp +++ b/paddle/gserver/tests/test_PyDataProvider2.cpp @@ -353,6 +353,23 @@ TEST(PyDataProvider2, test_check) { } } +TEST(PyDataProvider2, multiThread) { + paddle::DataConfig config; + config.set_type("py2"); + config.set_files(FLAGS_train_list.c_str()); + config.set_load_data_module("test_PyDataProvider2"); + config.set_load_data_object("test_dense_no_seq"); + config.set_async_load_data(true); + + std::unique_ptr<paddle::DataProvider> provider( + paddle::DataProvider::create(config, false)); + provider->reset(); + paddle::DataBatch batch; + provider->getNextBatch(100, &batch); + provider->reset(); + provider.reset(); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); paddle::initMain(argc, argv); diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index da493379e3a37ecb8f4d8f9f333629b3e71d90a5..f8132066477db3b9762348e9baf7a5112d302fd6 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -39,6 +39,46 @@ void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, beta, C, ldc); } +template<> +int getrf<float>(const CBLAS_ORDER order, const int M, const int N, + float *A, const int lda, int *ipiv) { +#ifdef PADDLE_USE_ATLAS + return clapack_sgetrf(order, M, N, A, lda, ipiv); +#else + return LAPACKE_sgetrf(order, M, N, A, lda, ipiv); +#endif +} + +template<> +int getrf<double>(const CBLAS_ORDER order, const int M, const int N, + double *A, const int lda, int *ipiv) { +#ifdef PADDLE_USE_ATLAS + return clapack_dgetrf(order, M, N, A, lda, ipiv); +#else + return LAPACKE_dgetrf(order, M, N, A, lda, ipiv); +#endif +} + +template<> +int getri<float>(const CBLAS_ORDER order, const int N, float *A, + const int lda, const int *ipiv) { +#ifdef PADDLE_USE_ATLAS + return clapack_sgetri(order, N, A, lda, ipiv); +#else + return LAPACKE_sgetri(order, N, A, lda, ipiv); +#endif +} + +template<> +int getri<double>(const CBLAS_ORDER order, const int N, double *A, + const int lda, const int *ipiv) { +#ifdef PADDLE_USE_ATLAS + return clapack_dgetri(order, N, A, lda, ipiv); +#else + return LAPACKE_dgetri(order, N, A, lda, ipiv); +#endif +} + template<> void axpy<float>(const int n, const float alpha, const float* x, float* y) { cblas_saxpy(n, alpha, x, 1, y, 1); diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 43075977dc9cef1573cf6dd75d9ef577b07d337e..b322bd2bd719484b86b62bca5783d78bd8ca9a4c 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -17,10 +17,18 @@ limitations under the License.
*/ #ifdef PADDLE_USE_MKL #include <mkl.h> +#include <mkl_lapacke.h> #else extern "C" { #include <cblas.h> } +#ifdef PADDLE_USE_ATLAS +extern "C" { +#include <clapack.h> +} +#else +#include <lapacke.h> +#endif #endif #include <cmath> @@ -34,6 +42,14 @@ void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const T* B, const int ldb, const T beta, T* C, const int ldc); +template<class T> +int getrf(const CBLAS_ORDER Order, const int M, const int N, + T *A, const int lda, int *ipiv); + +template<class T> +int getri(const CBLAS_ORDER Order, const int N, T *A, + const int lda, const int *ipiv); + template<class T> void axpy(const int n, const T alpha, const T* x, T* y); diff --git a/paddle/math/MathUtils.cpp b/paddle/math/MathUtils.cpp index 5b78ab1b07bda0b28dd1688b6364ecf1882f4073..c1af8628d03c50185089b45f3a0502726da9137e 100644 --- a/paddle/math/MathUtils.cpp +++ b/paddle/math/MathUtils.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "MathUtils.h" #include <algorithm> #include "paddle/utils/Logging.h" @@ -24,11 +23,7 @@ namespace paddle { * major is rows and minor is cols, according to * major value to initialize minor value" */ -void sparseRand(int* major, - int* minor, - int nnz, - int majorLen, - int minorMax, +void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu) { CHECK(size_t(nnz) > size_t(1)); int* cpuMajor; @@ -72,5 +67,17 @@ void sparseRand(int* major, } } +int outputSize(int imageSize, int filterSize, int padding, int stride, + bool caffeMode) { + int outputSize; + if (!caffeMode) { + outputSize = + (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; + } else { + outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; + } + CHECK_GE(outputSize, 1); + return outputSize; +} } // namespace paddle diff --git a/paddle/math/MathUtils.h b/paddle/math/MathUtils.h index 83375022abbe268e22bbeb46e8e4b96a7198cb5f..49d0c10a8f5e4dcdaf22dca77a3f113400b16646 100644 --- a/paddle/math/MathUtils.h +++ b/paddle/math/MathUtils.h @@ -44,4 +44,20 @@ namespace paddle { void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu); +/** + * Calculate output size based on caffeMode.
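+ * caffeMode = true uses floor-style division and drops a final window that only partially covers the input, matching the Caffe and cuDNN convention. + * caffeMode = false uses ceil-style division and keeps that partial window.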
+ * - input(+padding): 0123456789 + * - imageSize(+padding) = 10; + * - filterSize = 3; + * - stride = 2; + * - caffeMode is true: + *   - output: (012), (234), (456), (678) + *   - outputSize = 4; + * - caffeMode is false: + *   - output: (012), (234), (456), (678), (9) + *   - outputSize = 5; + */ +int outputSize(int imageSize, int filterSize, int padding, int stride, + bool caffeMode); + } // namespace paddle diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 607334aaa934bcd1fd33878d9a936e5e5a7e6f8e..281c35f5d14ab6df78e6e64debb7d1356ad485f2 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -335,6 +335,30 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc); } + +MatrixPtr GpuMatrix::getInverse() { + MatrixPtr matInv; + inverse(matInv, true); + return matInv; +} + +void GpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { + CHECK_EQ(height_, width_); + + if (memAlloc) { + matInv = std::make_shared<GpuMatrix>(height_, width_); + } else { + CHECK(matInv != NULL); + } + + real* data = getData(); + real* dataInv = matInv->getData(); + int lda = getStride(); + int ldc = matInv->getStride(); + + hl_matrix_inverse(data, dataInv, height_, lda, ldc); +} + void GpuMatrix::addBias(Matrix& b, real scale) { CHECK(b.getHeight() == 1) << "the Bias should be a vector"; BaseMatrix::addBias(b, scale); @@ -1417,6 +1441,47 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { } } + +MatrixPtr CpuMatrix::getInverse() { + MatrixPtr matInv; + inverse(matInv, true); + return matInv; +} + +void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { + CHECK_EQ(height_, width_); + + if (memAlloc) { + matInv = std::make_shared<CpuMatrix>(height_, width_); + } else { + CHECK(matInv != NULL); + } + + CHECK_EQ(height_, matInv->getHeight()); + CHECK_EQ(width_, matInv->getWidth()); + matInv->copyFrom(*this); + + real* data = getData(); + real* dataInv = matInv->getData(); + int ldc = matInv->getStride(); + + if (height_ == 1) { + CHECK_NE(*data, 0); + *dataInv = 1.0 / (*data); + return; + } + + /* Compute the LU decomposition of the matrix */ + std::vector<int> ipiv(height_); + CBLAS_ORDER order = (matInv->isTransposed() ? CblasColMajor : CblasRowMajor); + int info = getrf(order, height_, height_, dataInv, ldc, ipiv.data()); + CHECK_EQ(info, 0); + + /* Compute the inverse of the matrix given its LU decomposition */ + info = getri(order, height_, dataInv, ldc, ipiv.data()); + CHECK_EQ(info, 0); +} + void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, int paddingW, int outputH, diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 52cbed528ca8bd9bea6ef9a86095a305061ebbe9..293d13f4d6d5af0883ea76fb64ca5d9173efd4e0 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -328,6 +328,20 @@ public: LOG(FATAL) << "Not implemented"; } + virtual MatrixPtr getInverse() { + LOG(FATAL) << "Not implemented"; + } + + /** + * @brief inverse. + * + * If matInv's memory is allocated by the caller, set memAlloc to false; + * otherwise set it to true and the memory is allocated inside. + */ + virtual void inverse(MatrixPtr matInv, bool memAlloc) { + LOG(FATAL) << "Not implemented"; + } + public: /// Only set all variables to 0 or NULL but not free them.
virtual void clear() { @@ -1043,6 +1057,9 @@ public: MatrixPtr getTranspose(); void transpose(MatrixPtr matTrans, bool memAlloc); + MatrixPtr getInverse(); + void inverse(MatrixPtr matInv, bool memAlloc); + /// add b to each sample of this. void addBias(Matrix& b, real scale); void addSharedBias(Matrix& b, real scale); @@ -1282,6 +1299,9 @@ public: MatrixPtr getTranspose(); void transpose(MatrixPtr matTrans, bool memAlloc); + MatrixPtr getInverse(); + void inverse(MatrixPtr matInv, bool memAlloc); + void copyFrom(const Matrix& src); void copyFrom(const Matrix& src, hl_stream_t stream); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 0ddf7e0dfc38624d9900d3419b808af7de2f3141..b887cccaaa14e6c3761d151f31a859de66cf8fac 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -641,9 +641,32 @@ void testMatrixTranspose(int height, int width) { MatrixCheckEqual(*cpuT, *outputCheck); } +void testMatrixInverse(int height) { + MatrixPtr cpu = std::make_shared<CpuMatrix>(height, height); + MatrixPtr gpu = std::make_shared<GpuMatrix>(height, height); + MatrixPtr cpuI = std::make_shared<CpuMatrix>(height, height); + MatrixPtr gpuI = std::make_shared<GpuMatrix>(height, height); + + cpu->randomizeUniform(); + gpu->copyFrom(*cpu); + cpu->inverse(cpuI, false); + gpu->inverse(gpuI, false); + + MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, height); + outputCheck->copyFrom(*gpuI); + MatrixCheckErr(*cpuI, *outputCheck); + + outputCheck->mul(cpu, cpuI); + cpu->zeroMem(); + for (int i = 0; i < height; i++) { + cpu->getRowBuf(i)[i] = 1.0; + } + MatrixCheckErr(*cpu, *outputCheck); +} + TEST(Matrix, unary) { - for (auto height : {1, 11, 73, 128, 200, 330}) { - for (auto width : {1, 32, 100, 512, 1000, 3210}) { + for (auto height : {1, 3, 11, 73, 128, 200, 330}) { + for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { VLOG(3) << " height=" << height << " width=" << width; // applyUnary @@ -675,6 +698,8 @@ TEST(Matrix, unary) { // transpose testMatrixTranspose(height, width); } + // inverse + testMatrixInverse(height); } } diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in index 4cf5f41f195df7655c9e77eba23baf90e21cee13..213cf2f1cc7e491dc6455f1af434446806aa4ccc 100644 --- a/paddle/scripts/submit_local.sh.in +++ b/paddle/scripts/submit_local.sh.in @@ -68,7 +68,7 @@ EOF if [ $? -eq 1 ]; then # Older version installed, or not installed at all echo "First time run paddle, need to install some python dependencies." BASEDIR=$(dirname "$0") - pip install ${BASEDIR}/../opt/paddle/share/wheels/*.whl + pip install ${BASEDIR}/../opt/paddle/share/wheels/*-@PADDLE_VERSION@-*.whl if [ $? -ne 0 ]; then echo "pip install wheels failed. " echo "Please use 'sudo paddle' at the first time you use PaddlePaddle" diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh index a73c32344c8abe4d314fbac2c2ec02aafeeac9d1..54e3320c8c1584d0f41e8507c846b17f7c85d09c 100755 --- a/paddle/scripts/travis/build_and_test.sh +++ b/paddle/scripts/travis/build_and_test.sh @@ -6,17 +6,19 @@ if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then fi -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_TESTING=ON -DON_TRAVIS=ON ${CMAKE_EXTRA} +cmake ..
-DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_TESTING=ON -DON_TRAVIS=ON -DON_COVERALLS=ON ${CMAKE_EXTRA} NPROC=1 if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then NRPOC=`nproc` + make -j $NPROC + make coveralls elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then NPROC=`sysctl -n hw.ncpu` + make -j $NPROC + env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j $NPROC" fi -make -j $NPROC -env CTEST_OUTPUT_ON_FAILURE=1 make test ARGS="-j $NPROC" sudo make install sudo paddle version diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 45251213d2d7930947f39d4730245ca8f7dfddc8..2cdff9d1aca927122fcdb0c2a7ab22a0e38b41c1 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -106,7 +106,7 @@ pid_t getTID() { #endif pid_t tid = syscall(__NR_gettid); #endif - CHECK_NE(tid, -1); + CHECK_NE((int)tid, -1); return tid; } diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 7ad2b7fd5ce8343580a574ffcfdd84d363caa97d..f45fcd808708e0ca977e50b88b479faab340ec99 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -782,7 +782,9 @@ class Pool(Cfg): padding = None, padding_y = None): self.add_keys(locals()) - + +# please refer to the comments in proto/ModelConfig.proto +@config_class class SpatialPyramidPool(Cfg): def __init__( self, @@ -1015,6 +1017,17 @@ def TestData(data_config, async_load_data=None): " Data definition") g_config.test_data_config.async_load_data = async_load_data +''' +caffe_mode: compute the output size using floor instead of ceil, + which is consistent with the convention of Caffe and cuDNN. +''' +def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): + output = (2 * padding + img_size - filter_size) / float(stride) + if caffe_mode: + return 1 + int(math.floor(output)) + else: + return 1 + int(math.ceil(output)) + def parse_pool(pool, input_layer_name, pool_conf): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in ['max-projection', 'avg-projection', @@ -1045,12 +1058,10 @@ def parse_pool(pool, input_layer_name, pool_conf): if pool.padding is not None: pool_conf.padding = pool.padding pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.output_x = int(math.ceil((pool_conf.img_size + \ - 2*pool_conf.padding - pool_conf.size_x) / \ - float(pool_conf.stride))) + 1 - pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \ - 2*pool_conf.padding_y - pool_conf.size_y) / \ - float(pool_conf.stride_y))) + 1 + pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, + pool_conf.padding, pool_conf.stride, False) + pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, + pool_conf.padding_y, pool_conf.stride_y, False) def parse_spp(spp, input_layer_name, spp_conf): spp_conf.pool_type = spp.pool_type @@ -1097,10 +1108,7 @@ def parse_norm(norm, input_layer_name, norm_conf): norm_conf.scale /= norm.size else: norm_conf.scale /= norm.size ** 2 -''' -caffe_mode: compute the output size using floor instead of ceil, - which is consistent of caffe and CuDNN's convention.
-''' + def parse_conv(conv, input_layer_name, conv_conf): conv_conf.filter_size = conv.filter_size conv_conf.filter_size_y = conv.filter_size_y @@ -1121,14 +1129,9 @@ def parse_conv(conv, input_layer_name, conv_conf): ("Input layer %s: Incorrect input image size %d for input " + "image pixels %d") % (input_layer_name, conv_conf.img_size, img_pixels)) - if conv.caffe_mode: - conv_conf.output_x = \ - 1 + int(math.floor((2 * conv.padding + conv_conf.img_size \ - - conv.filter_size) / float(conv.stride))) - else: - conv_conf.output_x = \ - 1 + int(math.ceil((2 * conv.padding + conv_conf.img_size \ - - conv.filter_size) / float(conv.stride))) + conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, + conv_conf.caffe_mode) def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels @@ -1143,18 +1146,16 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf): if block_expand_conf.img_size_x == 0: block_expand_conf.output_x = 0 else: - block_expand_conf.output_x = \ - 1 + \ - int(math.ceil((2 * block_expand.padding_x + block_expand.img_size_x \ - - block_expand.block_x) / float(block_expand.stride_x))) + block_expand_conf.output_x = cnn_output_size( + block_expand.img_size_x, block_expand.block_x, + block_expand.padding_x, block_expand.stride_x, False) if block_expand_conf.img_size_y == 0: - block_expand_conf.output_y = 0 + block_expand_conf.output_y = 0 else: - block_expand_conf.output_y = \ - 1 + \ - int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \ - - block_expand.block_y) / float(block_expand.stride_y))) + block_expand_conf.output_y = cnn_output_size( + block_expand.img_size_y, block_expand.block_y, + block_expand.padding_y, block_expand.stride_y, False) def parse_maxout(maxout, input_layer_name, maxout_conf): maxout_conf.channels = maxout.channels @@ -2617,8 +2618,9 @@ class MixedLayer(LayerBase): for input in self.inputs: psize += input.calc_bias_size() - self.config.bias_size = psize - self.create_bias_parameter(bias, psize) + if bias: + self.config.bias_size = psize + self.create_bias_parameter(bias, psize) if error_clipping_threshold is not None: self.config.error_clipping_threshold = error_clipping_threshold @@ -2703,8 +2705,9 @@ class ConcatenateLayer2(LayerBase): for input in self.inputs: psize += input.calc_bias_size() - self.config.bias_size = psize - self.create_bias_parameter(bias, psize) + if bias: + self.config.bias_size = psize + self.create_bias_parameter(bias, psize) @config_layer('recurrent') class RecurrentLayer(LayerBase): diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py index 292014519374eabbe55c61daa73692814a52aac2..ad5cdc0a0eb13f7a58e7d89ebfb79d33a63b75d5 100644 --- a/python/paddle/trainer_config_helpers/activations.py +++ b/python/paddle/trainer_config_helpers/activations.py @@ -199,3 +199,12 @@ class ExpActivation(BaseActivation): f(z) = e^z. """ def __init__(self): BaseActivation.__init__(self, 'exponential', False) + +class LogActivation(BaseActivation): + """ + Logarithm Activation. + + .. 
math:: + f(z) = log(z) + """ + def __init__(self): BaseActivation.__init__(self, 'log', False) diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py index b20aebc685fe5a36b69c4e9f09b610631b233ecf..be00f48b457c137e3b0913da84ad2e6215f9e9ca 100644 --- a/python/paddle/trainer_config_helpers/default_decorators.py +++ b/python/paddle/trainer_config_helpers/default_decorators.py @@ -13,6 +13,7 @@ # limitations under the License. import functools +import inspect from .attrs import ParamAttr from .activations import TanhActivation from paddle.trainer.config_parser import * @@ -37,8 +38,12 @@ def wrap_param_default(param_names=None, default_factory=None, @functools.wraps(func) def __wrapper__(*args, **kwargs): if len(args) != 0: - logger.warning("please use keyword arguments in paddle config.") - + argspec = inspect.getargspec(func) + num_positional = len(argspec.args) + if argspec.defaults: + num_positional -= len(argspec.defaults) + if not argspec.varargs and len(args) > num_positional: + logger.fatal("Must use keyword arguments for non-positional args") for name in param_names: if not_set_callback(kwargs, name): # Not set kwargs[name] = default_factory(func) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 03243c03b021b7bfc7ea8847ae9a5bab79344b3c..e6338e804536ae6d42467ac6cd253c71d49a305a 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -56,7 +56,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", 'rank_cost', 'lambda_cost', 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', 'print_layer', - 'spp_layer', + # 'spp_layer', ] @@ -112,7 +112,7 @@ class LayerType(object): LINEAR_COMBINATION_LAYER = "convex_comb" BLOCK_EXPAND = "blockexpand" MAXOUT = "maxout" - SPP_LAYER = "spp" + # SPP_LAYER = "spp" PRINT_LAYER = "print" @@ -566,7 +566,7 @@ class MixedLayerType(LayerOutput): self.inputs = [] self.finalized = False - def __add__(self, other): + def __iadd__(self, other): """ + += operator :param other: Other projection. @@ -1711,60 +1711,60 @@ def img_pool_layer(input, pool_size, name=None, num_filters=num_channels) -@wrap_name_default("spp") -@layer_support() -def spp_layer(input, name=None, num_channels=None, pool_type=None, - pyramid_height=None, img_width=None, layer_attr=None): - pass - """ - Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. - The details please refer to - `Kaiming He's paper `_. - - :param name: layer name. - :type name: basestring - :param input: layer's input. - :type input: LayerOutput - :param num_channels: number of input channel. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling. - :type scale: BasePoolingType - :param pyramid_height: pyramid height. - :type pyramid_height: int - :param img_width: the width of input feature map. If it is None, the input feature - map should be square. - :type img_width: int|None - :param layer_attr: Extra Layer Attribute. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
- :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - type_name = pool_type.name - if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)): - type_name += '-projection' - - Layer( - name=name, - type=LayerType.SPP_LAYER, - inputs=Input(input.name, - spp=SpatialPyramidPool(pool_type=type_name, - channels=num_channels, - pyramid_height=pyramid_height, - img_width=img_width) - ), - **ExtraLayerAttribute.to_kwargs(layer_attr) - ) - return LayerOutput(name, LayerType.SPP_LAYER, parents=[input], - num_filters=num_channels) +# @wrap_name_default("spp") +# @layer_support() +# def spp_layer(input, name=None, num_channels=None, pool_type=None, +# pyramid_height=None, img_width=None, layer_attr=None): +# pass +# """ +# Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. +# The details please refer to +# `Kaiming He's paper `_. + +# :param name: layer name. +# :type name: basestring +# :param input: layer's input. +# :type input: LayerOutput +# :param num_channels: number of input channel. +# :type num_channels: int +# :param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling. +# :type scale: BasePoolingType +# :param pyramid_height: pyramid height. +# :type pyramid_height: int +# :param img_width: the width of input feature map. If it is None, the input feature +# map should be square. +# :type img_width: int|None +# :param layer_attr: Extra Layer Attribute. +# :type layer_attr: ExtraLayerAttribute +# :return: LayerOutput object. +# :rtype: LayerOutput +# """ +# if num_channels is None: +# assert input.num_filters is not None +# num_channels = input.num_filters + +# if pool_type is None: +# pool_type = MaxPooling() +# elif isinstance(pool_type, AvgPooling): +# pool_type.name = 'avg' + +# type_name = pool_type.name +# if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)): +# type_name += '-projection' + +# Layer( +# name=name, +# type=LayerType.SPP_LAYER, +# inputs=Input(input.name, +# spp=SpatialPyramidPool(pool_type=type_name, +# channels=num_channels, +# pyramid_height=pyramid_height, +# img_width=img_width) +# ), +# **ExtraLayerAttribute.to_kwargs(layer_attr) +# ) +# return LayerOutput(name, LayerType.SPP_LAYER, parents=[input], +# num_filters=num_channels) def __img_norm_layer__(name, input, size, norm_type, scale, power, diff --git a/python/paddle/trainer_config_helpers/math.py b/python/paddle/trainer_config_helpers/math.py new file mode 100644 index 0000000000000000000000000000000000000000..e35849b77ac531b4a4676019e01285af67925bd9 --- /dev/null +++ b/python/paddle/trainer_config_helpers/math.py @@ -0,0 +1,64 @@ +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
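+ +# This module registers elementwise math operations (exp, log, abs, sigmoid, +# tanh, square) as layer-producing functions and overloads `+` and `-` on +# LayerOutput, so expressions such as `1 + x` or `y - x` expand into +# mixed_layer / slope_intercept_layer combinations; see the math_ops.py test +# config below for usage.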
+ +from .layers import LayerOutput, mixed_layer, identity_projection, \ + slope_intercept_layer +from .attrs import is_compatible_with +from .default_decorators import * +import activations as act + +__all__ = [] + +def register_unary_math_op(op_name, act): + def op(input, name=None): + return mixed_layer(input=[identity_projection(input=input)], + name=name, + act=act) + op = wrap_name_default(op_name)(op) + op.__doc__ = type(act).__doc__ + globals()[op_name] = op + __all__.append(op_name) + +register_unary_math_op('exp', act.ExpActivation()) +register_unary_math_op('log', act.LogActivation()) +register_unary_math_op('abs', act.AbsActivation()) +register_unary_math_op('sigmoid', act.SigmoidActivation()) +register_unary_math_op('tanh', act.TanhActivation()) +register_unary_math_op('square', act.SquareActivation()) + +def add(layeroutput, other): + if is_compatible_with(other, float): + return slope_intercept_layer(input=layeroutput, intercept=other) + assert isinstance(other, LayerOutput) + return mixed_layer(input=[identity_projection(input=layeroutput), + identity_projection(input=other)]) + +LayerOutput.__radd__ = add +LayerOutput.__add__ = add + +def sub(layeroutput, other): + if is_compatible_with(other, float): + return slope_intercept_layer(input=layeroutput, intercept=other) + assert isinstance(other, LayerOutput) + neg = slope_intercept_layer(input=other, slope=-1.0) + return mixed_layer(input=[identity_projection(input=layeroutput), + identity_projection(input=neg)]) + +LayerOutput.__sub__ = sub + +def rsub(layeroutput, other): + neg = slope_intercept_layer(input=layeroutput, slope=-1.0) + return add(neg, other) + +LayerOutput.__rsub__ = rsub diff --git a/python/paddle/trainer_config_helpers/tests/configs/check.md5 b/python/paddle/trainer_config_helpers/tests/configs/check.md5 deleted file mode 100644 index bf0512420ed3c4414d24846d0d50bfa5b711936d..0000000000000000000000000000000000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/check.md5 +++ /dev/null @@ -1,23 +0,0 @@ -86c0815275a9d5eb902e23c6a592f58a img_layers.protostr -a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr -9c038249ec8ff719753a746cdb04c026 layer_activations.protostr -34e04043cbb12931c47fa44ec50eeffc projections.protostr -7334ba0a4544f0623231330fc51d390d shared_fc.protostr -bb8e233b05b8e07f9ed386b7aee4f2c6 shared_lstm.protostr -6b39e34beea8dfb782bee9bd3dea9eb5 simple_rnn_layers.protostr -f98e79e1630d5eb827c300e64836d269 test_bi_grumemory.protostr -0fc1409600f1a3301da994ab9d28b0bf test_cost_layers.protostr -6cd5f28a3416344f20120698470e0a4c test_cost_layers_with_weight.protostr -144bc6d3a509de74115fa623741797ed test_expand_layer.protostr -2378518bdb71e8c6e888b1842923df58 test_fc.protostr -8bb44e1e5072d0c261572307e7672bda test_grumemory_layer.protostr -1f3510672dce7a9ed25317fc58579ac7 test_hsigmoid.protostr -d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr -5433ed33d4e7414eaf658f2a55946186 test_maxout.protostr -251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr -e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr -fded24727338fb8ce44d9951ed8aea08 test_rnn_group.protostr -67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr -f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr -f937a5a6e7e8864b4d8cf56b0f7c7f44 util_layers.protostr -60c9a71e19bd4b2a1253712799d0ae70 test_spp_layer.protostr diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh index 
6102c614de3b34ff56a9d9e8cbfae4c22b0eaea8..7440181970af1cd4470822c700d695ac7124af22 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh @@ -4,16 +4,20 @@ set -e cd `dirname $0` export PYTHONPATH=$PWD/../../../../ +protostr=$PWD/protostr + configs=(test_fc layer_activations projections test_print_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq test_expand_layer test_ntm_layers test_hsigmoid img_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight -test_maxout test_bi_grumemory test_spp_layer) +# test_maxout test_bi_grumemory math_ops test_spp_layer) +test_maxout test_bi_grumemory math_ops test_spp_layer) + for conf in ${configs[*]} do echo "Generating " $conf - python -m paddle.utils.dump_config $conf.py > $conf.protostr + python -m paddle.utils.dump_config $conf.py > $protostr/$conf.protostr.unitest done diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..fe515b7029336d093df5428ab8ac1c65a2d4e98a --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py @@ -0,0 +1,24 @@ +from paddle.trainer_config_helpers import * +from paddle.trainer_config_helpers import math + +settings( + batch_size=1000, + learning_rate=1e-5 +) + +x = data_layer(name='data', size=100) +x = math.exp(x) +x = math.log(x) +x = math.abs(x) +x = math.sigmoid(x) +x = math.square(x) +x = math.square(x) +y = 1 + x +y = y + 1 +y = x + y +y = y - x +y = y - 2 +y = 2 - y + +outputs(y) + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..1f262af21126c17eb133b92c84a1ae3bb280a1d6 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -0,0 +1,176 @@ +type: "nn" +layers { + name: "image" + type: "data" + size: 65536 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconv" + size: 3297856 + active_type: "" + inputs { + input_layer_name: "image" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 32 + channels: 1 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 1 + output_x: 227 + img_size: 256 + caffe_mode: true + filter_size_y: 32 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 64 + shared_biases: true +} +layers { + name: "__batch_norm_0__" + type: "batch_norm" + size: 3297856 + active_type: "relu" + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w0" + image_conf { + channels: 64 + img_size: 227 + } + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w1" + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w2" + } + bias_parameter_name: "___batch_norm_0__.wbias" + moving_average_fraction: 0.9 +} +layers { + name: "__crmnorm_0__" + type: "norm" + size: 3297856 + active_type: "" + inputs { + input_layer_name: "__batch_norm_0__" + norm_conf { + norm_type: "cmrnorm-projection" + channels: 64 + size: 32 + scale: 0.0004 + pow: 0.75 + output_x: 227 + img_size: 227 + blocked: false + } + } +} +layers { + 
name: "__pool_0__" + type: "pool" + size: 2458624 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + pool_conf { + pool_type: "max-projection" + channels: 64 + size_x: 32 + stride: 1 + output_x: 196 + img_size: 227 + padding: 0 + size_y: 32 + stride_y: 1 + output_y: 196 + img_size_y: 227 + padding_y: 0 + } + } +} +parameters { + name: "___conv_0__.w0" + size: 65536 + initial_mean: 0.0 + initial_std: 0.0441941738242 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 64 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w0" + size: 64 + initial_mean: 1.0 + initial_std: 0.0 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w1" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.w2" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "image" +output_layer_names: "__pool_0__" +output_layer_names: "__crmnorm_0__" +sub_models { + name: "root" + layer_names: "image" + layer_names: "__conv_0__" + layer_names: "__batch_norm_0__" + layer_names: "__crmnorm_0__" + layer_names: "__pool_0__" + input_layer_names: "image" + output_layer_names: "__pool_0__" + output_layer_names: "__crmnorm_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr new file mode 100644 index 0000000000000000000000000000000000000000..7b2911f8e367ebf9d0797e815a7532c714ef698e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr @@ -0,0 +1,69 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "__first_seq_0__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + select_first: true + trans_type: "seq" +} +layers { + name: "__first_seq_1__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + trans_type: "seq" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + trans_type: "non-seq" +} +input_layer_names: "data" +output_layer_names: "__first_seq_0__" +output_layer_names: "__first_seq_1__" +output_layer_names: "__last_seq_0__" +output_layer_names: "__last_seq_1__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__first_seq_0__" + layer_names: "__first_seq_1__" + layer_names: "__last_seq_0__" + layer_names: "__last_seq_1__" + input_layer_names: "data" + output_layer_names: "__first_seq_0__" + output_layer_names: "__first_seq_1__" + output_layer_names: "__last_seq_0__" + output_layer_names: "__last_seq_1__" + is_recurrent_layer_group: false +} + diff --git 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr new file mode 100644 index 0000000000000000000000000000000000000000..ecf39e4d32167d4e838c43929cc4e7a87ff421a8 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr @@ -0,0 +1,423 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "layer_0" + type: "fc" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_0.w0" + } + bias_parameter_name: "_layer_0.wbias" +} +layers { + name: "layer_1" + type: "fc" + size: 100 + active_type: "sigmoid" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_1.w0" + } + bias_parameter_name: "_layer_1.wbias" +} +layers { + name: "layer_2" + type: "fc" + size: 100 + active_type: "softmax" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_2.w0" + } + bias_parameter_name: "_layer_2.wbias" +} +layers { + name: "layer_3" + type: "fc" + size: 100 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_3.w0" + } + bias_parameter_name: "_layer_3.wbias" +} +layers { + name: "layer_4" + type: "fc" + size: 100 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_4.w0" + } + bias_parameter_name: "_layer_4.wbias" +} +layers { + name: "layer_5" + type: "fc" + size: 100 + active_type: "exponential" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_5.w0" + } + bias_parameter_name: "_layer_5.wbias" +} +layers { + name: "layer_6" + type: "fc" + size: 100 + active_type: "relu" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_6.w0" + } + bias_parameter_name: "_layer_6.wbias" +} +layers { + name: "layer_7" + type: "fc" + size: 100 + active_type: "brelu" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_7.w0" + } + bias_parameter_name: "_layer_7.wbias" +} +layers { + name: "layer_8" + type: "fc" + size: 100 + active_type: "softrelu" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_8.w0" + } + bias_parameter_name: "_layer_8.wbias" +} +layers { + name: "layer_9" + type: "fc" + size: 100 + active_type: "stanh" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_9.w0" + } + bias_parameter_name: "_layer_9.wbias" +} +layers { + name: "layer_10" + type: "fc" + size: 100 + active_type: "abs" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_10.w0" + } + bias_parameter_name: "_layer_10.wbias" +} +layers { + name: "layer_11" + type: "fc" + size: 100 + active_type: "square" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_11.w0" + } + bias_parameter_name: "_layer_11.wbias" +} +parameters { + name: "_layer_0.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_0.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_1.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_1.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} 
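A note on reading these generated dumps: the initialization constants are not arbitrary. Wherever initial_smart: true appears, the helpers evidently choose initial_std = 1/sqrt(fan-in) (0.1 for a 100-wide input, 0.0707106781187 for 200, and so on), while biases use initial_smart: false with mean and std both zero. The gate-carrying layers also pack their blocks into one matrix, which explains the larger sizes in the recurrent fixtures further below. A plain-Python check against values copied from this diff; the 3x/4x/7x packing factors are stated here as assumptions about GRU and LSTM layout, not read directly from the dumps:

```python
# Sanity checks against constants appearing in the generated fixtures.
# Assumptions: initial_smart: true => initial_std = 1/sqrt(fan_in);
# a GRU of size n packs 3 gate blocks (n x 3n weight, 3n bias); an LSTM
# packs 4 (n x n x 4 weight) with a 7n bias (4 gates + 3 peepholes).
import math

# initial_std values as printed in the dumps (~12 significant digits)
for fan_in, std in [(100, 0.1), (200, 0.0707106781187), (256, 0.0625),
                    (300, 0.057735026919), (40, 0.158113883008),
                    (120, 0.0912870929175), (57600, 0.00416666666667)]:
    assert abs(1.0 / math.sqrt(fan_in) - std) < 1e-12

n = 200  # hidden size used in simple_rnn_layers.protostr below
assert 3 * n * n == 120000  # ___gru_0__.w0, with a 3n = 600 bias
assert 4 * n * n == 160000  # ___lstmemory_0__.w0 (dims 200 x 200 x 4)
assert 7 * n == 1400        # ___lstmemory_0__.wbias
```

The same rule covers test_bi_grumemory.protostr further down: its size: 40 GRUs carry 40 x 120 = 4800 weights and 120-entry biases.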
+parameters { + name: "_layer_2.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_2.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_3.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_3.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_4.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_4.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_5.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_5.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_6.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_6.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_7.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_7.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_8.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_8.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_9.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_9.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_10.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_10.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_11.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_11.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "input" +output_layer_names: "layer_0" +output_layer_names: "layer_1" +output_layer_names: "layer_2" +output_layer_names: "layer_3" +output_layer_names: "layer_4" +output_layer_names: "layer_5" +output_layer_names: "layer_6" +output_layer_names: "layer_7" +output_layer_names: "layer_8" +output_layer_names: "layer_9" 
+output_layer_names: "layer_10" +output_layer_names: "layer_11" +sub_models { + name: "root" + layer_names: "input" + layer_names: "layer_0" + layer_names: "layer_1" + layer_names: "layer_2" + layer_names: "layer_3" + layer_names: "layer_4" + layer_names: "layer_5" + layer_names: "layer_6" + layer_names: "layer_7" + layer_names: "layer_8" + layer_names: "layer_9" + layer_names: "layer_10" + layer_names: "layer_11" + input_layer_names: "input" + output_layer_names: "layer_0" + output_layer_names: "layer_1" + output_layer_names: "layer_2" + output_layer_names: "layer_3" + output_layer_names: "layer_4" + output_layer_names: "layer_5" + output_layer_names: "layer_6" + output_layer_names: "layer_7" + output_layer_names: "layer_8" + output_layer_names: "layer_9" + output_layer_names: "layer_10" + output_layer_names: "layer_11" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr new file mode 100644 index 0000000000000000000000000000000000000000..1767445c44bf5c0ea7c1149ad9fef2dd92508c54 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr @@ -0,0 +1,235 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__exp_0__" + type: "mixed" + size: 100 + active_type: "exponential" + inputs { + input_layer_name: "data" + proj_conf { + type: "identity" + name: "___exp_0__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__log_0__" + type: "mixed" + size: 100 + active_type: "log" + inputs { + input_layer_name: "__exp_0__" + proj_conf { + type: "identity" + name: "___log_0__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__abs_0__" + type: "mixed" + size: 100 + active_type: "abs" + inputs { + input_layer_name: "__log_0__" + proj_conf { + type: "identity" + name: "___abs_0__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__sigmoid_0__" + type: "mixed" + size: 100 + active_type: "sigmoid" + inputs { + input_layer_name: "__abs_0__" + proj_conf { + type: "identity" + name: "___sigmoid_0__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__square_0__" + type: "mixed" + size: 100 + active_type: "square" + inputs { + input_layer_name: "__sigmoid_0__" + proj_conf { + type: "identity" + name: "___square_0__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__square_1__" + type: "mixed" + size: 100 + active_type: "square" + inputs { + input_layer_name: "__square_0__" + proj_conf { + type: "identity" + name: "___square_1__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__slope_intercept_layer_0__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__square_1__" + } + slope: 1.0 + intercept: 1 +} +layers { + name: "__slope_intercept_layer_1__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__slope_intercept_layer_0__" + } + slope: 1.0 + intercept: 1 +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__square_1__" + proj_conf { + type: "identity" + name: "___mixed_0__.w0" + input_size: 100 + output_size: 100 + } + } + inputs { + input_layer_name: "__slope_intercept_layer_1__" + proj_conf { + type: "identity" + name: "___mixed_0__.w1" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: 
"__slope_intercept_layer_2__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__square_1__" + } + slope: -1.0 + intercept: 0.0 +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_0__" + proj_conf { + type: "identity" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 100 + } + } + inputs { + input_layer_name: "__slope_intercept_layer_2__" + proj_conf { + type: "identity" + name: "___mixed_1__.w1" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__slope_intercept_layer_3__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_1__" + } + slope: 1.0 + intercept: 2 +} +layers { + name: "__slope_intercept_layer_4__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__slope_intercept_layer_3__" + } + slope: -1.0 + intercept: 0.0 +} +layers { + name: "__slope_intercept_layer_5__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__slope_intercept_layer_4__" + } + slope: 1.0 + intercept: 2 +} +input_layer_names: "data" +output_layer_names: "__slope_intercept_layer_5__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__exp_0__" + layer_names: "__log_0__" + layer_names: "__abs_0__" + layer_names: "__sigmoid_0__" + layer_names: "__square_0__" + layer_names: "__square_1__" + layer_names: "__slope_intercept_layer_0__" + layer_names: "__slope_intercept_layer_1__" + layer_names: "__mixed_0__" + layer_names: "__slope_intercept_layer_2__" + layer_names: "__mixed_1__" + layer_names: "__slope_intercept_layer_3__" + layer_names: "__slope_intercept_layer_4__" + layer_names: "__slope_intercept_layer_5__" + input_layer_names: "data" + output_layer_names: "__slope_intercept_layer_5__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr new file mode 100644 index 0000000000000000000000000000000000000000..e47e531a2223ddaa9dd1dfaf1fcee8a11008cbbd --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr @@ -0,0 +1,315 @@ +type: "nn" +layers { + name: "test" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__embedding_0__" + type: "mixed" + size: 256 + active_type: "" + inputs { + input_layer_name: "test" + input_parameter_name: "___embedding_0__.w0" + proj_conf { + type: "table" + name: "___embedding_0__.w0" + input_size: 100 + output_size: 256 + } + } +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__embedding_0__" + input_parameter_name: "___mixed_0__.w0" + proj_conf { + type: "fc" + name: "___mixed_0__.w0" + input_size: 256 + output_size: 100 + } + } +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_0__" + input_parameter_name: "___mixed_1__.w0" + proj_conf { + type: "table" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__mixed_2__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_1__" + proj_conf { + type: "identity" + name: "___mixed_2__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__mixed_3__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: 
"__mixed_2__" + input_parameter_name: "___mixed_3__.w0" + proj_conf { + type: "dot_mul" + name: "___mixed_3__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__mixed_4__" + type: "mixed" + size: 300 + active_type: "" + inputs { + input_layer_name: "__mixed_3__" + input_parameter_name: "___mixed_4__.w0" + proj_conf { + type: "context" + name: "___mixed_4__.w0" + input_size: 100 + output_size: 300 + context_start: -1 + context_length: 3 + trainable_padding: true + } + } +} +layers { + name: "__mixed_5__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_2__" + } + inputs { + input_layer_name: "__mixed_3__" + } + operator_confs { + type: "dot_mul" + input_indices: 0 + input_indices: 1 + input_sizes: 100 + input_sizes: 100 + output_size: 100 + dotmul_scale: 1 + } +} +layers { + name: "img" + type: "data" + size: 1024 + active_type: "" +} +layers { + name: "filter" + type: "data" + size: 576 + active_type: "" +} +layers { + name: "__mixed_6__" + type: "mixed" + size: 57600 + active_type: "" + inputs { + input_layer_name: "img" + } + inputs { + input_layer_name: "filter" + } + operator_confs { + type: "conv" + input_indices: 0 + input_indices: 1 + input_sizes: 1024 + input_sizes: 576 + output_size: 57600 + conv_conf { + filter_size: 3 + channels: 1 + stride: 1 + padding: 0 + groups: 1 + filter_channels: 1 + output_x: 30 + img_size: 32 + caffe_mode: true + filter_size_y: 3 + padding_y: 0 + stride_y: 1 + } + num_filters: 64 + } +} +layers { + name: "__mixed_7__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_4__" + input_parameter_name: "___mixed_7__.w0" + proj_conf { + type: "fc" + name: "___mixed_7__.w0" + input_size: 300 + output_size: 100 + } + } + inputs { + input_layer_name: "__mixed_5__" + input_parameter_name: "___mixed_7__.w1" + proj_conf { + type: "trans_fc" + name: "___mixed_7__.w1" + input_size: 100 + output_size: 100 + } + } + inputs { + input_layer_name: "__mixed_6__" + input_parameter_name: "___mixed_7__.w2" + proj_conf { + type: "fc" + name: "___mixed_7__.w2" + input_size: 57600 + output_size: 100 + } + } + drop_rate: 0.5 +} +parameters { + name: "___embedding_0__.w0" + size: 25600 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 256 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_0__.w0" + size: 25600 + initial_mean: 0.0 + initial_std: 0.0625 + dims: 256 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_1__.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_3__.w0" + size: 100 + initial_mean: 0.0 + initial_std: 1.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_4__.w0" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 2 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___mixed_7__.w0" + size: 30000 + initial_mean: 0.0 + initial_std: 0.057735026919 + dims: 300 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_7__.w1" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_7__.w2" + size: 5760000 + initial_mean: 0.0 + initial_std: 0.00416666666667 + dims: 57600 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "test" +input_layer_names: 
"img" +input_layer_names: "filter" +output_layer_names: "__mixed_7__" +sub_models { + name: "root" + layer_names: "test" + layer_names: "__embedding_0__" + layer_names: "__mixed_0__" + layer_names: "__mixed_1__" + layer_names: "__mixed_2__" + layer_names: "__mixed_3__" + layer_names: "__mixed_4__" + layer_names: "__mixed_5__" + layer_names: "img" + layer_names: "filter" + layer_names: "__mixed_6__" + layer_names: "__mixed_7__" + input_layer_names: "test" + input_layer_names: "img" + input_layer_names: "filter" + output_layer_names: "__mixed_7__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr new file mode 100644 index 0000000000000000000000000000000000000000..3e8633b0798318bfc50988dbd329256629d5176c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr @@ -0,0 +1,125 @@ +type: "nn" +layers { + name: "feature_a" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "feature_b" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "feature_a" + input_parameter_name: "fc_param" + } + bias_parameter_name: "bias_param" +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "feature_b" + input_parameter_name: "fc_param" + } + bias_parameter_name: "bias_param" +} +layers { + name: "__fc_layer_2__" + type: "fc" + size: 10 + active_type: "softmax" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "softmax_param" + } + inputs { + input_layer_name: "__fc_layer_1__" + input_parameter_name: "softmax_param" + } +} +layers { + name: "label" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__cost_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_2__" + } + inputs { + input_layer_name: "label" + } + coeff: 1.0 +} +parameters { + name: "fc_param" + size: 40000 + initial_mean: 0.0 + initial_std: 1.0 + dims: 200 + dims: 200 + initial_strategy: 1 + initial_smart: false +} +parameters { + name: "bias_param" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "softmax_param" + size: 2000 + initial_mean: 0.0 + initial_std: 1.0 + dims: 200 + dims: 10 + initial_strategy: 1 + initial_smart: false +} +input_layer_names: "feature_a" +input_layer_names: "feature_b" +input_layer_names: "label" +output_layer_names: "__cost_0__" +evaluators { + name: "classification_error_evaluator" + type: "classification_error" + input_layers: "__fc_layer_2__" + input_layers: "label" +} +sub_models { + name: "root" + layer_names: "feature_a" + layer_names: "feature_b" + layer_names: "__fc_layer_0__" + layer_names: "__fc_layer_1__" + layer_names: "__fc_layer_2__" + layer_names: "label" + layer_names: "__cost_0__" + input_layer_names: "feature_a" + input_layer_names: "feature_b" + input_layer_names: "label" + output_layer_names: "__cost_0__" + evaluator_names: "classification_error_evaluator" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr new file mode 100644 index 
0000000000000000000000000000000000000000..0a83499b724806666a241489467207f3c7151a3a --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr @@ -0,0 +1,393 @@ +type: "recurrent_nn" +layers { + name: "data_a" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "data_b" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "data_a" + input_parameter_name: "mixed_param" + proj_conf { + type: "fc" + name: "___mixed_0__.w0" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "data_b" + input_parameter_name: "mixed_param" + proj_conf { + type: "fc" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_0___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_0__@__lstm_group_0___recurrent_group" + type: "scatter_agent" + size: 400 + active_type: "" +} +layers { + name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" + proj_conf { + type: "identity" + name: "___lstm_group_0___input_recurrent.w0" + input_size: 400 + output_size: 400 + } + } + inputs { + input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + input_parameter_name: "lstm_param" + proj_conf { + type: "fc" + name: "___lstm_group_0___input_recurrent.w1" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + type: "lstm_step" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + } + inputs { + input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + } + bias_parameter_name: "lstm_bias" + active_gate_type: "sigmoid" + active_state_type: "sigmoid" +} +layers { + name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + type: "get_output" + size: 100 + active_type: "" + inputs { + input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + input_layer_argument: "state" + } +} +layers { + name: "__lstm_group_0__" + type: "gather_agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_1___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_1__@__lstm_group_1___recurrent_group" + type: "scatter_agent" + size: 400 + active_type: "" +} +layers { + name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "__mixed_1__@__lstm_group_1___recurrent_group" + proj_conf { + type: "identity" + name: "___lstm_group_1___input_recurrent.w0" + input_size: 400 + output_size: 400 + } + } + 
inputs { + input_layer_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + input_parameter_name: "lstm_param" + proj_conf { + type: "fc" + name: "___lstm_group_1___input_recurrent.w1" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + type: "lstm_step" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" + } + inputs { + input_layer_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + } + bias_parameter_name: "lstm_bias" + active_gate_type: "sigmoid" + active_state_type: "sigmoid" +} +layers { + name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" + type: "get_output" + size: 100 + active_type: "" + inputs { + input_layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + input_layer_argument: "state" + } +} +layers { + name: "__lstm_group_1__" + type: "gather_agent" + size: 100 + active_type: "" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "__lstm_group_0__" + } + trans_type: "non-seq" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "__lstm_group_1__" + } + trans_type: "non-seq" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 10 + active_type: "softmax" + inputs { + input_layer_name: "__last_seq_0__" + input_parameter_name: "softmax_param" + } + inputs { + input_layer_name: "__last_seq_1__" + input_parameter_name: "softmax_param" + } +} +layers { + name: "label" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__cost_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "label" + } + coeff: 1.0 +} +parameters { + name: "mixed_param" + size: 40000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 400 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "lstm_param" + size: 40000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 400 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "lstm_bias" + size: 300 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 300 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "softmax_param" + size: 1000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 10 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "data_a" +input_layer_names: "data_b" +input_layer_names: "label" +output_layer_names: "__cost_0__" +evaluators { + name: "classification_error_evaluator" + type: "classification_error" + input_layers: "__fc_layer_0__" + input_layers: "label" +} +sub_models { + name: "root" + layer_names: "data_a" + layer_names: "data_b" + layer_names: "__mixed_0__" + layer_names: "__mixed_1__" + layer_names: "__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__" + layer_names: "__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1__" + layer_names: "__last_seq_0__" + layer_names: "__last_seq_1__" + layer_names: "__fc_layer_0__" + layer_names: "label" + layer_names: "__cost_0__" + input_layer_names: "data_a" + input_layer_names: "data_b" + input_layer_names: "label" + output_layer_names: "__cost_0__" + evaluator_names: "classification_error_evaluator" + is_recurrent_layer_group: false +} +sub_models { + name: "__lstm_group_0___recurrent_group" + layer_names: 
"__mixed_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + memories { + layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_0__" + link_name: "__mixed_0__@__lstm_group_0___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__lstm_group_1___recurrent_group" + layer_names: "__mixed_1__@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1__@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1___state@__lstm_group_1___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + is_sequence: false + } + memories { + layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" + link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_1__" + link_name: "__mixed_1__@__lstm_group_1___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + link_name: "__lstm_group_1__" + has_subseq: false + } + target_inlinkid: -1 +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..dacb40185f863025528c2d4eeb8b325425953a93 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr @@ -0,0 +1,418 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "data" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__recurrent_layer_0__" + type: "recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___recurrent_layer_0__.w0" + } + bias_parameter_name: "___recurrent_layer_0__.wbias" + reversed: false +} +layers { + name: "__recurrent_layer_1__" + type: "recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___recurrent_layer_1__.w0" + } + 
bias_parameter_name: "___recurrent_layer_1__.wbias" + reversed: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 800 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } +} +layers { + name: "__lstmemory_0__" + type: "lstmemory" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_1__" + input_parameter_name: "___lstmemory_0__.w0" + } + bias_parameter_name: "___lstmemory_0__.wbias" + reversed: false + active_gate_type: "sigmoid" + active_state_type: "tanh" +} +layers { + name: "__fc_layer_2__" + type: "fc" + size: 800 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_2__.w0" + } +} +layers { + name: "__lstmemory_1__" + type: "lstmemory" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_2__" + input_parameter_name: "___lstmemory_1__.w0" + } + bias_parameter_name: "___lstmemory_1__.wbias" + reversed: true + active_gate_type: "sigmoid" + active_state_type: "tanh" +} +layers { + name: "__fc_layer_3__" + type: "fc" + size: 600 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_3__.w0" + } +} +layers { + name: "__gru_0__" + type: "gated_recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_3__" + input_parameter_name: "___gru_0__.w0" + } + bias_parameter_name: "___gru_0__.wbias" + reversed: false + active_gate_type: "sigmoid" +} +layers { + name: "__fc_layer_4__" + type: "fc" + size: 600 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_4__.w0" + } +} +layers { + name: "__gru_1__" + type: "gated_recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_4__" + input_parameter_name: "___gru_1__.w0" + } + bias_parameter_name: "___gru_1__.wbias" + reversed: true + active_gate_type: "sigmoid" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__recurrent_layer_0__" + } + trans_type: "non-seq" +} +layers { + name: "__first_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__recurrent_layer_1__" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__lstmemory_0__" + } + trans_type: "non-seq" +} +layers { + name: "__first_seq_1__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__lstmemory_1__" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__last_seq_2__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__gru_0__" + } + trans_type: "non-seq" +} +layers { + name: "__first_seq_2__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__gru_1__" + } + select_first: true + trans_type: "non-seq" +} +parameters { + name: "___fc_layer_0__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___recurrent_layer_0__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 
200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___recurrent_layer_0__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___recurrent_layer_1__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___recurrent_layer_1__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 800 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_0__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 200 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_0__.wbias" + size: 1400 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1400 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_2__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 800 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_1__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 200 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_1__.wbias" + size: 1400 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1400 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_3__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_0__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_0__.wbias" + size: 600 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 600 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_4__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_1__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_1__.wbias" + size: 600 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 600 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__last_seq_0__" +output_layer_names: "__first_seq_0__" +output_layer_names: "__last_seq_1__" +output_layer_names: "__first_seq_1__" +output_layer_names: "__last_seq_2__" +output_layer_names: "__first_seq_2__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__fc_layer_0__" + layer_names: "__recurrent_layer_0__" + layer_names: "__recurrent_layer_1__" + layer_names: "__fc_layer_1__" + layer_names: "__lstmemory_0__" + layer_names: "__fc_layer_2__" + layer_names: "__lstmemory_1__" + layer_names: "__fc_layer_3__" + layer_names: "__gru_0__" + layer_names: "__fc_layer_4__" + layer_names: "__gru_1__" + layer_names: "__last_seq_0__" + layer_names: "__first_seq_0__" + layer_names: "__last_seq_1__" + layer_names: "__first_seq_1__" + layer_names: "__last_seq_2__" + layer_names: 
"__first_seq_2__" + input_layer_names: "data" + output_layer_names: "__last_seq_0__" + output_layer_names: "__first_seq_0__" + output_layer_names: "__last_seq_1__" + output_layer_names: "__first_seq_1__" + output_layer_names: "__last_seq_2__" + output_layer_names: "__first_seq_2__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr new file mode 100644 index 0000000000000000000000000000000000000000..b110e91498ce7d112987714bd769868179141c54 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr @@ -0,0 +1,152 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 120 + active_type: "" +} +layers { + name: "__bidirectional_gru_0___fw_transform" + type: "mixed" + size: 120 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___bidirectional_gru_0___fw_transform.w0" + proj_conf { + type: "fc" + name: "___bidirectional_gru_0___fw_transform.w0" + input_size: 120 + output_size: 120 + } + } +} +layers { + name: "__bidirectional_gru_0___fw" + type: "gated_recurrent" + size: 40 + active_type: "tanh" + inputs { + input_layer_name: "__bidirectional_gru_0___fw_transform" + input_parameter_name: "___bidirectional_gru_0___fw.w0" + } + bias_parameter_name: "___bidirectional_gru_0___fw.wbias" + reversed: false + active_gate_type: "sigmoid" +} +layers { + name: "__bidirectional_gru_0___bw_transform" + type: "mixed" + size: 120 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___bidirectional_gru_0___bw_transform.w0" + proj_conf { + type: "fc" + name: "___bidirectional_gru_0___bw_transform.w0" + input_size: 120 + output_size: 120 + } + } +} +layers { + name: "__bidirectional_gru_0___bw" + type: "gated_recurrent" + size: 40 + active_type: "tanh" + inputs { + input_layer_name: "__bidirectional_gru_0___bw_transform" + input_parameter_name: "___bidirectional_gru_0___bw.w0" + } + bias_parameter_name: "___bidirectional_gru_0___bw.wbias" + reversed: true + active_gate_type: "sigmoid" +} +layers { + name: "__bidirectional_gru_0__" + type: "concat" + size: 80 + active_type: "" + inputs { + input_layer_name: "__bidirectional_gru_0___fw" + } + inputs { + input_layer_name: "__bidirectional_gru_0___bw" + } +} +parameters { + name: "___bidirectional_gru_0___fw_transform.w0" + size: 14400 + initial_mean: 0.0 + initial_std: 0.0912870929175 + dims: 120 + dims: 120 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___bidirectional_gru_0___fw.w0" + size: 4800 + initial_mean: 0.0 + initial_std: 0.158113883008 + dims: 40 + dims: 120 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___bidirectional_gru_0___fw.wbias" + size: 120 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 120 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___bidirectional_gru_0___bw_transform.w0" + size: 14400 + initial_mean: 0.0 + initial_std: 0.0912870929175 + dims: 120 + dims: 120 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___bidirectional_gru_0___bw.w0" + size: 4800 + initial_mean: 0.0 + initial_std: 0.158113883008 + dims: 40 + dims: 120 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___bidirectional_gru_0___bw.wbias" + size: 120 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 120 + initial_strategy: 0 + initial_smart: false +} 
+input_layer_names: "data" +output_layer_names: "__bidirectional_gru_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__bidirectional_gru_0___fw_transform" + layer_names: "__bidirectional_gru_0___fw" + layer_names: "__bidirectional_gru_0___bw_transform" + layer_names: "__bidirectional_gru_0___bw" + layer_names: "__bidirectional_gru_0__" + input_layer_names: "data" + output_layer_names: "__bidirectional_gru_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..5261cf0c44943689a957bb99c21075bb7341cd49 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -0,0 +1,289 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "labels" + type: "data" + size: 5000 + active_type: "" +} +layers { + name: "probs" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "xe-label" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__ctc_layer_0__" + type: "ctc" + size: 5001 + active_type: "" + inputs { + input_layer_name: "input" + } + inputs { + input_layer_name: "labels" + } + norm_by_times: false +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 4 + active_type: "tanh" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "crf_label" + type: "data" + size: 4 + active_type: "" +} +layers { + name: "__crf_layer_0__" + type: "crf" + size: 4 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___crf_layer_0__.w0" + } + inputs { + input_layer_name: "crf_label" + } + coeff: 1.0 +} +layers { + name: "left" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "right" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__rank_cost_0__" + type: "rank-cost" + size: 1 + active_type: "" + inputs { + input_layer_name: "left" + } + inputs { + input_layer_name: "right" + } + inputs { + input_layer_name: "label" + } + coeff: 1.0 +} +layers { + name: "list_feature" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "list_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__lambda_cost_0__" + type: "lambda_cost" + size: 1 + active_type: "" + inputs { + input_layer_name: "list_feature" + } + inputs { + input_layer_name: "list_scores" + } + NDCG_num: 5 + max_sort_size: -1 +} +layers { + name: "__cross_entropy_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "probs" + } + inputs { + input_layer_name: "xe-label" + } + coeff: 1.0 +} +layers { + name: "__cross_entropy_with_selfnorm_0__" + type: "multi_class_cross_entropy_with_selfnorm" + active_type: "" + inputs { + input_layer_name: "probs" + } + inputs { + input_layer_name: "xe-label" + } + softmax_selfnorm_alpha: 0.1 + coeff: 1.0 +} +layers { + name: "huber_probs" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "huber_label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__huber_cost_0__" + type: "huber" + size: 1 + active_type: "" + inputs { + input_layer_name: "huber_probs" + } + inputs { + input_layer_name: 
"huber_label" + } + coeff: 1.0 +} +layers { + name: "__multi_binary_label_cross_entropy_0__" + type: "multi_binary_label_cross_entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "probs" + } + inputs { + input_layer_name: "xe-label" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 800 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 4 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 4 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___crf_layer_0__.w0" + size: 24 + initial_mean: 0.0 + initial_std: 0.5 + dims: 4 + dims: 6 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "input" +input_layer_names: "labels" +input_layer_names: "crf_label" +input_layer_names: "left" +input_layer_names: "right" +input_layer_names: "label" +input_layer_names: "list_feature" +input_layer_names: "list_scores" +input_layer_names: "probs" +input_layer_names: "xe-label" +input_layer_names: "huber_probs" +input_layer_names: "huber_label" +output_layer_names: "__ctc_layer_0__" +output_layer_names: "__crf_layer_0__" +output_layer_names: "__rank_cost_0__" +output_layer_names: "__lambda_cost_0__" +output_layer_names: "__cross_entropy_0__" +output_layer_names: "__cross_entropy_with_selfnorm_0__" +output_layer_names: "__huber_cost_0__" +output_layer_names: "__multi_binary_label_cross_entropy_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "labels" + layer_names: "probs" + layer_names: "xe-label" + layer_names: "__ctc_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "crf_label" + layer_names: "__crf_layer_0__" + layer_names: "left" + layer_names: "right" + layer_names: "label" + layer_names: "__rank_cost_0__" + layer_names: "list_feature" + layer_names: "list_scores" + layer_names: "__lambda_cost_0__" + layer_names: "__cross_entropy_0__" + layer_names: "__cross_entropy_with_selfnorm_0__" + layer_names: "huber_probs" + layer_names: "huber_label" + layer_names: "__huber_cost_0__" + layer_names: "__multi_binary_label_cross_entropy_0__" + input_layer_names: "input" + input_layer_names: "labels" + input_layer_names: "crf_label" + input_layer_names: "left" + input_layer_names: "right" + input_layer_names: "label" + input_layer_names: "list_feature" + input_layer_names: "list_scores" + input_layer_names: "probs" + input_layer_names: "xe-label" + input_layer_names: "huber_probs" + input_layer_names: "huber_label" + output_layer_names: "__ctc_layer_0__" + output_layer_names: "__crf_layer_0__" + output_layer_names: "__rank_cost_0__" + output_layer_names: "__lambda_cost_0__" + output_layer_names: "__cross_entropy_0__" + output_layer_names: "__cross_entropy_with_selfnorm_0__" + output_layer_names: "__huber_cost_0__" + output_layer_names: "__multi_binary_label_cross_entropy_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr new file mode 100644 index 0000000000000000000000000000000000000000..811b38ae4a51e8faedb59fea2b81a8be3cceeae6 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr @@ -0,0 +1,111 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "label" + type: "data" + 
size: 1 + active_type: "" +} +layers { + name: "weight" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 10 + active_type: "softmax" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__cost_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "label" + } + inputs { + input_layer_name: "weight" + } + coeff: 1.0 +} +layers { + name: "__regression_cost_0__" + type: "square_error" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "label" + } + inputs { + input_layer_name: "weight" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 3000 + initial_mean: 0.0 + initial_std: 0.057735026919 + dims: 300 + dims: 10 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 10 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 10 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "input" +input_layer_names: "label" +input_layer_names: "weight" +output_layer_names: "__cost_0__" +output_layer_names: "__regression_cost_0__" +evaluators { + name: "classification_error_evaluator" + type: "classification_error" + input_layers: "__fc_layer_0__" + input_layers: "label" + input_layers: "weight" +} +sub_models { + name: "root" + layer_names: "input" + layer_names: "label" + layer_names: "weight" + layer_names: "__fc_layer_0__" + layer_names: "__cost_0__" + layer_names: "__regression_cost_0__" + input_layer_names: "input" + input_layer_names: "label" + input_layer_names: "weight" + output_layer_names: "__cost_0__" + output_layer_names: "__regression_cost_0__" + evaluator_names: "classification_error_evaluator" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..f4b36052264bc41b4c06826c3b3c1428c103add7 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr @@ -0,0 +1,56 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "data_seq" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "__expand_layer_0__" + type: "expand" + size: 30 + active_type: "" + inputs { + input_layer_name: "data" + } + inputs { + input_layer_name: "data_seq" + } + trans_type: "seq" +} +layers { + name: "__expand_layer_1__" + type: "expand" + size: 30 + active_type: "" + inputs { + input_layer_name: "data" + } + inputs { + input_layer_name: "data_seq" + } + trans_type: "non-seq" +} +input_layer_names: "data" +input_layer_names: "data_seq" +output_layer_names: "__expand_layer_0__" +output_layer_names: "__expand_layer_1__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "data_seq" + layer_names: "__expand_layer_0__" + layer_names: "__expand_layer_1__" + input_layer_names: "data" + input_layer_names: "data_seq" + output_layer_names: "__expand_layer_0__" + output_layer_names: "__expand_layer_1__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr 
b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr new file mode 100644 index 0000000000000000000000000000000000000000..8151898832ded3796fb8c56b201d5ebfca3ce6cb --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr @@ -0,0 +1,98 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__trans_layer_0__" + type: "trans" + size: 100 + active_type: "" + inputs { + input_layer_name: "data" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__trans_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } +} +layers { + name: "mask" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__selective_fc_layer_0__" + type: "selective_fc" + size: 100 + active_type: "sigmoid" + inputs { + input_layer_name: "data" + input_parameter_name: "___selective_fc_layer_0__.w0" + } + inputs { + input_layer_name: "mask" + } + bias_parameter_name: "___selective_fc_layer_0__.wbias" + selective_fc_pass_generation: false + has_selected_colums: true + selective_fc_full_mul_ratio: 0.02 +} +parameters { + name: "___fc_layer_0__.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___selective_fc_layer_0__.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true + is_sparse: false +} +parameters { + name: "___selective_fc_layer_0__.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +input_layer_names: "mask" +output_layer_names: "__fc_layer_0__" +output_layer_names: "__selective_fc_layer_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__trans_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "mask" + layer_names: "__selective_fc_layer_0__" + input_layer_names: "data" + input_layer_names: "mask" + output_layer_names: "__fc_layer_0__" + output_layer_names: "__selective_fc_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..2c19b2fd120e7c01ee9aa088f674a74498540a3c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr @@ -0,0 +1,51 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 120 + active_type: "" +} +layers { + name: "__gru_0__" + type: "gated_recurrent" + size: 40 + active_type: "sigmoid" + inputs { + input_layer_name: "data" + input_parameter_name: "___gru_0__.w0" + } + bias_parameter_name: "___gru_0__.wbias" + reversed: true + active_gate_type: "tanh" +} +parameters { + name: "___gru_0__.w0" + size: 4800 + initial_mean: 0.0 + initial_std: 0.158113883008 + dims: 40 + dims: 120 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_0__.wbias" + size: 120 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 120 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__gru_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__gru_0__" + input_layer_names: "data" + output_layer_names: "__gru_0__" + 
is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr new file mode 100644 index 0000000000000000000000000000000000000000..e81fcb13c4c6ee8e76036d71d47fdaac9cd3d716 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr @@ -0,0 +1,62 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__hsigmoid_0__" + type: "hsigmoid" + size: 1 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___hsigmoid_0__.w0" + } + inputs { + input_layer_name: "label" + } + bias_parameter_name: "___hsigmoid_0__.wbias" + num_classes: 10 +} +parameters { + name: "___hsigmoid_0__.w0" + size: 900 + initial_mean: 0.0 + initial_std: 0.333333333333 + dims: 9 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___hsigmoid_0__.wbias" + size: 9 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 9 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +input_layer_names: "label" +output_layer_names: "__hsigmoid_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "label" + layer_names: "__hsigmoid_0__" + input_layer_names: "data" + input_layer_names: "label" + output_layer_names: "__hsigmoid_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..76a4afab82c59196564128cb9cb8d72ba2a7b101 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr @@ -0,0 +1,53 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "__lstmemory_0__" + type: "lstmemory" + size: 32 + active_type: "tanh" + inputs { + input_layer_name: "data" + input_parameter_name: "___lstmemory_0__.w0" + } + bias_parameter_name: "___lstmemory_0__.wbias" + reversed: true + active_gate_type: "tanh" + active_state_type: "tanh" +} +parameters { + name: "___lstmemory_0__.w0" + size: 4096 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 32 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_0__.wbias" + size: 224 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 224 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__lstmemory_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__lstmemory_0__" + input_layer_names: "data" + output_layer_names: "__lstmemory_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr new file mode 100644 index 0000000000000000000000000000000000000000..1be2a7ceebfb74d677ac056dcc3a9f72fd31ccd6 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr @@ -0,0 +1,209 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 2304 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconv" + size: 36864 + active_type: 
"" + inputs { + input_layer_name: "data" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 3 + channels: 1 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 1 + output_x: 48 + img_size: 48 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 16 + shared_biases: true +} +layers { + name: "__maxout_layer_0__" + type: "maxout" + size: 18432 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + maxout_conf { + channels: 16 + groups: 2 + img_size_x: 0 + img_size_y: 0 + } + } +} +layers { + name: "__pool_0__" + type: "pool" + size: 4608 + active_type: "" + inputs { + input_layer_name: "__maxout_layer_0__" + pool_conf { + pool_type: "max-projection" + channels: 8 + size_x: 2 + stride: 2 + output_x: 24 + img_size: 48 + padding: 0 + size_y: 2 + stride_y: 2 + output_y: 24 + img_size_y: 48 + padding_y: 0 + } + } +} +layers { + name: "__conv_1__" + type: "exconv" + size: 18432 + active_type: "" + inputs { + input_layer_name: "__pool_0__" + input_parameter_name: "___conv_1__.w0" + conv_conf { + filter_size: 3 + channels: 32 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 32 + output_x: 12 + img_size: 12 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_1__.wbias" + num_filters: 128 + shared_biases: true +} +layers { + name: "__maxout_layer_1__" + type: "maxout" + size: 9216 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + maxout_conf { + channels: 128 + groups: 4 + img_size_x: 0 + img_size_y: 0 + } + } +} +layers { + name: "__block_expand_layer_0__" + type: "blockexpand" + size: 192 + active_type: "" + inputs { + input_layer_name: "__maxout_layer_0__" + block_expand_conf { + channels: 32 + stride_x: 1 + stride_y: 1 + padding_x: 0 + padding_y: 0 + block_x: 1 + block_y: 6 + output_x: 0 + output_y: 0 + img_size_x: 0 + img_size_y: 0 + } + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 384 + active_type: "tanh" + inputs { + input_layer_name: "__block_expand_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } +} +parameters { + name: "___conv_0__.w0" + size: 144 + initial_mean: 0.0 + initial_std: 0.471404520791 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_1__.w0" + size: 36864 + initial_mean: 0.0 + initial_std: 0.0833333333333 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_1__.wbias" + size: 128 + initial_mean: 0.0 + initial_std: 0.0 + dims: 128 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_0__.w0" + size: 73728 + initial_mean: 0.0 + initial_std: 0.0721687836487 + dims: 192 + dims: 384 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "data" +output_layer_names: "__fc_layer_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__conv_0__" + layer_names: "__maxout_layer_0__" + layer_names: "__pool_0__" + layer_names: "__conv_1__" + layer_names: "__maxout_layer_1__" + layer_names: "__block_expand_layer_0__" + layer_names: "__fc_layer_0__" + input_layer_names: "data" + output_layer_names: "__fc_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..1be2a7ceebfb74d677ac056dcc3a9f72fd31ccd6
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr
@@ -0,0 +1,209 @@
+type: "nn"
+layers {
+  name: "data"
+  type: "data"
+  size: 2304
+  active_type: ""
+}
+layers {
+  name: "__conv_0__"
+  type: "exconv"
+  size: 36864
+  active_type: ""
+  inputs {
+    input_layer_name: "data"
+    input_parameter_name: "___conv_0__.w0"
+    conv_conf {
+      filter_size: 3
+      channels: 1
+      stride: 1
+      padding: 1
+      groups: 1
+      filter_channels: 1
+      output_x: 48
+      img_size: 48
+      caffe_mode: true
+      filter_size_y: 3
+      padding_y: 1
+      stride_y: 1
+    }
+  }
+  bias_parameter_name: "___conv_0__.wbias"
+  num_filters: 16
+  shared_biases: true
+}
+layers {
+  name: "__maxout_layer_0__"
+  type: "maxout"
+  size: 18432
+  active_type: ""
+  inputs {
+    input_layer_name: "__conv_0__"
+    maxout_conf {
+      channels: 16
+      groups: 2
+      img_size_x: 0
+      img_size_y: 0
+    }
+  }
+}
+layers {
+  name: "__pool_0__"
+  type: "pool"
+  size: 4608
+  active_type: ""
+  inputs {
+    input_layer_name: "__maxout_layer_0__"
+    pool_conf {
+      pool_type: "max-projection"
+      channels: 8
+      size_x: 2
+      stride: 2
+      output_x: 24
+      img_size: 48
+      padding: 0
+      size_y: 2
+      stride_y: 2
+      output_y: 24
+      img_size_y: 48
+      padding_y: 0
+    }
+  }
+}
+layers {
+  name: "__conv_1__"
+  type: "exconv"
+  size: 18432
+  active_type: ""
+  inputs {
+    input_layer_name: "__pool_0__"
+    input_parameter_name: "___conv_1__.w0"
+    conv_conf {
+      filter_size: 3
+      channels: 32
+      stride: 1
+      padding: 1
+      groups: 1
+      filter_channels: 32
+      output_x: 12
+      img_size: 12
+      caffe_mode: true
+      filter_size_y: 3
+      padding_y: 1
+      stride_y: 1
+    }
+  }
+  bias_parameter_name: "___conv_1__.wbias"
+  num_filters: 128
+  shared_biases: true
+}
+layers {
+  name: "__maxout_layer_1__"
+  type: "maxout"
+  size: 9216
+  active_type: ""
+  inputs {
+    input_layer_name: "__conv_0__"
+    maxout_conf {
+      channels: 128
+      groups: 4
+      img_size_x: 0
+      img_size_y: 0
+    }
+  }
+}
+layers {
+  name: "__block_expand_layer_0__"
+  type: "blockexpand"
+  size: 192
+  active_type: ""
+  inputs {
+    input_layer_name: "__maxout_layer_0__"
+    block_expand_conf {
+      channels: 32
+      stride_x: 1
+      stride_y: 1
+      padding_x: 0
+      padding_y: 0
+      block_x: 1
+      block_y: 6
+      output_x: 0
+      output_y: 0
+      img_size_x: 0
+      img_size_y: 0
+    }
+  }
+}
+layers {
+  name: "__fc_layer_0__"
+  type: "fc"
+  size: 384
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "__block_expand_layer_0__"
+    input_parameter_name: "___fc_layer_0__.w0"
+  }
+}
+parameters {
+  name: "___conv_0__.w0"
+  size: 144
+  initial_mean: 0.0
+  initial_std: 0.471404520791
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___conv_0__.wbias"
+  size: 16
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 16
+  dims: 1
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___conv_1__.w0"
+  size: 36864
+  initial_mean: 0.0
+  initial_std: 0.0833333333333
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___conv_1__.wbias"
+  size: 128
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 128
+  dims: 1
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___fc_layer_0__.w0"
+  size: 73728
+  initial_mean: 0.0
+  initial_std: 0.0721687836487
+  dims: 192
+  dims: 384
+  initial_strategy: 0
+  initial_smart: true
+}
+input_layer_names: "data"
+output_layer_names: "__fc_layer_0__"
+sub_models {
+  name: "root"
+  layer_names: "data"
+  layer_names: "__conv_0__"
+  layer_names: "__maxout_layer_0__"
+  layer_names: "__pool_0__"
+  layer_names: "__conv_1__"
+  layer_names: "__maxout_layer_1__"
+  layer_names: "__block_expand_layer_0__"
+  layer_names: "__fc_layer_0__"
+  input_layer_names: "data"
+  output_layer_names: "__fc_layer_0__"
+  is_recurrent_layer_group: false
+}
+
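
The conv -> maxout -> pool chain at the top of this golden file could come from a config along these lines (a sketch only; the sizes are read off the protostr). Note how maxout with groups=2 halves the channel count, 16 in and 8 out:

    from paddle.trainer_config_helpers import *

    settings(batch_size=1000, learning_rate=1e-5)
    data = data_layer(name='data', size=2304)   # 1 channel, 48 x 48
    conv = img_conv_layer(input=data, filter_size=3, num_channels=1,
                          num_filters=16, padding=1, act=LinearActivation())
    maxout = maxout_layer(input=conv, num_channels=16, groups=2)
    pool = img_pool_layer(input=maxout, num_channels=8, pool_size=2,
                          stride=2, pool_type=MaxPooling())
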
layer_names: "d" + layer_names: "__interpolation_layer_0__" + layer_names: "__power_layer_0__" + layer_names: "__scaling_layer_0__" + layer_names: "__cos_sim_0__" + layer_names: "__cos_sim_1__" + layer_names: "__sum_to_one_norm_layer_0__" + layer_names: "__conv_shift_layer_0__" + layer_names: "__tensor_layer_0__" + layer_names: "__slope_intercept_layer_0__" + layer_names: "__linear_comb_layer_0__" + input_layer_names: "w" + input_layer_names: "a" + input_layer_names: "b" + input_layer_names: "c" + input_layer_names: "d" + output_layer_names: "__interpolation_layer_0__" + output_layer_names: "__power_layer_0__" + output_layer_names: "__scaling_layer_0__" + output_layer_names: "__cos_sim_0__" + output_layer_names: "__cos_sim_1__" + output_layer_names: "__sum_to_one_norm_layer_0__" + output_layer_names: "__conv_shift_layer_0__" + output_layer_names: "__tensor_layer_0__" + output_layer_names: "__slope_intercept_layer_0__" + output_layer_names: "__linear_comb_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..c402aff174ab7c7d7f63234960d4a24d84622dd4 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr @@ -0,0 +1,26 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__print_0__" + type: "print" + active_type: "" + inputs { + input_layer_name: "input" + } +} +input_layer_names: "input" +output_layer_names: "input" +sub_models { + name: "root" + layer_names: "input" + layer_names: "__print_0__" + input_layer_names: "input" + output_layer_names: "input" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr new file mode 100644 index 0000000000000000000000000000000000000000..41d2e2f2671f5c05425f9bd2e91d8adc33129761 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr @@ -0,0 +1,650 @@ +type: "recurrent_nn" +layers { + name: "seq_input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "sub_seq_input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "seq_input" + input_parameter_name: "___mixed_0__.w0" + proj_conf { + type: "fc" + name: "___mixed_0__.w0" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 300 + active_type: "" + inputs { + input_layer_name: "seq_input" + input_parameter_name: "___mixed_1__.w0" + proj_conf { + type: "fc" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 300 + } + } +} +layers { + name: "__recurrent_group_0__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "seq_input@__recurrent_group_0__" + type: "scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "rnn_forward+delay1@__recurrent_group_0__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "rnn_forward@__recurrent_group_0__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "seq_input@__recurrent_group_0__" + 
input_parameter_name: "_rnn_forward@__recurrent_group_0__.w0" + } + inputs { + input_layer_name: "rnn_forward+delay1@__recurrent_group_0__" + input_parameter_name: "_rnn_forward@__recurrent_group_0__.w1" + } + bias_parameter_name: "_rnn_forward@__recurrent_group_0__.wbias" +} +layers { + name: "rnn_forward" + type: "gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "rnn_forward" + } + trans_type: "non-seq" +} +layers { + name: "__recurrent_group_1__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "seq_input@__recurrent_group_1__" + type: "scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "rnn_back+delay1@__recurrent_group_1__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "rnn_back@__recurrent_group_1__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "seq_input@__recurrent_group_1__" + input_parameter_name: "_rnn_back@__recurrent_group_1__.w0" + } + inputs { + input_layer_name: "rnn_back+delay1@__recurrent_group_1__" + input_parameter_name: "_rnn_back@__recurrent_group_1__.w1" + } + bias_parameter_name: "_rnn_back@__recurrent_group_1__.wbias" +} +layers { + name: "rnn_back" + type: "gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__first_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "rnn_back" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__recurrent_group_2__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "sub_seq_input@__recurrent_group_2__" + type: "sequence_scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "rnn_subseq_forward+delay1@__recurrent_group_2__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "rnn_subseq_forward@__recurrent_group_2__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "sub_seq_input@__recurrent_group_2__" + input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w0" + } + inputs { + input_layer_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" + input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w1" + } + bias_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" +} +layers { + name: "rnn_subseq_forward" + type: "sequence_gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "rnn_subseq_forward" + } + trans_type: "non-seq" +} +layers { + name: "__lstm_group_0___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_0__@__lstm_group_0___recurrent_group" + type: "scatter_agent" + size: 400 + active_type: "" +} +layers { + name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" + proj_conf { + type: "identity" + name: "___lstm_group_0___input_recurrent.w0" + input_size: 400 + output_size: 400 + } + } + inputs { + input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..41d2e2f2671f5c05425f9bd2e91d8adc33129761
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -0,0 +1,650 @@
+type: "recurrent_nn"
+layers {
+  name: "seq_input"
+  type: "data"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "sub_seq_input"
+  type: "data"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "label"
+  type: "data"
+  size: 1
+  active_type: ""
+}
+layers {
+  name: "__mixed_0__"
+  type: "mixed"
+  size: 400
+  active_type: ""
+  inputs {
+    input_layer_name: "seq_input"
+    input_parameter_name: "___mixed_0__.w0"
+    proj_conf {
+      type: "fc"
+      name: "___mixed_0__.w0"
+      input_size: 100
+      output_size: 400
+    }
+  }
+}
+layers {
+  name: "__mixed_1__"
+  type: "mixed"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "seq_input"
+    input_parameter_name: "___mixed_1__.w0"
+    proj_conf {
+      type: "fc"
+      name: "___mixed_1__.w0"
+      input_size: 100
+      output_size: 300
+    }
+  }
+}
+layers {
+  name: "__recurrent_group_0__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "seq_input@__recurrent_group_0__"
+  type: "scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "rnn_forward+delay1@__recurrent_group_0__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "rnn_forward@__recurrent_group_0__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "seq_input@__recurrent_group_0__"
+    input_parameter_name: "_rnn_forward@__recurrent_group_0__.w0"
+  }
+  inputs {
+    input_layer_name: "rnn_forward+delay1@__recurrent_group_0__"
+    input_parameter_name: "_rnn_forward@__recurrent_group_0__.w1"
+  }
+  bias_parameter_name: "_rnn_forward@__recurrent_group_0__.wbias"
+}
+layers {
+  name: "rnn_forward"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_0__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "rnn_forward"
+  }
+  trans_type: "non-seq"
+}
+layers {
+  name: "__recurrent_group_1__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "seq_input@__recurrent_group_1__"
+  type: "scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "rnn_back+delay1@__recurrent_group_1__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "rnn_back@__recurrent_group_1__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "seq_input@__recurrent_group_1__"
+    input_parameter_name: "_rnn_back@__recurrent_group_1__.w0"
+  }
+  inputs {
+    input_layer_name: "rnn_back+delay1@__recurrent_group_1__"
+    input_parameter_name: "_rnn_back@__recurrent_group_1__.w1"
+  }
+  bias_parameter_name: "_rnn_back@__recurrent_group_1__.wbias"
+}
+layers {
+  name: "rnn_back"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__first_seq_0__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "rnn_back"
+  }
+  select_first: true
+  trans_type: "non-seq"
+}
+layers {
+  name: "__recurrent_group_2__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "sub_seq_input@__recurrent_group_2__"
+  type: "sequence_scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "rnn_subseq_forward@__recurrent_group_2__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "sub_seq_input@__recurrent_group_2__"
+    input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w0"
+  }
+  inputs {
+    input_layer_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
+    input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w1"
+  }
+  bias_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.wbias"
+}
+layers {
+  name: "rnn_subseq_forward"
+  type: "sequence_gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_1__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "rnn_subseq_forward"
+  }
+  trans_type: "non-seq"
+}
+layers {
+  name: "__lstm_group_0___recurrent_group"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "__mixed_0__@__lstm_group_0___recurrent_group"
+  type: "scatter_agent"
+  size: 400
+  active_type: ""
+}
+layers {
+  name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
+  type: "agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
+  type: "agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group"
+  type: "mixed"
+  size: 400
+  active_type: ""
+  inputs {
+    input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group"
+    proj_conf {
+      type: "identity"
+      name: "___lstm_group_0___input_recurrent.w0"
+      input_size: 400
+      output_size: 400
+    }
+  }
+  inputs {
+    input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
+    input_parameter_name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1"
+    proj_conf {
+      type: "fc"
+      name: "___lstm_group_0___input_recurrent.w1"
+      input_size: 100
+      output_size: 400
+    }
+  }
+}
+layers {
+  name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
+  type: "lstm_step"
+  size: 100
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group"
+  }
+  inputs {
+    input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
+  }
+  bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
+  active_gate_type: "sigmoid"
+  active_state_type: "sigmoid"
+}
+layers {
+  name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
+  type: "get_output"
+  size: 100
+  active_type: ""
+  inputs {
+    input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
+    input_layer_argument: "state"
+  }
+}
+layers {
+  name: "__lstm_group_0__"
+  type: "gather_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__last_seq_2__"
+  type: "seqlastins"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__lstm_group_0__"
+  }
+  trans_type: "non-seq"
+}
+layers {
+  name: "__gru_group_0___recurrent_group"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "__mixed_1__@__gru_group_0___recurrent_group"
+  type: "scatter_agent"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
+  type: "agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__gru_group_0__@__gru_group_0___recurrent_group"
+  type: "gru_step"
+  size: 100
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group"
+    input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
+  }
+  inputs {
+    input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
+  }
+  bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
+  active_gate_type: "sigmoid"
+}
+layers {
+  name: "__gru_group_0__"
+  type: "gather_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__last_seq_3__"
+  type: "seqlastins"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__gru_group_0__"
+  }
+  trans_type: "non-seq"
+}
+parameters {
+  name: "___mixed_0__.w0"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 400
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___mixed_1__.w0"
+  size: 30000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 300
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_forward@__recurrent_group_0__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_forward@__recurrent_group_0__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_forward@__recurrent_group_0__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "_rnn_back@__recurrent_group_1__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_back@__recurrent_group_1__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_back@__recurrent_group_1__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "_rnn_subseq_forward@__recurrent_group_2__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_subseq_forward@__recurrent_group_2__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "_rnn_subseq_forward@__recurrent_group_2__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 400
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
+  size: 300
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 300
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
+  size: 30000
+  initial_mean: 0.0
+  initial_std: 0.01
+  dims: 100
+  dims: 300
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
+  size: 300
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 300
+  initial_strategy: 0
+  initial_smart: false
+}
+input_layer_names: "seq_input"
+input_layer_names: "sub_seq_input"
+output_layer_names: "__last_seq_0__"
+output_layer_names: "__first_seq_0__"
+output_layer_names: "__last_seq_1__"
+output_layer_names: "__last_seq_2__"
+output_layer_names: "__last_seq_3__"
+sub_models {
+  name: "root"
+  layer_names: "seq_input"
+  layer_names: "sub_seq_input"
+  layer_names: "label"
+  layer_names: "__mixed_0__"
+  layer_names: "__mixed_1__"
+  layer_names: "__recurrent_group_0__"
+  layer_names: "rnn_forward"
+  layer_names: "__last_seq_0__"
+  layer_names: "__recurrent_group_1__"
+  layer_names: "rnn_back"
+  layer_names: "__first_seq_0__"
+  layer_names: "__recurrent_group_2__"
+  layer_names: "rnn_subseq_forward"
+  layer_names: "__last_seq_1__"
+  layer_names: "__lstm_group_0___recurrent_group"
+  layer_names: "__lstm_group_0__"
+  layer_names: "__last_seq_2__"
+  layer_names: "__gru_group_0___recurrent_group"
+  layer_names: "__gru_group_0__"
+  layer_names: "__last_seq_3__"
+  input_layer_names: "seq_input"
+  input_layer_names: "sub_seq_input"
+  output_layer_names: "__last_seq_0__"
+  output_layer_names: "__first_seq_0__"
+  output_layer_names: "__last_seq_1__"
+  output_layer_names: "__last_seq_2__"
+  output_layer_names: "__last_seq_3__"
+  is_recurrent_layer_group: false
+}
+sub_models {
+  name: "__recurrent_group_0__"
+  layer_names: "seq_input@__recurrent_group_0__"
+  layer_names: "rnn_forward+delay1@__recurrent_group_0__"
+  layer_names: "rnn_forward@__recurrent_group_0__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "rnn_forward@__recurrent_group_0__"
+    link_name: "rnn_forward+delay1@__recurrent_group_0__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_0__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "rnn_forward@__recurrent_group_0__"
+    link_name: "rnn_forward"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
+sub_models {
+  name: "__recurrent_group_1__"
+  layer_names: "seq_input@__recurrent_group_1__"
+  layer_names: "rnn_back+delay1@__recurrent_group_1__"
+  layer_names: "rnn_back@__recurrent_group_1__"
+  is_recurrent_layer_group: true
+  reversed: true
+  memories {
+    layer_name: "rnn_back@__recurrent_group_1__"
+    link_name: "rnn_back+delay1@__recurrent_group_1__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_1__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "rnn_back@__recurrent_group_1__"
+    link_name: "rnn_back"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
+sub_models {
+  name: "__recurrent_group_2__"
+  layer_names: "sub_seq_input@__recurrent_group_2__"
+  layer_names: "rnn_subseq_forward+delay1@__recurrent_group_2__"
+  layer_names: "rnn_subseq_forward@__recurrent_group_2__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "rnn_subseq_forward@__recurrent_group_2__"
+    link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "sub_seq_input"
+    link_name: "sub_seq_input@__recurrent_group_2__"
+    has_subseq: true
+  }
+  out_links {
+    layer_name: "rnn_subseq_forward@__recurrent_group_2__"
+    link_name: "rnn_subseq_forward"
+    has_subseq: true
+  }
+  target_inlinkid: -1
+}
+sub_models {
+  name: "__lstm_group_0___recurrent_group"
+  layer_names: "__mixed_0__@__lstm_group_0___recurrent_group"
+  layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
+  layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
+  layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group"
+  layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group"
+  layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
+    link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
+    is_sequence: false
+  }
+  memories {
+    layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
+    link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "__mixed_0__"
+    link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
+    link_name: "__lstm_group_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
+sub_models {
+  name: "__gru_group_0___recurrent_group"
+  layer_names: "__mixed_1__@__gru_group_0___recurrent_group"
+  layer_names: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
+  layer_names: "__gru_group_0__@__gru_group_0___recurrent_group"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
+    link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "__mixed_1__"
+    link_name: "__mixed_1__@__gru_group_0___recurrent_group"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
+    link_name: "__gru_group_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
+
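
Each recurrent_group in this file expands into a recurrent_layer_group sub-model with scatter/gather agents at its boundary and a delay-1 agent for every memory. The forward group ("__recurrent_group_0__") could come from a step function roughly like the following (a sketch; the step logic is inferred from the expanded layers, not copied from the test config):

    from paddle.trainer_config_helpers import *

    settings(batch_size=1000, learning_rate=1e-5)
    seq = data_layer(name='seq_input', size=100)

    def step(x):
        # memory() is what becomes the "rnn_forward+delay1" agent above
        mem = memory(name='rnn_forward', size=200)
        return fc_layer(input=[x, mem], size=200, act=TanhActivation(),
                        bias_attr=True, name='rnn_forward')

    rnn = recurrent_group(step=step, input=seq)
    outputs(last_seq(input=rnn))
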
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..1999c006d237eb449d59c8e8a2a83c1e4fab9d0e
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
@@ -0,0 +1,111 @@
+type: "nn"
+layers {
+  name: "dat_in"
+  type: "data"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__seq_pooling_0__"
+  type: "max"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  trans_type: "seq"
+}
+layers {
+  name: "__seq_pooling_1__"
+  type: "max"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  trans_type: "non-seq"
+}
+layers {
+  name: "__seq_pooling_2__"
+  type: "average"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  average_strategy: "average"
+  trans_type: "seq"
+}
+layers {
+  name: "__seq_pooling_3__"
+  type: "average"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  average_strategy: "average"
+  trans_type: "non-seq"
+}
+layers {
+  name: "__seq_pooling_4__"
+  type: "average"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  average_strategy: "sum"
+  trans_type: "seq"
+}
+layers {
+  name: "__seq_pooling_5__"
+  type: "average"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  average_strategy: "sum"
+  trans_type: "non-seq"
+}
+layers {
+  name: "__seq_pooling_6__"
+  type: "max"
+  size: 100
+  active_type: "linear"
+  inputs {
+    input_layer_name: "dat_in"
+  }
+  output_max_index: true
+  trans_type: "non-seq"
+}
+input_layer_names: "dat_in"
+output_layer_names: "__seq_pooling_0__"
+output_layer_names: "__seq_pooling_1__"
+output_layer_names: "__seq_pooling_2__"
+output_layer_names: "__seq_pooling_3__"
+output_layer_names: "__seq_pooling_4__"
+output_layer_names: "__seq_pooling_5__"
+output_layer_names: "__seq_pooling_6__"
+sub_models {
+  name: "root"
+  layer_names: "dat_in"
+  layer_names: "__seq_pooling_0__"
+  layer_names: "__seq_pooling_1__"
+  layer_names: "__seq_pooling_2__"
+  layer_names: "__seq_pooling_3__"
+  layer_names: "__seq_pooling_4__"
+  layer_names: "__seq_pooling_5__"
+  layer_names: "__seq_pooling_6__"
+  input_layer_names: "dat_in"
+  output_layer_names: "__seq_pooling_0__"
+  output_layer_names: "__seq_pooling_1__"
+  output_layer_names: "__seq_pooling_2__"
+  output_layer_names: "__seq_pooling_3__"
+  output_layer_names: "__seq_pooling_4__"
+  output_layer_names: "__seq_pooling_5__"
+  output_layer_names: "__seq_pooling_6__"
+  is_recurrent_layer_group: false
+}
+
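
The pooling variants above map onto pooling_layer with different pooling_type and aggregation-level combinations: trans_type "non-seq" reduces a whole sequence to one vector, while "seq" pools within each sub-sequence of a nested sequence; average_strategy "sum" corresponds to sum pooling. A sketch of a few of them (assumed mapping, sizes from the protostr):

    from paddle.trainer_config_helpers import *

    settings(batch_size=1000, learning_rate=1e-5)
    din = data_layer(name='dat_in', size=100)
    max_whole = pooling_layer(input=din, pooling_type=MaxPooling())
    max_sub = pooling_layer(input=din, pooling_type=MaxPooling(),
                            agg_level=AggregateLevel.EACH_SEQUENCE)
    avg = pooling_layer(input=din, pooling_type=AvgPooling())
    total = pooling_layer(input=din, pooling_type=SumPooling())
    outputs(max_whole, max_sub, avg, total)
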
name: "__recurrent_group_1__" + layer_names: "seq_input@__recurrent_group_1__" + layer_names: "rnn_back+delay1@__recurrent_group_1__" + layer_names: "rnn_back@__recurrent_group_1__" + is_recurrent_layer_group: true + reversed: true + memories { + layer_name: "rnn_back@__recurrent_group_1__" + link_name: "rnn_back+delay1@__recurrent_group_1__" + is_sequence: false + } + in_links { + layer_name: "seq_input" + link_name: "seq_input@__recurrent_group_1__" + has_subseq: false + } + out_links { + layer_name: "rnn_back@__recurrent_group_1__" + link_name: "rnn_back" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__recurrent_group_2__" + layer_names: "sub_seq_input@__recurrent_group_2__" + layer_names: "rnn_subseq_forward+delay1@__recurrent_group_2__" + layer_names: "rnn_subseq_forward@__recurrent_group_2__" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "rnn_subseq_forward@__recurrent_group_2__" + link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" + is_sequence: false + } + in_links { + layer_name: "sub_seq_input" + link_name: "sub_seq_input@__recurrent_group_2__" + has_subseq: true + } + out_links { + layer_name: "rnn_subseq_forward@__recurrent_group_2__" + link_name: "rnn_subseq_forward" + has_subseq: true + } + target_inlinkid: -1 +} +sub_models { + name: "__lstm_group_0___recurrent_group" + layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + memories { + layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_0__" + link_name: "__mixed_0__@__lstm_group_0___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__gru_group_0___recurrent_group" + layer_names: "__mixed_1__@__gru_group_0___recurrent_group" + layer_names: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" + layer_names: "__gru_group_0__@__gru_group_0___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" + link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_1__" + link_name: "__mixed_1__@__gru_group_0___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" + link_name: "__gru_group_0__" + has_subseq: false + } + target_inlinkid: -1 +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr new file mode 100644 index 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..d0ad388165007b8f96f059e5b003c52f756383e5
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr
@@ -0,0 +1,81 @@
+type: "nn"
+layers {
+  name: "a"
+  type: "data"
+  size: 10
+  active_type: ""
+}
+layers {
+  name: "b"
+  type: "data"
+  size: 10
+  active_type: ""
+}
+layers {
+  name: "__addto_0__"
+  type: "addto"
+  size: 10
+  active_type: ""
+  inputs {
+    input_layer_name: "a"
+  }
+  inputs {
+    input_layer_name: "b"
+  }
+}
+layers {
+  name: "__concat_0__"
+  type: "concat"
+  size: 20
+  active_type: ""
+  inputs {
+    input_layer_name: "a"
+  }
+  inputs {
+    input_layer_name: "b"
+  }
+}
+layers {
+  name: "__concat_1__"
+  type: "concat2"
+  size: 20
+  active_type: ""
+  inputs {
+    input_layer_name: "a"
+    proj_conf {
+      type: "identity"
+      name: "___concat_1__.w0"
+      input_size: 10
+      output_size: 10
+    }
+  }
+  inputs {
+    input_layer_name: "b"
+    proj_conf {
+      type: "identity"
+      name: "___concat_1__.w1"
+      input_size: 10
+      output_size: 10
+    }
+  }
+}
+input_layer_names: "a"
+input_layer_names: "b"
+output_layer_names: "__addto_0__"
+output_layer_names: "__concat_0__"
+output_layer_names: "__concat_1__"
+sub_models {
+  name: "root"
+  layer_names: "a"
+  layer_names: "b"
+  layer_names: "__addto_0__"
+  layer_names: "__concat_0__"
+  layer_names: "__concat_1__"
+  input_layer_names: "a"
+  input_layer_names: "b"
+  output_layer_names: "__addto_0__"
+  output_layer_names: "__concat_0__"
+  output_layer_names: "__concat_1__"
+  is_recurrent_layer_group: false
+}
+
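
Of the three utility layers above, addto sums equally sized inputs (size stays 10), plain concat stacks them side by side (size 20), and the concat2 variant concatenates identity projections of its inputs. A sketch of the first two (the projection-based form is left out, since its exact helper invocation is not shown here):

    from paddle.trainer_config_helpers import *

    settings(batch_size=1000, learning_rate=1e-5)
    a = data_layer(name='a', size=10)
    b = data_layer(name='b', size=10)
    outputs(addto_layer(input=[a, b]),
            concat_layer(input=[a, b]))
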
diff --git a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh b/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
index 78114ce32b019cde7a028acde4d281cf6b3dac8e..f05fc46cd55207149b0b8511881eb02b1150c000 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
@@ -1,5 +1,17 @@
 #!/bin/bash
 cd `dirname $0`
+set -e
+
+protostr=`dirname $0`/protostr
+
+files=`ls $protostr | grep -v "unitest"`
+
 ./generate_protostr.sh
-md5sum -c check.md5
+
+for file in $files
+do
+    base_protostr=$protostr/$file
+    new_protostr=$protostr/$file.unitest
+    diff $base_protostr $new_protostr
+done
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
index 6786c27639ea8331604d7585971ec40dab56e1b8..2cbc76ce20b8ab1e1e981bc53d695acaeabf942c 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
@@ -7,7 +7,6 @@ settings(
 data = data_layer(name='data', size=3200)
-
 spp = spp_layer(input=data, pyramid_height=2, num_channels=16,