Commit 4c2b3b6e authored by Yu Yang, committed by GitHub

Merge pull request #484 from PaddlePaddle/release/v0.9.0

Release v0.9.0
...@@ -13,8 +13,6 @@ ...@@ -13,8 +13,6 @@
# The document of clang-format is # The document of clang-format is
# http://clang.llvm.org/docs/ClangFormat.html # http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html # http://clang.llvm.org/docs/ClangFormatStyleOptions.html
#
# TODO(yuyang18): Add python and other language code style
--- ---
Language: Cpp Language: Cpp
BasedOnStyle: Google BasedOnStyle: Google
...@@ -22,8 +20,9 @@ IndentWidth: 2 ...@@ -22,8 +20,9 @@ IndentWidth: 2
TabWidth: 2 TabWidth: 2
ContinuationIndentWidth: 4 ContinuationIndentWidth: 4
AccessModifierOffset: -2 # The private/protected/public has no indent in class AccessModifierOffset: -2 # The private/protected/public has no indent in class
PointerAlignment: Left # int* p/int& p, not int *p/int &p
Standard: Cpp11 Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true
BinPackParameters: false
BinPackArguments: false
... ...
...@@ -3,4 +3,8 @@ build/ ...@@ -3,4 +3,8 @@ build/
*.user *.user
.vscode .vscode
.idea .idea
\ No newline at end of file .project
.cproject
.pydevproject
Makefile
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 4ef03c4223ad322c7adaa6c6c0efb26b57df3b71
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
- id: end-of-file-fixer
#   TODO(yuyang): trailing-whitespace currently has some bugs with markdown
#   files, so please do not add it to the pre-commit hooks for now
# - id: trailing-whitespace
#
#   TODO(yuyang): debug-statements does not fit Paddle, because
#   not all of our Python code is runnable; some of it is used for
#   documentation
# - id: debug-statements
[style]
based_on_style = pep8
column_limit = 80
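For illustration, here is a small, hypothetical Python module already laid out in the style this configuration enforces (`based_on_style = pep8`, `column_limit = 80`); compare the reformatted CIFAR demo scripts later in this commit.

```python
# Hypothetical example; the label map mirrors the CIFAR demo reformatted
# elsewhere in this commit, with dict entries expanded one per line.
CIFAR_LABEL_MAP = {
    0: "airplane",
    1: "automobile",
    2: "bird",
    3: "cat",
    4: "deer",
    5: "dog",
    6: "frog",
    7: "horse",
    8: "ship",
    9: "truck"
}

if __name__ == "__main__":
    # Keep every source line (and every printed line) under 80 columns.
    for label_id in sorted(CIFAR_LABEL_MAP):
        print("%d -> %s" % (label_id, CIFAR_LABEL_MAP[label_id]))
```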
...@@ -2,9 +2,17 @@ language: cpp ...@@ -2,9 +2,17 @@ language: cpp
cache: ccache cache: ccache
sudo: required sudo: required
dist: trusty dist: trusty
os:
- linux
- osx
env: env:
- JOB=DOCS - JOB=DOCS
- JOB=BUILD_AND_TEST - JOB=BUILD_AND_TEST
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux
addons: addons:
apt: apt:
packages: packages:
...@@ -27,9 +35,22 @@ addons: ...@@ -27,9 +35,22 @@ addons:
- libgoogle-glog-dev - libgoogle-glog-dev
- libgflags-dev - libgflags-dev
- libgtest-dev - libgtest-dev
- curl
- lcov
- graphviz
- swig
before_install: before_install:
- pip install wheel protobuf sphinx breathe recommonmark - |
- sudo paddle/scripts/travis/before_install.sh if [ ${JOB} == "BUILD_AND_TEST" ]; then
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
script: script:
- paddle/scripts/travis/main.sh - paddle/scripts/travis/main.sh
notifications: notifications:
......
...@@ -2,14 +2,14 @@ cmake_minimum_required(VERSION 2.8) ...@@ -2,14 +2,14 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C) project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0) set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 8) set(PADDLE_MINOR_VERSION 9)
set(PADDLE_PATCH_VERSION 0b1) set(PADDLE_PATCH_VERSION 0)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION}) set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR}) set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(package) include(package)
include(swig) find_package(SWIG 2.0)
find_package(CUDA QUIET) find_package(CUDA QUIET)
find_package(Protobuf REQUIRED) find_package(Protobuf REQUIRED)
find_package(PythonLibs 2.7 REQUIRED) find_package(PythonLibs 2.7 REQUIRED)
...@@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND}) ...@@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND}) option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
option(ON_TRAVIS "Running test on travis-ci or not." OFF) option(ON_TRAVIS "Running test on travis-ci or not." OFF)
option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
...@@ -49,11 +52,16 @@ endif() ...@@ -49,11 +52,16 @@ endif()
include(enableCXX11) include(enableCXX11)
include(cpplint) include(cpplint)
include(ccache) include(ccache)
if(WITH_RDMA)
include(rdma)
endif()
include(util) include(util)
include(flags) include(flags)
include(cudnn) include(cudnn)
include(FindPythonModule) include(FindPythonModule)
include(check_packages) include(check_packages)
include(swig)
include(coveralls)
# add PaddlePaddle version # add PaddlePaddle version
if(DEFINED ENV{PADDLE_VERSION}) if(DEFINED ENV{PADDLE_VERSION})
...@@ -65,6 +73,19 @@ else() ...@@ -65,6 +73,19 @@ else()
Subversion_WC_INFO(${PROJ_ROOT} Project) Subversion_WC_INFO(${PROJ_ROOT} Project)
add_definitions(-DPADDLE_VERSION=${Project_WC_REVISION}) add_definitions(-DPADDLE_VERSION=${Project_WC_REVISION})
endif() endif()
elseif(EXISTS ${PROJ_ROOT}/.git/)
find_package(Git REQUIRED)
execute_process(
COMMAND ${GIT_EXECUTABLE} log -1 --format=%H
WORKING_DIRECTORY ${PROJ_ROOT}
OUTPUT_VARIABLE GIT_SHA1
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT ${GIT_RESULT})
add_definitions(-DPADDLE_VERSION=\"${GIT_SHA1}\")
else()
message(WARNING "Cannot add paddle version from git tag")
endif()
endif() endif()
endif() endif()
...@@ -74,11 +95,24 @@ if(NOT WITH_GPU) ...@@ -74,11 +95,24 @@ if(NOT WITH_GPU)
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else() else()
if(${CUDA_VERSION_MAJOR} GREATER 6)
if(COMPILER_SUPPORT_CXX11)
LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
endif()
endif()
# TODO(yuyang18): Change it to remove std=c++11 in cuda compile. # TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
set(CUDA_PROPAGATE_HOST_FLAGS OFF) set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if(NOT CUDNN_FOUND) if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile") message(FATAL_ERROR "Paddle need cudnn to compile")
endif() endif()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
if(WITH_DSO) if(WITH_DSO)
set(CUDA_LIBRARIES "") set(CUDA_LIBRARIES "")
...@@ -91,7 +125,7 @@ else() ...@@ -91,7 +125,7 @@ else()
endif(NOT WITH_GPU) endif(NOT WITH_GPU)
if(WITH_DOUBLE) if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE -DHPPL_TYPE_DOUBLE) add_definitions(-DPADDLE_TYPE_DOUBLE)
set(ACCURACY double) set(ACCURACY double)
else(WITH_DOUBLE) else(WITH_DOUBLE)
set(ACCURACY float) set(ACCURACY float)
...@@ -102,11 +136,11 @@ if(NOT WITH_TIMER) ...@@ -102,11 +136,11 @@ if(NOT WITH_TIMER)
endif(NOT WITH_TIMER) endif(NOT WITH_TIMER)
if(WITH_AVX) if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX) else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX) endif(WITH_AVX)
if(WITH_PYTHON) if(WITH_PYTHON)
...@@ -116,12 +150,15 @@ else(WITH_PYTHON) ...@@ -116,12 +150,15 @@ else(WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON) add_definitions(-DPADDLE_NO_PYTHON)
endif(WITH_PYTHON) endif(WITH_PYTHON)
if(NOT WITH_RDMA) if(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA) include_directories("${RDMA_INC_DIR}")
endif() else(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
if(WITH_GLOG) if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG) add_definitions(-DPADDLE_USE_GLOG)
include_directories(${LIBGLOG_INCLUDE_DIR})
endif() endif()
if(WITH_GFLAGS) if(WITH_GFLAGS)
......
Thank you for contributing to PaddlePaddle. Submitting an issue is a great help to us.
Both Chinese and English issues are welcome.
It's hard to solve a problem when important details are missing.
Before submitting an issue, please check it against the following criteria.
- [ ] Was a similar issue submitted or resolved before? You can search for issues on GitHub.
- [ ] Did you search for your issue on widely used search engines?
- [ ] Is my description of the issue clear enough to reproduce the problem?
  * If an error occurred, we need details such as `how do you run your code?`, `what system do you use?`, `are you using a GPU or not?`, etc.
  * If you use a recording from [asciinema](https://asciinema.org/) to show what you are doing, that's awesome! We can help you solve the problem more quickly.
- [ ] Does my description of the issue use GitHub Markdown correctly?
  * Please use proper Markdown syntax for styling all forms of writing, e.g., source code, error messages, etc.
  * Check out [this page](https://guides.github.com/features/mastering-markdown/) to learn much more about Markdown.
# PaddlePaddle # PaddlePaddle
[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
Welcome to the PaddlePaddle GitHub.
The software will be released on Sept. 30 with full documentation and installation support. [![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
[![Coverage Status](https://coveralls.io/repos/github/baidu/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/baidu/Paddle?branch=develop)
[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
A pre-release version is available now for those who are eager to take a look. Welcome to the PaddlePaddle GitHub.
PaddlePaddle (PArallel Distributed Deep LEarning) is an easy-to-use, PaddlePaddle (PArallel Distributed Deep LEarning) is an easy-to-use,
efficient, flexible and scalable deep learning platform, which is originally efficient, flexible and scalable deep learning platform, which is originally
developed by Baidu scientists and engineers for the purpose of applying deep developed by Baidu scientists and engineers for the purpose of applying deep
learning to many products at Baidu. learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/baidu/Paddle/releases) to track the latest features of PaddlePaddle.
## Features ## Features
- **Flexibility** - **Flexibility**
PaddlePaddle supports a wide range of neural network architectures and PaddlePaddle supports a wide range of neural network architectures and
optimization algorithms. It is easy to configure complex models such as optimization algorithms. It is easy to configure complex models such as
neural machine translation model with attention mechanism or complex memory neural machine translation model with attention mechanism or complex memory
connection. connection.
- **Efficiency** - **Efficiency**
In order to unleash the power of heterogeneous computing resource, In order to unleash the power of heterogeneous computing resource,
optimization occurs at different levels of PaddlePaddle, including optimization occurs at different levels of PaddlePaddle, including
computing, memory, architecture and communication. The following are some computing, memory, architecture and communication. The following are some
examples: examples:
1. Optimized math operations through SSE/AVX intrinsics, BLAS libraries
(e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels. - Optimized math operations through SSE/AVX intrinsics, BLAS libraries
2. Highly optimized recurrent networks which can handle **variable-length** (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
sequence without padding. - Highly optimized recurrent networks which can handle **variable-length**
3. Optimized local and distributed training for models with high dimensional sequence without padding.
sparse data. - Optimized local and distributed training for models with high dimensional
sparse data.
- **Scalability** - **Scalability**
With PaddlePaddle, it is easy to use many CPUs/GPUs and machines to speed With PaddlePaddle, it is easy to use many CPUs/GPUs and machines to speed
up your training. PaddlePaddle can achieve high throughput and performance up your training. PaddlePaddle can achieve high throughput and performance
via optimized communication. via optimized communication.
- **Connected to Products** - **Connected to Products**
In addition, PaddlePaddle is also designed to be easily deployable. At Baidu, In addition, PaddlePaddle is also designed to be easily deployable. At Baidu,
PaddlePaddle has been deployed into products or service with a vast number PaddlePaddle has been deployed into products or service with a vast number
of users, including ad click-through rate (CTR) prediction, large-scale image of users, including ad click-through rate (CTR) prediction, large-scale image
classification, optical character recognition(OCR), search ranking, computer classification, optical character recognition(OCR), search ranking, computer
virus detection, recommendation, etc. It is widely utilized in products at virus detection, recommendation, etc. It is widely utilized in products at
Baidu and it has achieved a significant impact. We hope you can also exploit Baidu and it has achieved a significant impact. We hope you can also exploit
the capability of PaddlePaddle to make a huge impact for your product. the capability of PaddlePaddle to make a huge impact for your product.
## Installation ## Installation
See [Installation Guide](http://paddlepaddle.org/doc/build/) to install from pre-built package or build from the source code. (Note: The installation packages are still in pre-release state and your experience of installation may not be smooth.). Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
## Documentation ## Documentation
- [Chinese Documentation](http://paddlepaddle.org/doc_cn/) <br> Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br> - [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
You can follow the quick start tutorial to learn how to use PaddlePaddle You can follow the quick start tutorial to learn how to use PaddlePaddle
...@@ -81,9 +88,9 @@ See [Installation Guide](http://paddlepaddle.org/doc/build/) to install from pre ...@@ -81,9 +88,9 @@ See [Installation Guide](http://paddlepaddle.org/doc/build/) to install from pre
- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br> - [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>
## Ask Questions ## Ask Questions
Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
If you want to ask questions and discuss about methods and models, welcome **paddle-dev@baidu.com** to ask questions and talk about methods and models.
to send email to paddle-dev@baidu.com. Framework development discussions and Framework development discussions and
bug reports are collected on [Issues](https://github.com/baidu/paddle/issues). bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
## Copyright and License ## Copyright and License
......
# Release v0.9.0
## New Features:
* New Layers
* bilinear interpolation layer.
* spatial pyramid-pool layer.
* de-convolution layer.
* maxout layer.
* Support rectangular padding, stride, window and input for the pooling operation.
* Add `--job=time` in trainer, which can be used to print timing info without the compile option `WITH_TIMER=ON`.
* Expose cost_weight/nce_layer in `trainer_config_helpers`
* Add FAQ, concepts, h-rnn docs.
* Add Bidi-LSTM and DB-LSTM to the quick start demo (@alvations)
* Add usage tracking scripts.
## Improvements
* Add Travis-CI for Mac OS X. Enable swig unittest in Travis-CI. Skip Travis-CI when only docs are changed.
* Add code coverage tools.
* Refine convolution layer to speedup and reduce GPU memory.
* Speed up PyDataProvider2
* Add ubuntu deb package build scripts.
* Make Paddle use git-flow branching model.
* PServer supports having no parameter blocks.
## Bug Fixes
* Add zlib link to py_paddle.
* Add a runtime check of input sparse data for sparse layers.
* Fix a bug in sparse matrix multiplication.
* Fix a floating-point overflow problem in tanh.
* Fix some nvcc compile options.
* Fix a bug in the dictionary yield of DataProvider.
* Fix SRL demo hang on exit.
# Release v0.8.0beta.1
New features:
* Mac OS X is supported by building from the source code. #138
* Both GPU and CPU versions of PaddlePaddle are supported.
* Support CUDA 8.0
* Enhance `PyDataProvider2`
* Add a dictionary yield format: `PyDataProvider2` can yield a dictionary whose keys are data_layer names and whose values are the corresponding features (see the sketch after this list).
* Add `min_pool_size` to control the in-memory sample pool of the provider.
* Add `deb` install package & docker image for no_avx machines.
* Especially useful for cloud computing and virtual machines.
* Automatically disable `avx` instructions in cmake when the machine's CPU doesn't support them.
* Add Parallel NN API in trainer_config_helpers.
* Add Travis CI for GitHub.
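A minimal sketch of the dictionary yield format mentioned above, assuming `input_types` may be given as a dict keyed by data_layer name; the layer names (`pixel`, `label`), feature size, and the `read_samples` helper are hypothetical:

```python
# Hypothetical data provider using the dictionary yield format of PyDataProvider2.
from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value


def read_samples(file_name):
    # Placeholder reader: one sample per line, features followed by a label.
    with open(file_name) as f:
        for line in f:
            fields = line.split()
            yield [float(x) for x in fields[:-1]], int(fields[-1])


# Keys of input_types (and of every yielded dict) must match the data_layer
# names in the network config; min_pool_size bounds the in-memory sample pool.
@provider(
    input_types={'pixel': dense_vector(784),
                 'label': integer_value(10)},
    min_pool_size=1024)
def process(settings, file_name):
    for pixel, label in read_samples(file_name):
        yield {'pixel': pixel, 'label': label}
```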
Bug fixes:
* Fix several bugs in trainer_config_helpers and complete its unit tests.
* Check whether PaddlePaddle is installed when running unit tests.
* Fix bugs on GTX-series GPUs.
* Fix a bug in MultinomialSampler.
More documentation has also been written since the last release.
# Release v0.8.0beta.0
PaddlePaddle v0.8.0beta.0 release. The install package is not stable yet and it's a pre-release version.
...@@ -3,36 +3,55 @@ ...@@ -3,36 +3,55 @@
INCLUDE(CheckCXXSourceRuns) INCLUDE(CheckCXXSourceRuns)
SET(FIND_AVX_10)
SET(FIND_AVX_20)
SET(AVX_FLAGS)
SET(AVX_FOUND)
# Check AVX 2
SET(CMAKE_REQUIRED_FLAGS)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(CMAKE_REQUIRED_FLAGS "-mavx2") set(MMX_FLAG "-mmmx")
ELSEIF(MSVC AND NOT CMAKE_CL_64) # reserve for WINDOWS set(SSE2_FLAG "-msse2")
SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2") set(SSE3_FLAG "-msse3")
SET(AVX_FLAG "-mavx")
SET(AVX2_FLAG "-mavx2")
ELSEIF(MSVC)
set(MMX_FLAG "/arch:MMX")
set(SSE2_FLAG "/arch:SSE2")
set(SSE3_FLAG "/arch:SSE3")
SET(AVX_FLAG "/arch:AVX")
SET(AVX2_FLAG "/arch:AVX2")
ENDIF() ENDIF()
# Check MMX
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <mmintrin.h>
int main() int main()
{ {
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); _mm_setzero_si64();
__m256i result = _mm256_abs_epi32 (a);
return 0; return 0;
}" FIND_AVX_20) }" MMX_FOUND)
# Check AVX # Check SSE2
SET(CMAKE_REQUIRED_FLAGS) set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") CHECK_CXX_SOURCE_RUNS("
SET(CMAKE_REQUIRED_FLAGS "-mavx") #include <emmintrin.h>
ELSEIF(MSVC AND NOT CMAKE_CL_64) int main()
SET(CMAKE_REQUIRED_FLAGS "/arch:AVX") {
endif() _mm_setzero_si128();
return 0;
}" SSE2_FOUND)
# Check SSE3
set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
CHECK_CXX_SOURCE_RUNS("
#include <pmmintrin.h>
int main()
{
__m128d a = _mm_set1_pd(6.28);
__m128d b = _mm_set1_pd(3.14);
__m128d result = _mm_addsub_pd(a, b);
result = _mm_movedup_pd(result);
return 0;
}" SSE3_FOUND)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <immintrin.h>
int main() int main()
...@@ -41,25 +60,17 @@ int main() ...@@ -41,25 +60,17 @@ int main()
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b); __m256 result = _mm256_add_ps (a, b);
return 0; return 0;
}" FIND_AVX_10) }" AVX_FOUND)
IF(${FIND_AVX_20})
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
ELSEIF(MSVC)
SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
ENDIF()
ENDIF()
IF(${FIND_AVX_10}) # Check AVX 2
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
SET(AVX_FLAGS "${AVX_FLAGS} -mavx") CHECK_CXX_SOURCE_RUNS("
ELSEIF(MSVC) #include <immintrin.h>
SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX") int main()
ENDIF() {
ENDIF() __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
IF("${FIND_AVX_10}" OR "${FIND_AVX_20}") mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
SET(AVX_FOUND TRUE)
MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
ENDIF()
# Find the CBlas libraries # Find the CBlas and lapack libraries
# #
# It will search MKL, atlas, OpenBlas, reference-cblas in order. # It will search MKL, atlas, OpenBlas, reference-cblas in order.
# #
...@@ -17,10 +17,19 @@ ...@@ -17,10 +17,19 @@
## Find MKL First. ## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include) find_path(MKL_INCLUDE_DIR mkl.h PATHS
find_library(MKL_CORE_LIB NAMES mkl_core PATHS ${MKL_ROOT}/lib) ${MKL_ROOT}/include)
find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS ${MKL_ROOT}/lib) find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS ${MKL_ROOT}/lib) ${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
...@@ -30,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) ...@@ -30,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
${MKL_SEQUENTIAL_LIB} ${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB}) ${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL) add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() # return file. return() # return file.
endif() endif()
...@@ -48,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS ...@@ -48,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS
) )
find_path(ATLAS_INC_DIR NAMES cblas.h find_path(ATLAS_INC_DIR NAMES cblas.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS}) PATHS ${ATLAS_LIB_SEARCH_PATHS})
find_library(ATLAS_LIB NAMES atlas libatlas.so.3 find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS}) PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB) if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS) set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR}) set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB}) set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() return()
endif() endif()
...@@ -76,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS ...@@ -76,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS
find_path(OPENBLAS_INC_DIR NAMES cblas.h find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas find_library(OPENBLAS_LIB NAMES openblas
PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
...@@ -83,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) ...@@ -83,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS) set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR}) set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB}) set(CBLAS_LIBS ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() return()
endif() endif()
......
# CMake script for code coverage.
# If _COVERALLS_UPLOAD is ON, it will upload the JSON files to coveralls.io automatically.
# Param _COVERAGE_SRCS A list of coverage source files.
# Param _COVERALLS_UPLOAD Upload the result to coveralls.
# Param _CMAKE_SCRIPT_PATH CMake script path.
function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
# clean previous gcov data.
file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda)
# Find curl, which is used to upload the JSON later.
if (_COVERALLS_UPLOAD)
find_program(CURL_EXECUTABLE curl)
if (NOT CURL_EXECUTABLE)
message(FATAL_ERROR "Coveralls: curl not found!")
endif()
endif()
# When passing a CMake list to an external process, the list
# will be converted from the format "1;2;3" to "1 2 3".
set(COVERAGE_SRCS "")
foreach (SINGLE_SRC ${_COVERAGE_SRCS})
set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}")
endforeach()
# query number of logical cores
cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES)
# coveralls json file.
set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json)
add_custom_target(coveralls_generate
# Run regress tests.
COMMAND ${CMAKE_CTEST_COMMAND}
-j ${core_size}
--output-on-failure
# Generate Gcov and translate it into coveralls JSON.
COMMAND ${CMAKE_COMMAND}
-DCOVERAGE_SRCS="${COVERAGE_SRCS}"
-DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}"
-DCOV_PATH="${PROJECT_BINARY_DIR}"
-DPROJECT_ROOT="${PROJECT_SOURCE_DIR}"
-P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMENT "Coveralls: generating coveralls output..."
)
if (_COVERALLS_UPLOAD)
message("COVERALLS UPLOAD: ON")
# Upload the JSON to coveralls.
add_custom_target(coveralls_upload
COMMAND ${CURL_EXECUTABLE}
-S -F json_file=@${COVERALLS_FILE}
https://coveralls.io/api/v1/jobs
DEPENDS coveralls_generate
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMENT "Coveralls: uploading coveralls output...")
add_custom_target(coveralls DEPENDS coveralls_upload)
else()
message("COVERALLS UPLOAD: OFF")
add_custom_target(coveralls DEPENDS coveralls_generate)
endif()
endfunction()
if(ON_COVERALLS)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(EXCLUDE_DIRS
"demo/"
"build/"
"tests/"
".test_env/"
)
if(WITH_GPU)
file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c" "*.cu")
else()
file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c")
endif()
# exclude trivial files in PADDLE_SOURCES
foreach(EXCLUDE_DIR ${EXCLUDE_DIRS})
foreach(TMP_PATH ${PADDLE_SOURCES})
string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH})
endif()
endforeach(TMP_PATH)
endforeach()
# convert to absolute path
set(PADDLE_SRCS "")
foreach(PADDLE_SRC ${PADDLE_SOURCES})
set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}")
endforeach()
code_coverage(
"${PADDLE_SRCS}"
${COVERALLS_UPLOAD}
"${PROJECT_SOURCE_DIR}/cmake"
)
endif()
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Copyright (C) 2014 Joakim Söderberg <joakim.soderberg@gmail.com>
#
# This is intended to be run by a custom target in a CMake project like this.
# 0. Compile program with coverage support.
# 1. Clear coverage data. (Recursively delete *.gcda in build dir)
# 2. Run the unit tests.
# 3. Run this script specifying which source files the coverage should be performed on.
#
# This script will then use gcov to generate .gcov files in the directory specified
# via the COV_PATH var. This should probably be the same as your cmake build dir.
#
# It then parses the .gcov files to convert them into the Coveralls JSON format:
# https://coveralls.io/docs/api
#
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
# Since it's not possible to pass a CMake list properly in the
# "1;2;3" format to an external process, we have replaced the
# ";" with "*", so reverse that here so we get it back into the
# CMake list format.
string(REGEX REPLACE "\\*" ";" COVERAGE_SRCS ${COVERAGE_SRCS})
find_program(GCOV_EXECUTABLE gcov)
if (NOT GCOV_EXECUTABLE)
message(FATAL_ERROR "gcov not found! Aborting...")
endif()
find_package(Git)
# TODO: Add these git things to the coveralls json.
if (GIT_FOUND)
# Branch.
execute_process(
COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
macro (git_log_format FORMAT_CHARS VAR_NAME)
execute_process(
COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%${FORMAT_CHARS}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE ${VAR_NAME}
OUTPUT_STRIP_TRAILING_WHITESPACE
)
endmacro()
git_log_format(an GIT_AUTHOR_NAME)
git_log_format(ae GIT_AUTHOR_EMAIL)
git_log_format(cn GIT_COMMITTER_NAME)
git_log_format(ce GIT_COMMITTER_EMAIL)
git_log_format(B GIT_COMMIT_MESSAGE)
message("Git exe: ${GIT_EXECUTABLE}")
message("Git branch: ${GIT_BRANCH}")
message("Git author: ${GIT_AUTHOR_NAME}")
message("Git e-mail: ${GIT_AUTHOR_EMAIL}")
message("Git commiter name: ${GIT_COMMITTER_NAME}")
message("Git commiter e-mail: ${GIT_COMMITTER_EMAIL}")
message("Git commit message: ${GIT_COMMIT_MESSAGE}")
endif()
############################# Macros #########################################
#
# This macro converts from the full path format gcov outputs:
#
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
#
# to the original source file path the .gcov is for:
#
# /path/to/project/root/subdir/the_file.c
#
macro(get_source_path_from_gcov_filename _SRC_FILENAME _GCOV_FILENAME)
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
# ->
# #path#to#project#root#subdir#the_file.c.gcov
get_filename_component(_GCOV_FILENAME_WEXT ${_GCOV_FILENAME} NAME)
# #path#to#project#root#subdir#the_file.c.gcov -> /path/to/project/root/subdir/the_file.c
string(REGEX REPLACE "\\.gcov$" "" SRC_FILENAME_TMP ${_GCOV_FILENAME_WEXT})
string(REGEX REPLACE "\#" "/" SRC_FILENAME_TMP ${SRC_FILENAME_TMP})
set(${_SRC_FILENAME} "${SRC_FILENAME_TMP}")
endmacro()
##############################################################################
# Get the coverage data.
file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
message("GCDA files:")
# Get a list of all the object directories needed by gcov
# (The directories the .gcda files and .o files are found in)
# and run gcov on those.
foreach(GCDA ${GCDA_FILES})
message("Process: ${GCDA}")
message("------------------------------------------------------------------------------")
get_filename_component(GCDA_DIR ${GCDA} PATH)
#
# The -p below refers to "Preserve path components",
# This means that the generated gcov filename of a source file will
# keep the original files entire filepath, but / is replaced with #.
# Example:
#
# /path/to/project/root/build/CMakeFiles/the_file.dir/subdir/the_file.c.gcda
# ------------------------------------------------------------------------------
# File '/path/to/project/root/subdir/the_file.c'
# Lines executed:68.34% of 199
# /path/to/project/root/subdir/the_file.c:creating '#path#to#project#root#subdir#the_file.c.gcov'
#
# If -p is not specified then the file is named only "the_file.c.gcov"
#
execute_process(
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
WORKING_DIRECTORY ${GCDA_DIR}
)
endforeach()
# TODO: Make these be absolute path
file(GLOB_RECURSE ALL_GCOV_FILES "${COV_PATH}" "*.gcov")
# Get only the filenames to use for filtering.
#set(COVERAGE_SRCS_NAMES "")
#foreach (COVSRC ${COVERAGE_SRCS})
# get_filename_component(COVSRC_NAME ${COVSRC} NAME)
# message("${COVSRC} -> ${COVSRC_NAME}")
# list(APPEND COVERAGE_SRCS_NAMES "${COVSRC_NAME}")
#endforeach()
#
# Filter out all but the gcov files we want.
#
# We do this by comparing the list of COVERAGE_SRCS filepaths that the
# user wants the coverage data for with the paths of the generated .gcov files,
# so that we only keep the relevant gcov files.
#
# Example:
# COVERAGE_SRCS =
# /path/to/project/root/subdir/the_file.c
#
# ALL_GCOV_FILES =
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
# /path/to/project/root/build/#path#to#project#root#subdir#other_file.c.gcov
#
# Result should be:
# GCOV_FILES =
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
#
set(GCOV_FILES "")
#message("Look in coverage sources: ${COVERAGE_SRCS}")
message("\nFilter out unwanted GCOV files:")
message("===============================")
set(COVERAGE_SRCS_REMAINING ${COVERAGE_SRCS})
foreach (GCOV_FILE ${ALL_GCOV_FILES})
#
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
# ->
# /path/to/project/root/subdir/the_file.c
get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE})
# Is this in the list of source files?
# TODO: We want to match against relative path filenames from the source file root...
list(FIND COVERAGE_SRCS ${GCOV_SRC_PATH} WAS_FOUND)
if (NOT WAS_FOUND EQUAL -1)
message("YES: ${GCOV_FILE}")
list(APPEND GCOV_FILES ${GCOV_FILE})
# We remove it from the list, so we don't bother searching for it again.
# Also files left in COVERAGE_SRCS_REMAINING after this loop ends should
# have coverage data generated from them (no lines are covered).
list(REMOVE_ITEM COVERAGE_SRCS_REMAINING ${GCOV_SRC_PATH})
else()
message("NO: ${GCOV_FILE}")
endif()
endforeach()
# TODO: Enable setting these
set(JSON_SERVICE_NAME "travis-ci")
set(JSON_SERVICE_JOB_ID $ENV{TRAVIS_JOB_ID})
set(JSON_TEMPLATE
"{
\"service_name\": \"\@JSON_SERVICE_NAME\@\",
\"service_job_id\": \"\@JSON_SERVICE_JOB_ID\@\",
\"source_files\": \@JSON_GCOV_FILES\@
}"
)
set(SRC_FILE_TEMPLATE
"{
\"name\": \"\@GCOV_SRC_REL_PATH\@\",
\"source_digest\": \"\@GCOV_CONTENTS_MD5\@\",
\"coverage\": \@GCOV_FILE_COVERAGE\@
}"
)
message("\nGenerate JSON for files:")
message("=========================")
set(JSON_GCOV_FILES "[")
# Read the GCOV files line by line and get the coverage data.
foreach (GCOV_FILE ${GCOV_FILES})
get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE})
file(RELATIVE_PATH GCOV_SRC_REL_PATH "${PROJECT_ROOT}" "${GCOV_SRC_PATH}")
# The new coveralls API doesn't need the entire source (Yay!)
# However, still keeping that part for now. Will cleanup in the future.
file(MD5 "${GCOV_SRC_PATH}" GCOV_CONTENTS_MD5)
message("MD5: ${GCOV_SRC_PATH} = ${GCOV_CONTENTS_MD5}")
# Loads the gcov file as a list of lines.
# (We first open the file and replace all occurrences of [] with _
# because CMake will fail to parse a line containing unmatched brackets...
# also the \ to escaped \n in macros screws up things.)
# https://public.kitware.com/Bug/view.php?id=15369
file(READ ${GCOV_FILE} GCOV_CONTENTS)
string(REPLACE "[" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
string(REPLACE "]" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
string(REPLACE "\\" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
file(WRITE ${GCOV_FILE}_tmp "${GCOV_CONTENTS}")
file(STRINGS ${GCOV_FILE}_tmp GCOV_LINES)
list(LENGTH GCOV_LINES LINE_COUNT)
# Instead of trying to parse the source from the
# gcov file, simply read the file contents from the source file.
# (Parsing it from the gcov is hard because C-code uses ; in many places
# which also happens to be the same as the CMake list delimiter).
file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE)
string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REGEX REPLACE "\"" "\\\\\"" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REPLACE "\t" "\\\\t" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REPLACE "\r" "\\\\r" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REPLACE "\n" "\\\\n" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
# According to http://json.org/ these should be escaped as well.
# Don't know how to do that in CMake however...
#string(REPLACE "\b" "\\\\b" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
#string(REPLACE "\f" "\\\\f" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
#string(REGEX REPLACE "\u([a-fA-F0-9]{4})" "\\\\u\\1" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
# We want a json array of coverage data as a single string
# start building them from the contents of the .gcov
set(GCOV_FILE_COVERAGE "[")
set(GCOV_LINE_COUNT 1) # Line number for the .gcov.
set(DO_SKIP 0)
foreach (GCOV_LINE ${GCOV_LINES})
#message("${GCOV_LINE}")
# Example of what we're parsing:
# Hitcount |Line | Source
# " 8: 26: if (!allowed || (strlen(allowed) == 0))"
string(REGEX REPLACE
"^([^:]*):([^:]*):(.*)$"
"\\1;\\2;\\3"
RES
"${GCOV_LINE}")
# Check if we should exclude lines using the Lcov syntax.
string(REGEX MATCH "LCOV_EXCL_START" START_SKIP "${GCOV_LINE}")
string(REGEX MATCH "LCOV_EXCL_END" END_SKIP "${GCOV_LINE}")
string(REGEX MATCH "LCOV_EXCL_LINE" LINE_SKIP "${GCOV_LINE}")
set(RESET_SKIP 0)
if (LINE_SKIP AND NOT DO_SKIP)
set(DO_SKIP 1)
set(RESET_SKIP 1)
endif()
if (START_SKIP)
set(DO_SKIP 1)
message("${GCOV_LINE_COUNT}: Start skip")
endif()
if (END_SKIP)
set(DO_SKIP 0)
endif()
list(LENGTH RES RES_COUNT)
if (RES_COUNT GREATER 2)
list(GET RES 0 HITCOUNT)
list(GET RES 1 LINE)
list(GET RES 2 SOURCE)
string(STRIP ${HITCOUNT} HITCOUNT)
string(STRIP ${LINE} LINE)
# Lines with 0 line numbers are metadata and can be ignored.
if (NOT ${LINE} EQUAL 0)
if (DO_SKIP)
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ")
else()
# Translate the hitcount into valid JSON values.
if (${HITCOUNT} STREQUAL "#####")
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ")
elseif (${HITCOUNT} STREQUAL "-")
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ")
else()
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}${HITCOUNT}, ")
endif()
endif()
endif()
else()
message(WARNING "Failed to properly parse line (RES_COUNT = ${RES_COUNT}) ${GCOV_FILE}:${GCOV_LINE_COUNT}\n-->${GCOV_LINE}")
endif()
if (RESET_SKIP)
set(DO_SKIP 0)
endif()
math(EXPR GCOV_LINE_COUNT "${GCOV_LINE_COUNT}+1")
endforeach()
message("${GCOV_LINE_COUNT} of ${LINE_COUNT} lines read!")
# Advanced way of removing the trailing comma in the JSON array.
# "[1, 2, 3, " -> "[1, 2, 3"
string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE})
# Append the trailing ] to complete the JSON array.
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for file: ${GCOV_SRC_REL_PATH}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
# Loop through all files we couldn't find any coverage for
# as well, and generate JSON for those as well with 0% coverage.
foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
# Loads the source file as a list of lines.
file(STRINGS ${NOT_COVERED_SRC} SRC_LINES)
set(GCOV_FILE_COVERAGE "[")
set(GCOV_FILE_SOURCE "")
foreach (SOURCE ${SRC_LINES})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ")
string(REPLACE "\\" "\\\\" SOURCE "${SOURCE}")
string(REGEX REPLACE "\"" "\\\\\"" SOURCE "${SOURCE}")
string(REPLACE "\t" "\\\\t" SOURCE "${SOURCE}")
string(REPLACE "\r" "\\\\r" SOURCE "${SOURCE}")
set(GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}${SOURCE}\\n")
endforeach()
# Remove trailing comma, and complete JSON array with ]
string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
# Get rid of trailing comma.
string(REGEX REPLACE ",[ ]*$" "" JSON_GCOV_FILES ${JSON_GCOV_FILES})
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}]")
# Generate the final complete JSON!
message("Generate final JSON...")
string(CONFIGURE ${JSON_TEMPLATE} JSON)
file(WRITE "${COVERALLS_OUTPUT_FILE}" "${JSON}")
message("###########################################################################")
message("Generated coveralls JSON containing coverage data:")
message("${COVERALLS_OUTPUT_FILE}")
message("###########################################################################")
...@@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name) ...@@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name)
endif() endif()
if(${safe_name}) if(${safe_name})
set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE) set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE)
if(is_c)
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS}
PARENT_SCOPE)
endif()
endif() endif()
endfunction() endfunction()
...@@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name) ...@@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name)
safe_set_flag(OFF ${src_list} ${flag_name}) safe_set_flag(OFF ${src_list} ${flag_name})
endmacro() endmacro()
# helper macro to set nvcc flag
macro(safe_set_nvflag flag_name)
string(REPLACE "-" "_" safe_name ${flag_name})
string(REPLACE "=" "_" safe_name ${safe_name})
CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
if(${safe_name})
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS})
endif()
endmacro()
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
if(NOT UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS)
set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS) set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS)
...@@ -63,14 +71,44 @@ set(COMMON_FLAGS ...@@ -63,14 +71,44 @@ set(COMMON_FLAGS
-Wnon-virtual-dtor -Wnon-virtual-dtor
-Wdelete-non-virtual-dtor -Wdelete-non-virtual-dtor
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=unused-local-typedefs) -Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
)
if (APPLE)
# On Mac OS X build fat binaries with x86_64 architectures by default.
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
else()
set(GPU_COMMON_FLAGS
-Wall
-Wextra
-Werror
${GPU_COMMON_FLAGS})
endif()
foreach(flag ${COMMON_FLAGS}) foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach() endforeach()
foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag})
endforeach()
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here. # So, don't set these flags here.
...@@ -88,15 +126,15 @@ endfunction() ...@@ -88,15 +126,15 @@ endfunction()
# Common gpu architectures: Kepler, Maxwell # Common gpu architectures: Kepler, Maxwell
foreach(capability 30 35 50) foreach(capability 30 35 50)
list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}") list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}")
endforeach() endforeach()
if (CUDA_VERSION VERSION_GREATER "7.0") if (CUDA_VERSION VERSION_GREATER "7.0" OR CUDA_VERSION VERSION_EQUAL "7.0")
list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52") list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52")
endif() endif()
# Modern gpu architectures: Pascal # Modern gpu architectures: Pascal
if (CUDA_VERSION VERSION_GREATER "8.0") if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
endif() endif()
......
# The user should first download RDMA from the Subversion repository.
# Execute the following instructions to download it from svn manually:
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
# We use the static output in the svn repositories to avoid implicit bugs from a non-standard runtime environment.
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
function(generate_rdma_links)
#Redirect to the current DIR to isolate the pollution from the system runtime environment;
#it enables unified control over different gcc environments.
#E.g., by default gcc48 does not refer to /usr/lib64, which could contain low-version
#runtime libraries that would crash the process while loading them. This redirect trick
#fixes that.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
return()
endif()
#If this module is not called, RDMA_INC_DIR and RDMA_LIBS will be empty, so the top-level module can always refer to these variables.
message(FATAL_ERROR "RDMA libraries are not found; try to set RDMA_ROOT or check all related libraries.")
find_program(
SWIG_BINARY_PATH
swig)
if(${SWIG_BINARY_PATH} STREQUAL "SWIG_BINARY_PATH-NOTFOUND")
set(SWIG_FOUND OFF)
else()
set(SWIG_FOUND ON)
endif()
set(MIN_SWIG_VERSION 2)
if(SWIG_FOUND)
execute_process(COMMAND sh -c "${SWIG_BINARY_PATH} -version | grep Version | cut -f3 -d' '"
OUTPUT_VARIABLE _SWIG_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
message("swig version ${MIN_SWIG_VERSION} or greater is needed for generating python api. "
"Only version ${_SWIG_VERSION} is found. Set SWIG_FOUND to FALSE")
set(SWIG_FOUND FALSE)
endif(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
endif(SWIG_FOUND)
function(generate_python_api target_name) function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx ${PROJ_ROOT}/paddle/Paddle_wrap.cxx
...@@ -27,6 +5,7 @@ function(generate_python_api target_name) ...@@ -27,6 +5,7 @@ function(generate_python_api target_name)
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py && mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
${PROJ_ROOT}/paddle/api/PaddleAPI.h
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig") COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS add_custom_target(${target_name} ALL DEPENDS
......
...@@ -67,6 +67,10 @@ endmacro() ...@@ -67,6 +67,10 @@ endmacro()
# #
# It will handle WITH_PYTHON/WITH_GLOG etc. # It will handle WITH_PYTHON/WITH_GLOG etc.
function(link_paddle_exe TARGET_NAME) function(link_paddle_exe TARGET_NAME)
if(WITH_RDMA)
generate_rdma_links()
endif()
if(WITH_METRIC) if(WITH_METRIC)
if(WITH_GPU) if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu) set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
...@@ -109,6 +113,12 @@ function(link_paddle_exe TARGET_NAME) ...@@ -109,6 +113,12 @@ function(link_paddle_exe TARGET_NAME)
${ZLIB_LIBRARIES} ${ZLIB_LIBRARIES}
${INTERAL_LIBS} ${INTERAL_LIBS}
${CMAKE_DL_LIBS}) ${CMAKE_DL_LIBS})
if(WITH_RDMA)
target_link_libraries(${TARGET_NAME}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(WITH_PYTHON) if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME} target_link_libraries(${TARGET_NAME}
...@@ -178,9 +188,18 @@ macro(add_simple_unittest TARGET_NAME) ...@@ -178,9 +188,18 @@ macro(add_simple_unittest TARGET_NAME)
add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp) add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
endmacro() endmacro()
macro(add_paddle_culib TARGET_NAME) # Creates C resources file from files in given resource file
set(NVCC_FLAG ${CUDA_NVCC_FLAGS}) function(create_resources res_file output)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math) # Create empty output file
cuda_add_library(${TARGET_NAME} STATIC ${ARGN}) file(WRITE ${output} "")
set(CUDA_NVCC_FLAGS ${NVCC_FLAG}) # Get short filename
endmacro() string(REGEX MATCH "([^/]+)$" filename ${res_file})
# Replace filename spaces & extension separator for C compatibility
string(REGEX REPLACE "\\.| |-" "_" filename ${filename})
# Read hex data from file
file(READ ${res_file} filedata HEX)
# Convert hex data for C compatibility
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
# Append data to output file
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
endfunction()
...@@ -5,3 +5,5 @@ plot.png ...@@ -5,3 +5,5 @@ plot.png
train.log train.log
image_provider_copy_1.py image_provider_copy_1.py
*pyc *pyc
train.list
test.list
File mode changed from 100644 to 100755
...@@ -16,7 +16,6 @@ import numpy as np ...@@ -16,7 +16,6 @@ import numpy as np
import sys import sys
import os import os
import PIL.Image as Image import PIL.Image as Image
""" """
Usage: python process_cifar input_dir output_dir Usage: python process_cifar input_dir output_dir
""" """
...@@ -30,6 +29,7 @@ def mkdir_not_exist(path): ...@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
if not os.path.exists(path): if not os.path.exists(path):
os.mkdir(path) os.mkdir(path)
def create_dir_structure(output_dir): def create_dir_structure(output_dir):
""" """
Create the directory structure for the directory. Create the directory structure for the directory.
...@@ -39,8 +39,8 @@ def create_dir_structure(output_dir): ...@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
mkdir_not_exist(os.path.join(output_dir, "train")) mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test")) mkdir_not_exist(os.path.join(output_dir, "test"))
def convert_batch(batch_path, label_set, label_map,
output_dir, data_split): def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
""" """
Convert CIFAR batch to the structure of Paddle format. Convert CIFAR batch to the structure of Paddle format.
batch_path: the batch to be converted. batch_path: the batch to be converted.
...@@ -67,11 +67,23 @@ if __name__ == '__main__': ...@@ -67,11 +67,23 @@ if __name__ == '__main__':
output_dir = sys.argv[2] output_dir = sys.argv[2]
num_batch = 5 num_batch = 5
create_dir_structure(output_dir) create_dir_structure(output_dir)
label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer", label_map = {
5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"} 0: "airplane",
1: "automobile",
2: "bird",
3: "cat",
4: "deer",
5: "dog",
6: "frog",
7: "horse",
8: "ship",
9: "truck"
}
labels = {} labels = {}
for i in range(1, num_batch + 1): for i in range(1, num_batch + 1):
convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels, convert_batch(
label_map, output_dir, "train") os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
convert_batch(os.path.join(input_dir, "test_batch"), {}, output_dir, "train")
label_map, output_dir, "test") convert_batch(
\ No newline at end of file os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
"test")
...@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg, ...@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.img_mean = image_util.load_meta(settings.meta_path, settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size, settings.mean_img_size,
settings.img_size, settings.img_size, settings.color)
settings.color)
settings.logger.info('Image size: %s', settings.img_size) settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path) settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [ settings.input_types = [
dense_vector(settings.img_raw_size), # image feature dense_vector(settings.img_raw_size), # image feature
integer_value(settings.num_classes)] # labels integer_value(settings.num_classes)
] # labels
settings.logger.info('DataProvider Initialization finished') settings.logger.info('DataProvider Initialization finished')
@provider(init_hook=hook) @provider(init_hook=hook, min_pool_size=0)
def processData(settings, file_name): def processData(settings, file_list):
""" """
The main function for loading data. The main function for loading data.
Load the batch, iterate all the images and labels in this batch. Load the batch, iterate all the images and labels in this batch.
file_name: the batch file name. file_list: the batch file list.
""" """
data = cPickle.load(io.open(file_name, 'rb')) with open(file_list, 'r') as fdata:
indexes = list(range(len(data['images']))) lines = [line.strip() for line in fdata]
if settings.is_train: random.shuffle(lines)
random.shuffle(indexes) for file_name in lines:
for i in indexes: with io.open(file_name.strip(), 'rb') as file:
if settings.use_jpeg == 1: data = cPickle.load(file)
img = image_util.decode_jpeg(data['images'][i]) indexes = list(range(len(data['images'])))
else: if settings.is_train:
img = data['images'][i] random.shuffle(indexes)
img_feat = image_util.preprocess_img(img, settings.img_mean, for i in indexes:
settings.img_size, settings.is_train, if settings.use_jpeg == 1:
settings.color) img = image_util.decode_jpeg(data['images'][i])
label = data['labels'][i] else:
yield img_feat.tolist(), int(label) img = data['images'][i]
img_feat = image_util.preprocess_img(
img, settings.img_mean, settings.img_size,
settings.is_train, settings.color)
label = data['labels'][i]
yield img_feat.astype('float32'), int(label)
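As a hedged illustration of the input layout the rewritten provider expects: each line of the file list names a cPickle'd batch holding parallel 'images' and 'labels' entries. The shapes and file names below are assumptions chosen for demonstration, not fixtures shipped with the demo:

import cPickle
import numpy as np

# One fake batch with raw channel-first arrays (the use_jpeg == 0 case).
batch = {
    'images': [np.zeros((3, 32, 32), dtype='uint8') for _ in range(4)],
    'labels': [0, 1, 2, 3],
}
with open('data_batch_demo', 'wb') as f:
    cPickle.dump(batch, f, protocol=cPickle.HIGHEST_PROTOCOL)
with open('train.list', 'w') as f:
    f.write('data_batch_demo\n')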
...@@ -16,17 +16,20 @@ import numpy as np ...@@ -16,17 +16,20 @@ import numpy as np
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
def resize_image(img, target_size): def resize_image(img, target_size):
""" """
Resize an image so that the shorter edge has length target_size. Resize an image so that the shorter edge has length target_size.
img: the input image to be resized. img: the input image to be resized.
target_size: the target resized image size. target_size: the target resized image size.
""" """
percent = (target_size/float(min(img.size[0], img.size[1]))) percent = (target_size / float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent)) resized_size = int(round(img.size[0] * percent)), int(
round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS) img = img.resize(resized_size, Image.ANTIALIAS)
return img return img
def flip(im): def flip(im):
""" """
Return the flipped image. Return the flipped image.
...@@ -38,6 +41,7 @@ def flip(im): ...@@ -38,6 +41,7 @@ def flip(im):
else: else:
return im[:, ::-1] return im[:, ::-1]
def crop_img(im, inner_size, color=True, test=True): def crop_img(im, inner_size, color=True, test=True):
""" """
Return cropped image. Return cropped image.
...@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True): ...@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images. If True, crop the center of images.
""" """
if color: if color:
height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2]) height, width = max(inner_size, im.shape[1]), max(inner_size,
im.shape[2])
padded_im = np.zeros((3, height, width)) padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2 startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2 startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2] endY, endX = startY + im.shape[1], startX + im.shape[2]
padded_im[:, startY: endY, startX: endX] = im padded_im[:, startY:endY, startX:endX] = im
else: else:
im = im.astype('float32') im = im.astype('float32')
height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1]) height, width = max(inner_size, im.shape[0]), max(inner_size,
im.shape[1])
padded_im = np.zeros((height, width)) padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2 startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2 startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1] endY, endX = startY + im.shape[0], startX + im.shape[1]
padded_im[startY: endY, startX: endX] = im padded_im[startY:endY, startX:endX] = im
if test: if test:
startY = (height - inner_size) / 2 startY = (height - inner_size) / 2
startX = (width - inner_size) / 2 startX = (width - inner_size) / 2
...@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True): ...@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX = np.random.randint(0, width - inner_size + 1) startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size endY, endX = startY + inner_size, startX + inner_size
if color: if color:
pic = padded_im[:, startY: endY, startX: endX] pic = padded_im[:, startY:endY, startX:endX]
else: else:
pic = padded_im[startY: endY, startX: endX] pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0): if (not test) and (np.random.randint(2) == 0):
pic = flip(pic) pic = flip(pic)
return pic return pic
def decode_jpeg(jpeg_string): def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string))) np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3: if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1)) np_array = np.transpose(np_array, (2, 0, 1))
return np_array return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True): def preprocess_img(im, img_mean, crop_size, is_train, color=True):
""" """
Does data augmentation for images. Does data augmentation for images.
...@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True): ...@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic -= img_mean pic -= img_mean
return pic.flatten() return pic.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True): def load_meta(meta_path, mean_img_size, crop_size, color=True):
""" """
Return the loaded meta file. Return the loaded meta file.
...@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True): ...@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean = np.load(meta_path)['data_mean'] mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2 border = (mean_img_size - crop_size) / 2
if color: if color:
assert(mean_img_size * mean_img_size * 3 == mean.shape[0]) assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size) mean = mean.reshape(3, mean_img_size, mean_img_size)
mean = mean[:, border: border + crop_size, mean = mean[:, border:border + crop_size, border:border +
border: border + crop_size].astype('float32') crop_size].astype('float32')
else: else:
assert(mean_img_size * mean_img_size == mean.shape[0]) assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size) mean = mean.reshape(mean_img_size, mean_img_size)
mean = mean[border: border + crop_size, mean = mean[border:border + crop_size, border:border +
border: border + crop_size].astype('float32') crop_size].astype('float32')
return mean return mean
def load_image(img_path, is_color=True): def load_image(img_path, is_color=True):
""" """
Load image and return. Load image and return.
...@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True): ...@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img.load() img.load()
return img return img
def oversample(img, crop_dims): def oversample(img, crop_dims):
""" """
image : iterable of (H x W x K) ndarrays image : iterable of (H x W x K) ndarrays
...@@ -152,50 +163,53 @@ def oversample(img, crop_dims): ...@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
for j in w_indices: for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1 curr += 1
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
-crop_dims / 2.0, [-crop_dims / 2.0, crop_dims / 2.0])
crop_dims / 2.0
])
crops_ix = np.tile(crops_ix, (2, 1)) crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops # Extract crops
crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1], crops = np.empty(
im_shape[-1]), dtype=np.float32) (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
dtype=np.float32)
ix = 0 ix = 0
for im in img: for im in img:
for crop in crops_ix: for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1 ix += 1
crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops return crops
class ImageTransformer: class ImageTransformer:
def __init__(self, transpose = None, def __init__(self,
channel_swap = None, mean = None, is_color = True): transpose=None,
channel_swap=None,
mean=None,
is_color=True):
self.transpose = transpose self.transpose = transpose
self.channel_swap = None self.channel_swap = None
self.mean = None self.mean = None
self.is_color = is_color self.is_color = is_color
def set_transpose(self, order): def set_transpose(self, order):
if self.is_color: if self.is_color:
assert 3 == len(order) assert 3 == len(order)
self.transpose = order self.transpose = order
def set_channel_swap(self, order): def set_channel_swap(self, order):
if self.is_color: if self.is_color:
assert 3 == len(order) assert 3 == len(order)
self.channel_swap = order self.channel_swap = order
def set_mean(self, mean): def set_mean(self, mean):
# mean value, may be one value per channel # mean value, may be one value per channel
if mean.ndim == 1: if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis] mean = mean[:, np.newaxis, np.newaxis]
else: else:
# elementwise mean # elementwise mean
if self.is_color: if self.is_color:
assert len(mean.shape) == 3 assert len(mean.shape) == 3
self.mean = mean self.mean = mean
def transformer(self, data): def transformer(self, data):
if self.transpose is not None: if self.transpose is not None:
......
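The oversample() routine above implements the usual 10-crop scheme: the four corner crops and the center crop, each together with its horizontal mirror. A hedged usage sketch on a dummy image, assuming image_util.py from this demo is importable:

import numpy as np
import image_util  # the module shown above; assumed to be on the path

# oversample() expects an iterable of (H x W x K) arrays and returns
# 10 crops per input image.
images = np.zeros((1, 256, 256, 3), dtype=np.float32)
crops = image_util.oversample(images, (224, 224))
print crops.shape  # (10, 224, 224, 3)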
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os,sys import os, sys
import numpy as np import numpy as np
import logging import logging
from PIL import Image from PIL import Image
...@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter ...@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config from paddle.trainer.config_parser import parse_config
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
class ImageClassifier(): class ImageClassifier():
def __init__(self, def __init__(self,
train_conf, train_conf,
...@@ -58,18 +60,19 @@ class ImageClassifier(): ...@@ -58,18 +60,19 @@ class ImageClassifier():
self.oversample = oversample self.oversample = oversample
self.is_color = is_color self.is_color = is_color
self.transformer = image_util.ImageTransformer(is_color = is_color) self.transformer = image_util.ImageTransformer(is_color=is_color)
self.transformer.set_transpose((2,0,1)) self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean'] mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0 gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu) conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args) conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu)) swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine) assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir) self.network.loadParameters(self.model_dir)
...@@ -90,14 +93,14 @@ class ImageClassifier(): ...@@ -90,14 +93,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim # image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim) image = image_util.resize_image(image, self.resize_dim)
image = np.array(image) image = np.array(image)
input = np.zeros((1, image.shape[0], image.shape[1], 3), input = np.zeros(
dtype=np.float32) (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32) input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims) input = image_util.oversample(input, self.crop_dims)
else: else:
image = image.resize(self.crop_dims, Image.ANTIALIAS) image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3), input = np.zeros(
dtype=np.float32) (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32) input[0] = np.array(image).astype(np.float32)
data_in = [] data_in = []
...@@ -133,22 +136,24 @@ class ImageClassifier(): ...@@ -133,22 +136,24 @@ class ImageClassifier():
lab = np.argsort(-prob) lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0]) logging.info("Label of %s is: %d", image, lab[0])
if __name__ == '__main__': if __name__ == '__main__':
image_size=32 image_size = 32
crop_size=32 crop_size = 32
multi_crop=True multi_crop = True
config="vgg_16_cifar.py" config = "vgg_16_cifar.py"
output_layer="__fc_layer_1__" output_layer = "__fc_layer_1__"
mean_path="data/cifar-out/batches/batches.meta" mean_path = "data/cifar-out/batches/batches.meta"
model_path=sys.argv[1] model_path = sys.argv[1]
image=sys.argv[2] image = sys.argv[2]
use_gpu=bool(int(sys.argv[3])) use_gpu = bool(int(sys.argv[3]))
obj = ImageClassifier(train_conf=config, obj = ImageClassifier(
model_dir=model_path, train_conf=config,
resize_dim=image_size, model_dir=model_path,
crop_dim=crop_size, resize_dim=image_size,
mean_file=mean_path, crop_dim=crop_size,
use_gpu=use_gpu, mean_file=mean_path,
oversample=multi_crop) use_gpu=use_gpu,
oversample=multi_crop)
obj.predict(image, output_layer) obj.predict(image, output_layer)
...@@ -19,22 +19,36 @@ from optparse import OptionParser ...@@ -19,22 +19,36 @@ from optparse import OptionParser
def option_parser(): def option_parser():
parser = OptionParser(usage="usage: python preprocess.py "\ parser = OptionParser(usage="usage: python preprocess.py "\
"-i data_dir [options]") "-i data_dir [options]")
parser.add_option("-i", "--input", action="store", parser.add_option(
dest="input", help="Input data directory.") "-i",
parser.add_option("-s", "--size", action="store", "--input",
dest="size", help="Processed image size.") action="store",
parser.add_option("-c", "--color", action="store", dest="input",
dest="color", help="whether to use color images.") help="Input data directory.")
parser.add_option(
"-s",
"--size",
action="store",
dest="size",
help="Processed image size.")
parser.add_option(
"-c",
"--color",
action="store",
dest="color",
help="whether to use color images.")
return parser.parse_args() return parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
options, args = option_parser() options, args = option_parser()
data_dir = options.input data_dir = options.input
processed_image_size = int(options.size) processed_image_size = int(options.size)
color = options.color == "1" color = options.color == "1"
data_creator = ImageClassificationDatasetCreater(data_dir, data_creator = ImageClassificationDatasetCreater(
processed_image_size, data_dir, processed_image_size, color)
color) data_creator.train_list_name = "train.txt"
data_creator.num_per_batch = 1000 data_creator.test_list_name = "test.txt"
data_creator.overwrite = True data_creator.num_per_batch = 1000
data_creator.create_batches() data_creator.overwrite = True
data_creator.create_batches()
...@@ -17,3 +17,6 @@ set -e ...@@ -17,3 +17,6 @@ set -e
data_dir=./data/cifar-out data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1 python preprocess.py -i $data_dir -s 32 -c 1
echo "data/cifar-out/batches/train.txt" > train.list
echo "data/cifar-out/batches/test.txt" > test.list
...@@ -24,7 +24,7 @@ paddle train \ ...@@ -24,7 +24,7 @@ paddle train \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
--use_gpu=1 \ --use_gpu=1 \
--trainer_count=1 \ --trainer_count=1 \
--num_passes=200 \ --num_passes=300 \
--save_dir=$output \ --save_dir=$output \
2>&1 | tee $log 2>&1 | tee $log
......
...@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False) ...@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ################## ####################Data Configuration ##################
if not is_predict: if not is_predict:
data_dir='data/cifar-out/batches/' data_dir = 'data/cifar-out/batches/'
meta_path=data_dir+'batches.meta' meta_path = data_dir + 'batches.meta'
args = {'meta':meta_path,'mean_img_size': 32, args = {
'img_size': 32,'num_classes': 10, 'meta': meta_path,
'use_jpeg': 1,'color': "color"} 'mean_img_size': 32,
'img_size': 32,
define_py_data_sources2(train_list=data_dir+"train.list", 'num_classes': 10,
test_list=data_dir+'test.list', 'use_jpeg': 1,
module='image_provider', 'color': "color"
obj='processData', }
args=args)
define_py_data_sources2(
train_list="train.list",
test_list="train.list",
module='image_provider',
obj='processData',
args=args)
######################Algorithm Configuration ############# ######################Algorithm Configuration #############
settings( settings(
batch_size = 128, batch_size=128,
learning_rate = 0.1 / 128.0, learning_rate=0.1 / 128.0,
learning_method = MomentumOptimizer(0.9), learning_method=MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * 128) regularization=L2Regularization(0.0005 * 128))
)
#######################Network Configuration ############# #######################Network Configuration #############
data_size=3*32*32 data_size = 3 * 32 * 32
label_size=10 label_size = 10
img = data_layer(name='image', img = data_layer(name='image', size=data_size)
size=data_size) # small_vgg is predefined in trainer_config_helpers.networks
# small_vgg is predined in trainer_config_helpers.network predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
predict = small_vgg(input_image=img,
num_channels=3,
num_classes=label_size)
if not is_predict: if not is_predict:
lbl = data_layer(name="label", size=label_size) lbl = data_layer(name="label", size=label_size)
......
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import random
# define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2 * x + 0.3]
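The introduction README states that the learned w and b end up close to [2, 0.3], and the provider above generates exactly y = 2*x + 0.3. As a quick Paddle-free sanity check, an ordinary least-squares fit in plain numpy recovers the same values (this snippet is not part of the demo):

import random
import numpy as np

xs = np.array([random.random() for _ in xrange(2000)])
ys = 2 * xs + 0.3
w, b = np.polyfit(xs, ys, 1)      # slope and intercept
print 'w=%.6f, b=%.6f' % (w, b)   # roughly w=2.000000, b=0.300000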
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Print the model parameters from the last saved model
Usage:
python evaluate_model.py
"""
import numpy as np
import os
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
def main():
print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
load('output/pass-00029/b'))
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
with open(data_file, 'w') as f:
f.writelines(' ')
define_py_data_sources2(
train_list=data_file,
test_list=None,
module='dataprovider',
obj='process',
args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(
input=x,
param_attr=ParamAttr(name='w'),
size=1,
act=LinearActivation(),
bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
outputs(cost)
data/raw_data
data/*.list
mnist_vgg_model
plot.png
train.log
*pyc
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
o = open("./" + "train.list", "w")
o.write("./data/raw_data/train" + "\n")
o.close()
o = open("./" + "test.list", "w")
o.write("./data/raw_data/t10k" + "\n")
o.close()
#!/usr/bin/env sh
# This script downloads the MNIST data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/raw_data"
mkdir "$DIR/raw_data"
cd "$DIR/raw_data"
echo "Downloading..."
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
if [ ! -e $fname ]; then
wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
gunzip ${fname}.gz
fi
done
cd $DIR
rm -f *.list
python generate_list.py
from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
@provider(
input_types={'pixel': dense_vector(28 * 28),
'label': integer_value(10)})
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
f = open(imgf, "rb")
l = open(labelf, "rb")
f.read(16)
l.read(8)
# Define number of samples for train/test
if "train" in filename:
n = 60000
else:
n = 10000
for i in range(n):
label = ord(l.read(1))
pixels = []
for j in range(28 * 28):
pixels.append(float(ord(f.read(1))) / 255.0)
yield {"pixel": pixels, 'label': label}
f.close()
l.close()
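The provider above walks the raw MNIST idx files byte by byte: a 16-byte header on each image file and an 8-byte header on each label file, then one unsigned byte per pixel or label. A hedged, vectorized numpy equivalent for reference; the path prefix follows the same ./data/raw_data/train and ./data/raw_data/t10k convention written into train.list and test.list above:

import numpy as np

def read_mnist(prefix):
    # prefix is e.g. "./data/raw_data/train" or "./data/raw_data/t10k"
    with open(prefix + "-images-idx3-ubyte", "rb") as f:
        f.read(16)  # skip the idx3 header
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(-1, 28 * 28)
    with open(prefix + "-labels-idx1-ubyte", "rb") as f:
        f.read(8)   # skip the idx1 header
        labels = np.frombuffer(f.read(), dtype=np.uint8)
    return images.astype('float32') / 255.0, labels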
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=vgg_16_mnist.py
output=./mnist_vgg_model
log=train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=0 \
--trainer_count=1 \
--num_passes=100 \
--save_dir=$output \
2>&1 | tee $log
python -m paddle.utils.plotcurve -i $log > plot.png
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir = './data/'
define_py_data_sources2(
train_list=data_dir + 'train.list',
test_list=data_dir + 'test.list',
module='mnist_provider',
obj='process')
######################Algorithm Configuration #############
settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
data_size = 1 * 28 * 28
label_size = 10
img = data_layer(name='pixel', size=data_size)
# small_vgg is predefined in trainer_config_helpers.networks
predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
inputs(img, lbl)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Example: Example:
python extract_para.py --preModel PREMODEL --preDict PREDICT \ python extract_para.py --preModel PREMODEL --preDict PREDICT \
...@@ -29,6 +28,7 @@ Options: ...@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser from optparse import OptionParser
import struct import struct
def get_row_index(preDict, usrDict): def get_row_index(preDict, usrDict):
""" """
Get the row positions for all words in user dictionary from pre-trained dictionary. Get the row positions for all words in user dictionary from pre-trained dictionary.
...@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict): ...@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
pos.append(index[word]) pos.append(index[word])
return pos return pos
def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
paraDim):
""" """
Extract desired parameters from a pretrained embedding model based on user dictionary Extract desired parameters from a pretrained embedding model based on user dictionary
""" """
...@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim) ...@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
print "extract parameters finish, total", len(rowIndex), "lines" print "extract parameters finish, total", len(rowIndex), "lines"
fi.close() fi.close()
def main(): def main():
""" """
Main entry for running paraconvert.py Main entry for running paraconvert.py
...@@ -78,19 +81,33 @@ def main(): ...@@ -78,19 +81,33 @@ def main():
"python %prog --preModel PREMODEL --preDict PREDICT" \ "python %prog --preModel PREMODEL --preDict PREDICT" \
" --usrModel USRMODEL --usrDict USRDICT -d DIM" " --usrModel USRMODEL --usrDict USRDICT -d DIM"
parser = OptionParser(usage) parser = OptionParser(usage)
parser.add_option("--preModel", action="store", dest="preModel", parser.add_option(
help="the name of pretrained embedding model") "--preModel",
parser.add_option("--preDict", action="store", dest="preDict", action="store",
help="the name of pretrained dictionary") dest="preModel",
parser.add_option("--usrModel", action="store", dest="usrModel", help="the name of pretrained embedding model")
help="the name of output usr embedding model") parser.add_option(
parser.add_option("--usrDict", action="store", dest="usrDict", "--preDict",
help="the name of user specified dictionary") action="store",
parser.add_option("-d", action="store", dest="dim", dest="preDict",
help="dimension of parameter") help="the name of pretrained dictionary")
parser.add_option(
"--usrModel",
action="store",
dest="usrModel",
help="the name of output usr embedding model")
parser.add_option(
"--usrDict",
action="store",
dest="usrDict",
help="the name of user specified dictionary")
parser.add_option(
"-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
extract_parameters_by_usrDict(options.preModel, options.preDict, extract_parameters_by_usrDict(options.preModel, options.preDict,
options.usrModel, options.usrDict, int(options.dim)) options.usrModel, options.usrDict,
int(options.dim))
if __name__ == '__main__': if __name__ == '__main__':
main() main()
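A minimal sketch of the lookup described by the get_row_index() docstring, assuming one word per line in both dictionaries and that every user-dictionary word exists in the pre-trained dictionary (the full function body is elided by this diff):

def get_row_index(preDict, usrDict):
    # Map each pre-trained word to its row number, then look up user words.
    index = {}
    with open(preDict, 'r') as f:
        for row, line in enumerate(f):
            index[line.strip().split()[0]] = row
    pos = []
    with open(usrDict, 'r') as f:
        for line in f:
            pos.append(index[line.strip().split()[0]])
    return pos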
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Example: Example:
python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
...@@ -29,6 +28,7 @@ Options: ...@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser from optparse import OptionParser
import struct import struct
def binary2text(input, output, paraDim): def binary2text(input, output, paraDim):
""" """
Convert a binary parameter file of the embedding model into a text file. Convert a binary parameter file of the embedding model into a text file.
...@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim): ...@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
fo.close() fo.close()
print "binary2text finish, total", line, "lines" print "binary2text finish, total", line, "lines"
def get_para_count(input): def get_para_count(input):
""" """
Compute the total number of embedding parameters in input text file. Compute the total number of embedding parameters in input text file.
input: the name of input text file input: the name of input text file
""" """
numRows = 1 numRows = 1
paraDim = 0 paraDim = 0
with open(input) as f: with open(input) as f:
line = f.readline() line = f.readline()
...@@ -90,6 +91,7 @@ def get_para_count(input): ...@@ -90,6 +91,7 @@ def get_para_count(input):
numRows += 1 numRows += 1
return numRows * paraDim return numRows * paraDim
def text2binary(input, output, paddle_head=True): def text2binary(input, output, paddle_head=True):
""" """
Convert a text parameter file of the embedding model into a binary file. Convert a text parameter file of the embedding model into a binary file.
...@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True): ...@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
fo.close() fo.close()
print "text2binary finish, total", count, "lines" print "text2binary finish, total", count, "lines"
def main(): def main():
""" """
Main entry for running paraconvert.py Main entry for running paraconvert.py
...@@ -131,21 +134,26 @@ def main(): ...@@ -131,21 +134,26 @@ def main():
"python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \ "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
"python %prog --t2b -i INPUT -o OUTPUT" "python %prog --t2b -i INPUT -o OUTPUT"
parser = OptionParser(usage) parser = OptionParser(usage)
parser.add_option("--b2t", action="store_true", parser.add_option(
help="convert parameter file of embedding model from binary to text") "--b2t",
parser.add_option("--t2b", action="store_true", action="store_true",
help="convert parameter file of embedding model from text to binary") help="convert parameter file of embedding model from binary to text")
parser.add_option("-i", action="store", dest="input", parser.add_option(
help="input parameter file name") "--t2b",
parser.add_option("-o", action="store", dest="output", action="store_true",
help="output parameter file name") help="convert parameter file of embedding model from text to binary")
parser.add_option("-d", action="store", dest="dim", parser.add_option(
help="dimension of parameter") "-i", action="store", dest="input", help="input parameter file name")
parser.add_option(
"-o", action="store", dest="output", help="output parameter file name")
parser.add_option(
"-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
if options.b2t: if options.b2t:
binary2text(options.input, options.output, options.dim) binary2text(options.input, options.output, options.dim)
if options.t2b: if options.t2b:
text2binary(options.input, options.output) text2binary(options.input, options.output)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -18,7 +18,5 @@ set -x ...@@ -18,7 +18,5 @@ set -x
# download the dictionary and pretrained model # download the dictionary and pretrained model
for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb
do do
# following is the google drive address wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/$file
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/$file --no-check-certificate
done done
...@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter ...@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config from paddle.trainer.config_parser import parse_config
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
class ImageClassifier(): class ImageClassifier():
def __init__(self, train_conf, model_dir=None, def __init__(self,
resize_dim=256, crop_dim=224, train_conf,
model_dir=None,
resize_dim=256,
crop_dim=224,
use_gpu=True, use_gpu=True,
mean_file=None, mean_file=None,
output_layer=None, output_layer=None,
oversample=False, is_color=True): oversample=False,
is_color=True):
""" """
train_conf: the network config file. train_conf: the network config file.
model_dir: string, directory of model. model_dir: string, directory of model.
...@@ -62,24 +68,25 @@ class ImageClassifier(): ...@@ -62,24 +68,25 @@ class ImageClassifier():
assert isinstance(self.output_layer, basestring) assert isinstance(self.output_layer, basestring)
self.output_layer = self.output_layer.split(",") self.output_layer = self.output_layer.split(",")
self.transformer = image_util.ImageTransformer(is_color = is_color) self.transformer = image_util.ImageTransformer(is_color=is_color)
self.transformer.set_transpose((2,0,1)) self.transformer.set_transpose((2, 0, 1))
self.transformer.set_channel_swap((2,1,0)) self.transformer.set_channel_swap((2, 1, 0))
self.mean_file = mean_file self.mean_file = mean_file
if self.mean_file is not None: if self.mean_file is not None:
mean = np.load(self.mean_file)['data_mean'] mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel self.transformer.set_mean(mean) # mean pixel
else: else:
# if you use three mean value, set like: # if you use three mean value, set like:
# this three mean value is calculated from ImageNet. # this three mean value is calculated from ImageNet.
self.transformer.set_mean(np.array([103.939,116.779,123.68])) self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu)) conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
conf = parse_config(train_conf, conf_args) conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu))) swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine) assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir) self.network.loadParameters(self.model_dir)
...@@ -105,14 +112,14 @@ class ImageClassifier(): ...@@ -105,14 +112,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim # image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim) image = image_util.resize_image(image, self.resize_dim)
image = np.array(image) image = np.array(image)
input = np.zeros((1, image.shape[0], image.shape[1], 3), input = np.zeros(
dtype=np.float32) (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32) input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims) input = image_util.oversample(input, self.crop_dims)
else: else:
image = image.resize(self.crop_dims, Image.ANTIALIAS) image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3), input = np.zeros(
dtype=np.float32) (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32) input[0] = np.array(image).astype(np.float32)
data_in = [] data_in = []
...@@ -172,7 +179,7 @@ class ImageClassifier(): ...@@ -172,7 +179,7 @@ class ImageClassifier():
logging.info("Label of %s is: %d", image, lab[0]) logging.info("Label of %s is: %d", image, lab[0])
return results return results
def extract(self, data_file, output_dir, batch_size = 10000): def extract(self, data_file, output_dir, batch_size=10000):
""" """
extract and save features of output layers, which are extract and save features of output layers, which are
specified in Outputs() in the network config. specified in Outputs() in the network config.
...@@ -197,7 +204,7 @@ class ImageClassifier(): ...@@ -197,7 +204,7 @@ class ImageClassifier():
image_feature[file_name] = feature image_feature[file_name] = feature
sample_num += 1 sample_num += 1
if sample_num == batch_size: if sample_num == batch_size:
batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num)) batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name) self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num) logging.info('Finish batch %d', batch_num)
batch_num += 1 batch_num += 1
...@@ -206,7 +213,7 @@ class ImageClassifier(): ...@@ -206,7 +213,7 @@ class ImageClassifier():
if idx % 1000 == 0: if idx % 1000 == 0:
logging.info('%d/%d, %s', idx, len(image_files), file_name) logging.info('%d/%d, %s', idx, len(image_files), file_name)
if sample_num > 0: if sample_num > 0:
batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num)) batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name) self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num) logging.info('Finish batch %d', batch_num)
logging.info('Done: make image feature batch') logging.info('Done: make image feature batch')
...@@ -215,38 +222,64 @@ class ImageClassifier(): ...@@ -215,38 +222,64 @@ class ImageClassifier():
of = open(file, 'wb') of = open(file, 'wb')
cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
def option_parser(): def option_parser():
""" """
Main entry for predicting Main entry for predicting
""" """
usage = "%prog -c config -i data_list -w model_dir [options]" usage = "%prog -c config -i data_list -w model_dir [options]"
parser = OptionParser(usage="usage: %s" % usage) parser = OptionParser(usage="usage: %s" % usage)
parser.add_option("-j", "--job", parser.add_option(
action="store", dest="job_type", "-j",
help="job type: predict, extract\ "--job",
action="store",
dest="job_type",
help="job type: predict, extract\
predict: predicting,\ predict: predicting,\
extract: extract features") extract: extract features")
parser.add_option("-c", "--conf", parser.add_option(
action="store", dest="train_conf", "-c",
help="network config") "--conf",
parser.add_option("-i", "--data", action="store",
action="store", dest="data_file", dest="train_conf",
help="image list") help="network config")
parser.add_option("-w", "--model", parser.add_option(
action="store", dest="model_path", "-i", "--data", action="store", dest="data_file", help="image list")
default=None, help="model path") parser.add_option(
parser.add_option("-g", "--use_gpu", action="store", "-w",
dest="use_gpu", default=True, "--model",
help="Whether to use gpu mode.") action="store",
parser.add_option("-o", "--output_dir", dest="model_path",
action="store", dest="output_dir", default=None,
default="output", help="output path") help="model path")
parser.add_option("-m", "--mean", action="store", parser.add_option(
dest="mean", default=None, "-g",
help="mean file.") "--use_gpu",
parser.add_option("-p", "--multi_crop", action="store_true", action="store",
dest="multi_crop", default=False, dest="use_gpu",
help="Wether to use multiple crops on image.") default=True,
help="Whether to use gpu mode.")
parser.add_option(
"-o",
"--output_dir",
action="store",
dest="output_dir",
default="output",
help="output path")
parser.add_option(
"-m",
"--mean",
action="store",
dest="mean",
default=None,
help="mean file.")
parser.add_option(
"-p",
"--multi_crop",
action="store_true",
dest="multi_crop",
default=False,
help="Wether to use multiple crops on image.")
parser.add_option("-l", "--output_layer", action="store", parser.add_option("-l", "--output_layer", action="store",
dest="output_layer", default=None, dest="output_layer", default=None,
help="--job=extract, specify layers to extract "\ help="--job=extract, specify layers to extract "\
...@@ -254,24 +287,26 @@ def option_parser(): ...@@ -254,24 +287,26 @@ def option_parser():
"classification probability, output in resnet.py.") "classification probability, output in resnet.py.")
return parser.parse_args() return parser.parse_args()
def main(): def main():
""" """
1. parse input arguments. 1. parse input arguments.
2. predict or extract features according to the job type. 2. predict or extract features according to the job type.
""" """
options, args = option_parser() options, args = option_parser()
obj = ImageClassifier(options.train_conf, obj = ImageClassifier(
options.model_path, options.train_conf,
use_gpu=options.use_gpu, options.model_path,
mean_file=options.mean, use_gpu=options.use_gpu,
output_layer=options.output_layer, mean_file=options.mean,
oversample=options.multi_crop) output_layer=options.output_layer,
oversample=options.multi_crop)
if options.job_type == "predict": if options.job_type == "predict":
obj.predict(options.data_file) obj.predict(options.data_file)
elif options.job_type == "extract": elif options.job_type == "extract":
obj.extract(options.data_file, obj.extract(options.data_file, options.output_dir)
options.output_dir)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
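The extract() method above saves every feature batch as a cPickle'd dict mapping each image file name to its feature, in files named batch_<n> under the directory given by --output_dir (default "output"). A hedged sketch of reading those batches back; the paths are illustrative:

import cPickle
import glob

for name in glob.glob('output/batch_*'):
    with open(name, 'rb') as f:
        image_feature = cPickle.load(f)   # dict: image file name -> feature
    print name, len(image_feature)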
...@@ -11,4 +11,3 @@ ...@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
...@@ -16,8 +16,7 @@ from paddle.utils.image_util import * ...@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
def hook(settings, image_size, crop_size, color, file_list, def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
is_train, **kwargs):
""" """
Description: Init with a list of data files. Description: Init with a list of data files.
file_list is the list of input file names. file_list is the list of input file names.
...@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list, ...@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
sz = settings.crop_size * settings.crop_size sz = settings.crop_size * settings.crop_size
settings.img_mean = np.zeros(sz * 3, dtype=np.single) settings.img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(settings.mean_value): for idx, value in enumerate(settings.mean_value):
settings.img_mean[idx * sz: (idx + 1) * sz] = value settings.img_mean[idx * sz:(idx + 1) * sz] = value
settings.img_mean = settings.img_mean.reshape(3, settings.crop_size, settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
settings.crop_size) settings.crop_size)
...@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list, ...@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
settings.input_types = [ settings.input_types = [
dense_vector(settings.img_input_size), # image feature dense_vector(settings.img_input_size), # image feature
integer_value(1)] # labels integer_value(1)
] # labels
settings.logger.info('Image short side: %s', settings.img_size) settings.logger.info('Image short side: %s', settings.img_size)
settings.logger.info('Crop size: %s', settings.crop_size) settings.logger.info('Crop size: %s', settings.crop_size)
...@@ -97,9 +97,6 @@ def processData(settings, file_list): ...@@ -97,9 +97,6 @@ def processData(settings, file_list):
# swap channel # swap channel
if settings.is_swap_channel: if settings.is_swap_channel:
img = img[settings.swap_channel, :, :] img = img[settings.swap_channel, :, :]
img_feat = preprocess_img(img, img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
settings.img_mean, settings.is_train, settings.color)
settings.crop_size,
settings.is_train,
settings.color)
yield img_feat.tolist(), int(lab.strip()) yield img_feat.tolist(), int(lab.strip())
...@@ -24,9 +24,7 @@ echo "Downloading ResNet models..." ...@@ -24,9 +24,7 @@ echo "Downloading ResNet models..."
for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz
do do
# following is the google drive address wget http://paddlepaddle.bj.bcebos.com/model_zoo/imagenet/$file
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/imagenet/$file --no-check-certificate
tar -xvf $file tar -xvf $file
rm $file rm $file
done done
......
...@@ -17,9 +17,11 @@ import sys ...@@ -17,9 +17,11 @@ import sys
import cPickle import cPickle
import logging import logging
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
def load_feature_c(file): def load_feature_c(file):
""" """
Load feature extracted by C++ interface. Load feature extracted by C++ interface.
...@@ -30,14 +32,15 @@ def load_feature_c(file): ...@@ -30,14 +32,15 @@ def load_feature_c(file):
f = open(file, 'r') f = open(file, 'r')
for line in f: for line in f:
sample = [] sample = []
for slot in line.strip().split(";"): for slot in line.strip().split(";"):
fea = [float(val) for val in slot.strip().split()] fea = [float(val) for val in slot.strip().split()]
if fea: if fea:
sample.append(fea) sample.append(fea)
features.append(sample) features.append(sample)
f.close() f.close()
return features return features
def load_feature_py(feature_dir): def load_feature_py(feature_dir):
""" """
Load feature extracted by python interface. Load feature extracted by python interface.
...@@ -54,6 +57,7 @@ def load_feature_py(feature_dir): ...@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
logging.info('Load feature file %s', file_name) logging.info('Load feature file %s', file_name)
return features return features
if __name__ == '__main__': if __name__ == '__main__':
print load_feature_py(sys.argv[1]) print load_feature_py(sys.argv[1])
#print load_feature_c(sys.argv[1]) #print load_feature_c(sys.argv[1])
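A hedged illustration of the text layout load_feature_c() parses: one sample per line, output layers ("slots") separated by ';', feature values separated by spaces. The numbers are made up:

line = "0.1 0.2 0.3;0.9 0.1"   # two output layers for one sample
sample = [[float(v) for v in slot.split()] for slot in line.split(";")]
print sample                   # [[0.1, 0.2, 0.3], [0.9, 0.1]]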
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
""" """
paper: https://arxiv.org/abs/1512.03385 paper: https://arxiv.org/abs/1512.03385
""" """
...@@ -28,15 +27,19 @@ if not is_predict and data_provider: ...@@ -28,15 +27,19 @@ if not is_predict and data_provider:
# mean.meta size : 3 x 224 x 224. # mean.meta size : 3 x 224 x 224.
# If you use three mean value, set like: # If you use three mean value, set like:
# "mean_value:103.939,116.779,123.68;" # "mean_value:103.939,116.779,123.68;"
args={ args = {
'mean_meta': "model/mean_meta_224/mean.meta", 'mean_meta': "model/mean_meta_224/mean.meta",
'image_size': 224, 'crop_size': 224, 'image_size': 224,
'color': True,'swap_channel:': [2, 1, 0]} 'crop_size': 224,
define_py_data_sources2(train_list, 'color': True,
'example/test.list', 'swap_channel:': [2, 1, 0]
module="example.image_list_provider", }
obj="processData", define_py_data_sources2(
args=args) train_list,
'example/test.list',
module="example.image_list_provider",
obj="processData",
args=args)
batch_size = 1 batch_size = 1
learning_rate = 0.1 / batch_size learning_rate = 0.1 / batch_size
...@@ -54,12 +57,16 @@ Settings( ...@@ -54,12 +57,16 @@ Settings(
learning_method='momentum', learning_method='momentum',
learning_rate_decay_a=0.5, learning_rate_decay_a=0.5,
learning_rate_decay_b=1200000 * 10, learning_rate_decay_b=1200000 * 10,
learning_rate_schedule="discexp", learning_rate_schedule="discexp", )
)
def conv_bn_layer(name, input, filter_size, num_filters, def conv_bn_layer(name,
stride, padding, channels=None, input,
filter_size,
num_filters,
stride,
padding,
channels=None,
active_type=ReluActivation()): active_type=ReluActivation()):
""" """
A wrapper for conv layer with batch normalization layers. A wrapper for conv layer with batch normalization layers.
...@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters, ...@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
conv layer has no activation. conv layer has no activation.
""" """
tmp = img_conv_layer(name=name + "_conv", tmp = img_conv_layer(
input=input, name=name + "_conv",
filter_size=filter_size, input=input,
num_channels=channels, filter_size=filter_size,
num_filters=num_filters, num_channels=channels,
stride=stride, num_filters=num_filters,
padding=padding, stride=stride,
act=LinearActivation(), padding=padding,
bias_attr=False) act=LinearActivation(),
return batch_norm_layer(name=name + "_bn", bias_attr=False)
input=tmp, return batch_norm_layer(
act=active_type, name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
use_global_stats=is_test)
def bottleneck_block(name, input, num_filters1, num_filters2): def bottleneck_block(name, input, num_filters1, num_filters2):
...@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2): ...@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
Last conv_bn_layer has no activation. Last conv_bn_layer has no activation.
Addto layer has activation of relu. Addto layer has activation of relu.
""" """
last_name = conv_bn_layer(name=name + '_branch2a', last_name = conv_bn_layer(
input=input, name=name + '_branch2a',
filter_size=1, input=input,
num_filters=num_filters1, filter_size=1,
stride=1, num_filters=num_filters1,
padding=0) stride=1,
last_name = conv_bn_layer(name=name + '_branch2b', padding=0)
input=last_name, last_name = conv_bn_layer(
filter_size=3, name=name + '_branch2b',
num_filters=num_filters1, input=last_name,
stride=1, filter_size=3,
padding=1) num_filters=num_filters1,
last_name = conv_bn_layer(name=name + '_branch2c', stride=1,
input=last_name, padding=1)
filter_size=1, last_name = conv_bn_layer(
num_filters=num_filters2, name=name + '_branch2c',
stride=1, input=last_name,
padding=0, filter_size=1,
active_type=LinearActivation()) num_filters=num_filters2,
stride=1,
return addto_layer(name=name + "_addto", padding=0,
input=[input, last_name], active_type=LinearActivation())
act=ReluActivation())
return addto_layer(
name=name + "_addto", input=[input, last_name], act=ReluActivation())
def mid_projection(name, input, num_filters1, num_filters2, stride=2): def mid_projection(name, input, num_filters1, num_filters2, stride=2):
...@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2): ...@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
branch2x: bottleneck building block, shortcuts are identity. branch2x: bottleneck building block, shortcuts are identity.
""" """
# stride = 2 # stride = 2
branch1 = conv_bn_layer(name=name + '_branch1', branch1 = conv_bn_layer(
input=input, name=name + '_branch1',
filter_size=1, input=input,
num_filters=num_filters2, filter_size=1,
stride=stride, num_filters=num_filters2,
padding=0, stride=stride,
active_type=LinearActivation()) padding=0,
active_type=LinearActivation())
last_name = conv_bn_layer(name=name + '_branch2a',
input=input, last_name = conv_bn_layer(
filter_size=1, name=name + '_branch2a',
num_filters=num_filters1, input=input,
stride=stride, filter_size=1,
padding=0) num_filters=num_filters1,
last_name = conv_bn_layer(name=name + '_branch2b', stride=stride,
input=last_name, padding=0)
filter_size=3, last_name = conv_bn_layer(
num_filters=num_filters1, name=name + '_branch2b',
stride=1, input=last_name,
padding=1) filter_size=3,
num_filters=num_filters1,
last_name = conv_bn_layer(name=name + '_branch2c', stride=1,
input=last_name, padding=1)
filter_size=1,
num_filters=num_filters2, last_name = conv_bn_layer(
stride=1, name=name + '_branch2c',
padding=0, input=last_name,
active_type=LinearActivation()) filter_size=1,
num_filters=num_filters2,
return addto_layer(name=name + "_addto", stride=1,
input=[branch1, last_name], padding=0,
act=ReluActivation()) active_type=LinearActivation())
return addto_layer(
name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
...@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): ...@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
# For ImageNet # For ImageNet
# conv1: 112x112 # conv1: 112x112
img = data_layer(name='input', size=224 * 224 * 3) img = data_layer(name='input', size=224 * 224 * 3)
tmp = conv_bn_layer("conv1", img, tmp = conv_bn_layer(
filter_size=7, "conv1",
channels=3, img,
num_filters=64, filter_size=7,
stride=2, channels=3,
padding=3) num_filters=64,
stride=2,
padding=3)
tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2) tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
# conv2_x: 56x56 # conv2_x: 56x56
tmp = mid_projection(name="res2_1", tmp = mid_projection(
input=tmp, name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
num_filters1=64,
num_filters2=256,
stride=1)
for i in xrange(2, res2_num + 1, 1): for i in xrange(2, res2_num + 1, 1):
tmp = bottleneck_block(name="res2_" + str(i), tmp = bottleneck_block(
input=tmp, name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
num_filters1=64,
num_filters2=256)
# conv3_x: 28x28 # conv3_x: 28x28
tmp = mid_projection(name="res3_1", tmp = mid_projection(
input=tmp, name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
num_filters1=128,
num_filters2=512)
for i in xrange(2, res3_num + 1, 1): for i in xrange(2, res3_num + 1, 1):
tmp = bottleneck_block(name="res3_" + str(i), tmp = bottleneck_block(
input=tmp, num_filters1=128, name="res3_" + str(i),
num_filters2=512) input=tmp,
num_filters1=128,
num_filters2=512)
# conv4_x: 14x14 # conv4_x: 14x14
tmp = mid_projection(name="res4_1", input=tmp, tmp = mid_projection(
num_filters1=256, num_filters2=1024) name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
for i in xrange(2, res4_num + 1, 1): for i in xrange(2, res4_num + 1, 1):
tmp = bottleneck_block(name="res4_" + str(i), tmp = bottleneck_block(
input=tmp, name="res4_" + str(i),
num_filters1=256, input=tmp,
num_filters2=1024) num_filters1=256,
num_filters2=1024)
# conv5_x: 7x7 # conv5_x: 7x7
tmp = mid_projection(name="res5_1", input=tmp, tmp = mid_projection(
num_filters1=512, num_filters2=2048) name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
for i in xrange(2, res5_num + 1, 1): for i in xrange(2, res5_num + 1, 1):
tmp = bottleneck_block(name="res5_" + str(i), tmp = bottleneck_block(
input=tmp, num_filters1=512, name="res5_" + str(i),
num_filters2=2048) input=tmp,
num_filters1=512,
tmp = img_pool_layer(name='avgpool', num_filters2=2048)
input=tmp,
pool_size=7, tmp = img_pool_layer(
stride=1, name='avgpool',
pool_type=AvgPooling()) input=tmp,
pool_size=7,
output = fc_layer(name='output', stride=1,
input=tmp, pool_type=AvgPooling())
size=1000,
act=SoftmaxActivation()) output = fc_layer(
name='output', input=tmp, size=1000, act=SoftmaxActivation())
if not is_predict: if not is_predict:
classification_cost(input=output, label=data_layer(name='label', classification_cost(
size=1)) input=output, label=data_layer(
name='label', size=1))
def res_net_50(): def res_net_50():
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import itertools
import random
from paddle.trainer.config_parser import parse_config
from py_paddle import swig_paddle as api
from py_paddle import DataProviderConverter
from paddle.trainer.PyDataProvider2 \
import integer_value, integer_value_sequence, sparse_binary_vector
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
"--train_data", type=str, required=False, help="train data file")
parser.add_argument("--test_data", type=str, help="test data file")
parser.add_argument(
"--config", type=str, required=True, help="config file name")
parser.add_argument("--dict_file", required=True, help="dictionary file")
parser.add_argument(
"--seq", default=1, type=int, help="whether use sequence training")
parser.add_argument(
"--use_gpu", default=0, type=int, help="whether use GPU for training")
parser.add_argument(
"--trainer_count",
default=1,
type=int,
help="Number of threads for training")
parser.add_argument(
"--num_passes", default=5, type=int, help="Number of training passes")
return parser.parse_args()
UNK_IDX = 0
def load_data(file_name, word_dict):
with open(file_name, 'r') as f:
for line in f:
label, comment = line.strip().split('\t')
words = comment.split()
word_slot = [word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label)
def load_dict(dict_file):
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
return word_dict
def main():
options = parse_arguments()
api.initPaddle("--use_gpu=%s" % options.use_gpu,
"--trainer_count=%s" % options.trainer_count)
word_dict = load_dict(options.dict_file)
train_dataset = list(load_data(options.train_data, word_dict))
if options.test_data:
test_dataset = list(load_data(options.test_data, word_dict))
else:
test_dataset = None
trainer_config = parse_config(options.config,
"dict_file=%s" % options.dict_file)
# No need to have data provider for trainer
trainer_config.ClearField('data_config')
trainer_config.ClearField('test_data_config')
# create a GradientMachine from the model configuration
model = api.GradientMachine.createFromConfigProto(
trainer_config.model_config)
# create a trainer for the gradient machine
trainer = api.Trainer.create(trainer_config, model)
# create a data converter which converts data to PaddlePaddle
# internal format
input_types = [
integer_value_sequence(len(word_dict)) if options.seq else
sparse_binary_vector(len(word_dict)), integer_value(2)
]
converter = DataProviderConverter(input_types)
batch_size = trainer_config.opt_config.batch_size
trainer.startTrain()
for train_pass in xrange(options.num_passes):
trainer.startTrainPass()
random.shuffle(train_dataset)
for pos in xrange(0, len(train_dataset), batch_size):
batch = itertools.islice(train_dataset, pos, pos + batch_size)
size = min(batch_size, len(train_dataset) - pos)
trainer.trainOneDataBatch(size, converter(batch))
trainer.finishTrainPass()
if test_dataset:
trainer.startTestPeriod()
for pos in xrange(0, len(test_dataset), batch_size):
batch = itertools.islice(test_dataset, pos, pos + batch_size)
size = min(batch_size, len(test_dataset) - pos)
trainer.testOneDataBatch(size, converter(batch))
trainer.finishTestPeriod()
trainer.finishTrain()
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Note: if using trainer_config.emb.py, trainer_config.cnn.py
# or trainer_config.lstm.py, you need to change --seq to --seq=1
# because they are sequence models.
python api_train.py \
--config=trainer_config.lr.py \
--trainer_count=2 \
--num_passes=15 \
--use_gpu=0 \
--seq=0 \
--train_data=data/train.txt \
--test_data=data/test.txt \
--dict_file=data/dict.txt \
2>&1 | tee 'train.log'
...@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import * ...@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary # id of the word not in dictionary
UNK_IDX = 0 UNK_IDX = 0
# initializer is called by the framework during initialization. # initializer is called by the framework during initialization.
# It allows the user to describe the data types and set up the # It allows the user to describe the data types and set up the
# necessary data structures for later use. # necessary data structures for later use.
...@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs): ...@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs):
# The second input is an integer. It represents the category id of the # The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset. # sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative) # (1 for positive and 0 for negative)
integer_value(2)] integer_value(2)
]
# Declaring a data provider. It has an initializer 'initializer'. # Declaring a data provider. It has an initializer 'initializer'.
# It will cache the generated data of the first pass in memory, so that # It will cache the generated data of the first pass in memory, so that
...@@ -69,9 +72,8 @@ def process(settings, file_name): ...@@ -69,9 +72,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs): def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [sparse_binary_vector(len(dictionary))]
sparse_binary_vector(len(dictionary))
]
# Declaring a data provider for prediction. The difference from process # Declaring a data provider for prediction. The difference from process
# is that no label is generated. # is that no label is generated.
...@@ -79,6 +81,6 @@ def predict_initializer(settings, dictionary, **kwargs): ...@@ -79,6 +81,6 @@ def predict_initializer(settings, dictionary, **kwargs):
def process_predict(settings, file_name): def process_predict(settings, file_name):
with open(file_name, 'r') as f: with open(file_name, 'r') as f:
for line in f: for line in f:
comment = line.strip() comment = line.strip().split()
word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment] word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment]
yield word_vector yield word_vector
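The comments above spell out the PyDataProvider2 contract used by all of these quick-start providers: an init hook stores the dictionary and declares settings.input_types, and a @provider-decorated generator yields one sample per input line. A minimal self-contained sketch of that pattern (hypothetical names, using only APIs that already appear in this diff; the assumed line format is "<label>\t<space-separated words>"):

from paddle.trainer.PyDataProvider2 import *

UNK_IDX = 0


def demo_initializer(settings, dictionary, **kwargs):
    # One sparse text slot plus a two-class integer label, as in dataprovider_bow.
    settings.word_dict = dictionary
    settings.input_types = [
        sparse_binary_vector(len(dictionary)), integer_value(2)
    ]


@provider(init_hook=demo_initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def demo_process(settings, file_name):
    with open(file_name, 'r') as f:
        for line in f:
            label, comment = line.strip().split('\t')
            word_slot = [
                settings.word_dict.get(w, UNK_IDX) for w in comment.split()
            ]
            yield word_slot, int(label)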
...@@ -16,6 +16,7 @@ from paddle.trainer.PyDataProvider2 import * ...@@ -16,6 +16,7 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0 UNK_IDX = 0
def initializer(settings, dictionary, **kwargs): def initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [
...@@ -23,7 +24,8 @@ def initializer(settings, dictionary, **kwargs): ...@@ -23,7 +24,8 @@ def initializer(settings, dictionary, **kwargs):
# The values of the integers range from 0 to len(dictionary)-1 # The values of the integers range from 0 to len(dictionary)-1
integer_value_sequence(len(dictionary)), integer_value_sequence(len(dictionary)),
# Define the second input for label id # Define the second input for label id
integer_value(2)] integer_value(2)
]
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM) @provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
...@@ -39,7 +41,8 @@ def process(settings, file_name): ...@@ -39,7 +41,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs): def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [
integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE) integer_value(
len(dictionary), seq_type=SequenceType.SEQUENCE)
] ]
...@@ -47,6 +50,6 @@ def predict_initializer(settings, dictionary, **kwargs): ...@@ -47,6 +50,6 @@ def predict_initializer(settings, dictionary, **kwargs):
def process_predict(settings, file_name): def process_predict(settings, file_name):
with open(file_name, 'r') as f: with open(file_name, 'r') as f:
for line in f: for line in f:
comment = line.strip() comment = line.strip().split()
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in comment] word_slot = [settings.word_dict.get(w, UNK_IDX) for w in comment]
yield word_slot yield word_slot
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
1. (remove HTML before or not) tokenizing 1. (remove HTML before or not) tokenizing
2. pos sample: rating score 5; neg sample: rating score 1-2. 2. pos sample: rating score 5; neg sample: rating score 1-2.
...@@ -35,7 +34,8 @@ import multiprocessing ...@@ -35,7 +34,8 @@ import multiprocessing
batch_size = 5000 batch_size = 5000
word_count = {} word_count = {}
num_tokenize = max(1, multiprocessing.cpu_count() - 2) # parse + tokenize + save num_tokenize = max(1,
multiprocessing.cpu_count() - 2) # parse + tokenize + save
max_queue_size = 8 max_queue_size = 8
parse_queue = Queue(maxsize=max_queue_size + num_tokenize) parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize) tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
......
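Only the worker count and queue sizing of this preprocessing pipeline are visible in the diff; the rest is elided. As a rough, hedged sketch of the bounded producer/worker pattern those queues imply (not the file's actual implementation; names and the toy tokenizer are made up):

from multiprocessing import Process, Queue
import multiprocessing


def parse_worker(in_queue, n_workers):
    # Stand-in producer: the real script parses the gzipped review file here.
    for line in ["a good product", "a bad product"]:
        in_queue.put(line)
    for _ in range(n_workers):
        in_queue.put(None)  # one stop marker per tokenize worker


def tokenize_worker(in_queue, out_queue):
    while True:
        line = in_queue.get()
        if line is None:
            out_queue.put(None)
            break
        out_queue.put(line.split())  # stand-in for the real tokenizer


if __name__ == '__main__':
    n_workers, max_queue_size = max(1, multiprocessing.cpu_count() - 2), 8
    parse_q = Queue(maxsize=max_queue_size + n_workers)  # bounded => back-pressure
    token_q = Queue(maxsize=max_queue_size + n_workers)
    procs = [Process(target=tokenize_worker, args=(parse_q, token_q))
             for _ in range(n_workers)]
    procs.append(Process(target=parse_worker, args=(parse_q, n_workers)))
    for p in procs:
        p.start()
    done = 0
    while done < n_workers:  # the "save" stage: drain tokenized results
        item = token_q.get()
        if item is None:
            done += 1
        else:
            print item
    for p in procs:
        p.join()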
...@@ -20,13 +20,22 @@ ...@@ -20,13 +20,22 @@
set -e set -e
export LC_ALL=C
UNAME_STR=`uname`
if [[ ${UNAME_STR} == 'Linux' ]]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'
fi
mkdir -p data/tmp mkdir -p data/tmp
python preprocess.py -i data/reviews_Electronics_5.json.gz python preprocess.py -i data/reviews_Electronics_5.json.gz
# uniq and shuffle # uniq and shuffle
cd data/tmp cd data/tmp
echo 'uniq and shuffle...' echo 'uniq and shuffle...'
cat pos_*|sort|uniq|shuf> pos.shuffed cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
cat neg_*|sort|uniq|shuf> neg.shuffed cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
min_len=`sed -n '$=' neg.shuffed` min_len=`sed -n '$=' neg.shuffed`
test_num=$((min_len/10)) test_num=$((min_len/10))
...@@ -40,8 +49,8 @@ head -n$train_num neg.shuffed >train.neg ...@@ -40,8 +49,8 @@ head -n$train_num neg.shuffed >train.neg
tail -n$test_num pos.shuffed >test.pos tail -n$test_num pos.shuffed >test.pos
tail -n$test_num neg.shuffed >test.neg tail -n$test_num neg.shuffed >test.neg
cat train.pos train.neg|shuf>../train.txt cat train.pos train.neg | ${SHUF_PROG} >../train.txt
cat test.pos test.neg|shuf>../test.txt cat test.pos test.neg | ${SHUF_PROG} >../test.txt
cd - cd -
echo 'data/train.txt' > data/train.list echo 'data/train.txt' > data/train.list
......
...@@ -18,11 +18,14 @@ cfg=trainer_config.lr.py ...@@ -18,11 +18,14 @@ cfg=trainer_config.lr.py
#cfg=trainer_config.emb.py #cfg=trainer_config.emb.py
#cfg=trainer_config.cnn.py #cfg=trainer_config.cnn.py
#cfg=trainer_config.lstm.py #cfg=trainer_config.lstm.py
#cfg=trainer_config.bidi-lstm.py
#cfg=trainer_config.db-lstm.py
#cfg=trainer_config.resnet-lstm.py
paddle train \ paddle train \
--config=$cfg \ --config=$cfg \
--save_dir=./output \ --save_dir=./output \
--trainer_count=4 \ --trainer_count=4 \
--log_period=20 \ --log_period=100 \
--num_passes=15 \ --num_passes=15 \
--use_gpu=false \ --use_gpu=false \
--show_parameter_stats_period=100 \ --show_parameter_stats_period=100 \
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(
train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
bi_lstm = bidirectional_lstm(input=emb, size=128)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(
input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -39,8 +40,7 @@ settings( ...@@ -39,8 +40,7 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128) embedding = embedding_layer(input=data, size=128)
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(
train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
input_layers = [hidden_0, lstm_0]
for i in range(1, 8):
fc = fc_layer(input=input_layers, size=128)
lstm = lstmemory(
input=fc,
layer_attr=ExtraAttr(drop_rate=0.1),
reverse=(i % 2) == 1, )
input_layers = [fc, lstm]
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(
input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
...@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
batch_size=batch_size, batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
learning_rate=2e-3,
learning_method=AdamOptimizer()
)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128) embedding = embedding_layer(input=data, size=128)
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt" dict_file = get_config_arg('dict_file', str, "./data/dict.txt")
word_dict = dict() word_dict = dict()
with open(dict_file, 'r') as f: with open(dict_file, 'r') as f:
for i, line in enumerate(f): for i, line in enumerate(f):
...@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict' ...@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict'
# We need to use different process for training and prediction. # We need to use different process for training and prediction.
# For training, the input data includes both word IDs and labels. # For training, the input data includes both word IDs and labels.
# For prediction, the input data only includes word IDs. # For prediction, the input data only includes word IDs.
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_bow", test_list=tst,
obj=process, module="dataprovider_bow",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -44,8 +45,7 @@ settings( ...@@ -44,8 +45,7 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
# Define the data for text features. The size of the data layer is the number # Define the data for text features. The size of the data layer is the number
# of words in the dictionary. # of words in the dictionary.
...@@ -63,7 +63,6 @@ if not is_predict: ...@@ -63,7 +63,6 @@ if not is_predict:
label = data_layer(name="label", size=2) label = data_layer(name="label", size=2)
# Define cross-entropy classification loss and error. # Define cross-entropy classification loss and error.
classification_cost(input=output, label=label)
cls = classification_cost(input=output, label=label) cls = classification_cost(input=output, label=label)
outputs(cls) outputs(cls)
else: else:
......
...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -39,24 +40,14 @@ settings( ...@@ -39,24 +40,14 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128) emb = embedding_layer(input=data, size=128)
fc = fc_layer(input=emb, size=512, lstm = simple_lstm(
act=LinearActivation(), input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
bias_attr=bias_attr, lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
layer_attr=ExtraAttr(drop_rate=0.1)) output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
lstm = lstmemory(input=fc, act=TanhActivation(),
bias_attr=bias_attr,
layer_attr=ExtraAttr(drop_rate=0.25))
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
if is_predict: if is_predict:
maxid = maxid_layer(output) maxid = maxid_layer(output)
outputs([maxid, output]) outputs([maxid, output])
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states
and inputs of the previous LSTM layer instead of only the hidden states.
This architecture is from:
Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
Google's Neural Machine Translation System: Bridging the Gap between Human and
Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
Different from the architecture described in the paper, we use a stack of
single-direction LSTM layers as the first layer instead of a bi-directional
LSTM. Also, since this is demo code, we stack 4 layers instead of 8 to reduce
computation time.
"""
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = emb, lstm
for i in range(3):
# The input to the current layer is the sum of the hidden state
# and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state])
hidden_state = simple_lstm(input=current_input, size=128,
lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
...@@ -21,8 +21,9 @@ def meta_to_header(meta, name): ...@@ -21,8 +21,9 @@ def meta_to_header(meta, name):
yield integer_value(each_meta['max']) yield integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding': elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence' is_seq = each_meta['seq'] == 'sequence'
yield integer_value(len(each_meta['dict']), yield integer_value(
seq_type=SequenceType.SEQUENCE if is_seq len(each_meta['dict']),
else SequenceType.NO_SEQUENCE) seq_type=SequenceType.SEQUENCE
if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense': elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict'])) yield dense_vector(len(each_meta['dict']))
...@@ -14,4 +14,3 @@ ...@@ -14,4 +14,3 @@
"fields": ["id", "title", "genres"] "fields": ["id", "title", "genres"]
} }
} }
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
config_generator.py config_generator.py
...@@ -29,10 +28,7 @@ import json ...@@ -29,10 +28,7 @@ import json
import docopt import docopt
import copy import copy
DEFAULT_FILE = { DEFAULT_FILE = {"type": "split", "delimiter": ","}
"type": "split",
"delimiter": ","
}
DEFAULT_FIELD = { DEFAULT_FIELD = {
"id": { "id": {
...@@ -107,19 +103,16 @@ def main(filename, fmt): ...@@ -107,19 +103,16 @@ def main(filename, fmt):
field = copy.deepcopy(DEFAULT_FIELD[field_key]) field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos field['pos'] = pos
fields.append(field) fields.append(field)
obj[k] = { obj[k] = {"file": file_dict, "fields": fields}
"file": file_dict, meta = {"meta": obj}
"fields": fields
}
meta = {
"meta": obj
}
# print meta # print meta
if fmt == 'json': if fmt == 'json':
def formatter(x): def formatter(x):
import json import json
return json.dumps(x, indent=2) return json.dumps(x, indent=2)
elif fmt == 'yaml': elif fmt == 'yaml':
def formatter(x): def formatter(x):
import yaml import yaml
return yaml.safe_dump(x, default_flow_style=False) return yaml.safe_dump(x, default_flow_style=False)
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Preprocess the Movielens dataset to get movie/user objects. Preprocess the Movielens dataset to get movie/user objects.
...@@ -66,8 +65,8 @@ class SortedIDGenerator(object): ...@@ -66,8 +65,8 @@ class SortedIDGenerator(object):
self.__key_set__.add(key) self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False): def finish_scan(self, compare=None, key=None, reverse=False):
self.__key_set__ = sorted(list(self.__key_set__), cmp=compare, self.__key_set__ = sorted(
key=key, reverse=reverse) list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict() self.dict = dict()
for idx, each_key in enumerate(self.__key_set__): for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx self.dict[each_key] = idx
...@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object): ...@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object):
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict( self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE) self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split': elif config['dict']['type'] == 'split':
self.dict = SplitEmbeddingDict( self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content': elif config['dict']['type'] == 'whole_content':
self.dict = EmbeddingFieldParser.WholeContentDict( self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
config['dict']['sort']) 'sort'])
else: else:
print config print config
assert False assert False
...@@ -333,8 +331,8 @@ class ContentExtractorFactory(object): ...@@ -333,8 +331,8 @@ class ContentExtractorFactory(object):
return PositionContentExtractor(config['pos']) return PositionContentExtractor(config['pos'])
else: else:
extra_args = config['regex'] extra_args = config['regex']
return RegexPositionContentExtractor(pos=config['pos'], return RegexPositionContentExtractor(
**extra_args) pos=config['pos'], **extra_args)
class MetaFile(object): class MetaFile(object):
...@@ -364,9 +362,10 @@ class MetaFile(object): ...@@ -364,9 +362,10 @@ class MetaFile(object):
metas = map(lambda x: x.meta_field(), field_parsers) metas = map(lambda x: x.meta_field(), field_parsers)
# print metas # print metas
key_index = filter(lambda x: x is not None, map( key_index = filter(
lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] lambda x: x is not None,
else None, enumerate(metas)))[0] map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
enumerate(metas)))[0]
key_map = [] key_map = []
for i in range(min(key_index, len(metas))): for i in range(min(key_index, len(metas))):
...@@ -374,12 +373,7 @@ class MetaFile(object): ...@@ -374,12 +373,7 @@ class MetaFile(object):
for i in range(key_index + 1, len(metas)): for i in range(key_index + 1, len(metas)):
key_map.append(i) key_map.append(i)
obj = { obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
'__meta__': {
'raw_meta': metas,
'feature_map': key_map
}
}
for each_block in reader.read(): for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block) idx = field_parsers[key_index].parse(each_block)
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Separate the movielens 1m dataset into train/test files. Separate the movielens 1m dataset into train/test files.
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
import common_utils # parse import common_utils # parse
def hook(settings, meta, **kwargs): def hook(settings, meta, **kwargs):
""" """
Init hook is invoked before processing data. It will set obj.slots and store Init hook is invoked before processing data. It will set obj.slots and store
...@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs): ...@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs):
settings.input_types = headers settings.input_types = headers
settings.meta = meta settings.meta = meta
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM) @provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename): def process(settings, filename):
with open(filename, 'r') as f: with open(filename, 'r') as f:
......
...@@ -28,7 +28,8 @@ if __name__ == '__main__': ...@@ -28,7 +28,8 @@ if __name__ == '__main__':
model_path = sys.argv[1] model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0') swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1") conf = parse_config("trainer_config.py", "is_predict=1")
network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine) assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path) network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f: with open('./data/meta.bin', 'rb') as f:
...@@ -39,11 +40,12 @@ if __name__ == '__main__': ...@@ -39,11 +40,12 @@ if __name__ == '__main__':
while True: while True:
movie_id = int(raw_input("Input movie_id: ")) movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: ")) user_id = int(raw_input("Input user_id: "))
movie_meta = meta['movie'][movie_id] # Query Data From Meta. movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id] user_meta = meta['user'][user_id]
data = [movie_id - 1] data = [movie_id - 1]
data.extend(movie_meta) data.extend(movie_meta)
data.append(user_id - 1) data.append(user_id - 1)
data.extend(user_meta) data.extend(user_meta)
print "Prediction Score is %.2f" % ((network.forwardTest( print "Prediction Score is %.2f" % (
cvt.convert([data]))[0]['value'][0][0] + 5) / 2) (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
/ 2)
...@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f: ...@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f:
# load meta file # load meta file
meta = pickle.load(f) meta = pickle.load(f)
settings(batch_size=1600, learning_rate=1e-3, settings(
learning_method=RMSPropOptimizer()) batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name): def construct_feature(name):
...@@ -59,11 +59,10 @@ def construct_feature(name): ...@@ -59,11 +59,10 @@ def construct_feature(name):
slot_name = each_meta.get('name', '%s_id' % name) slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id': if type_name == 'id':
slot_dim = each_meta['max'] slot_dim = each_meta['max']
embedding = embedding_layer(input=data_layer(slot_name, embedding = embedding_layer(
size=slot_dim), input=data_layer(
size=256) slot_name, size=slot_dim), size=256)
fusion.append(fc_layer(input=embedding, fusion.append(fc_layer(input=embedding, size=256))
size=256))
elif type_name == 'embedding': elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence' is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict']) slot_dim = len(each_meta['dict'])
...@@ -71,17 +70,14 @@ def construct_feature(name): ...@@ -71,17 +70,14 @@ def construct_feature(name):
embedding = embedding_layer(input=din, size=256) embedding = embedding_layer(input=din, size=256)
if is_seq: if is_seq:
fusion.append( fusion.append(
text_conv_pool(input=embedding, context_len=5, text_conv_pool(
hidden_size=256)) input=embedding, context_len=5, hidden_size=256))
else: else:
fusion.append(fc_layer(input=embedding, fusion.append(fc_layer(input=embedding, size=256))
size=256))
elif type_name == 'one_hot_dense': elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict']) slot_dim = len(each_meta['dict'])
hidden = fc_layer(input=data_layer(slot_name, slot_dim), hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
size=256) fusion.append(fc_layer(input=hidden, size=256))
fusion.append(fc_layer(input=hidden,
size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256) return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
...@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie") ...@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user") user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature) similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict: if not is_predict:
outputs(regression_cost(input=similarity, outputs(
label=data_layer('rating', size=1))) regression_cost(
input=similarity, label=data_layer(
define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider', 'rating', size=1)))
obj='process', args={'meta': meta})
define_py_data_sources2(
'data/train.list',
'data/test.list',
module='dataprovider',
obj='process',
args={'meta': meta})
else: else:
outputs(similarity) outputs(similarity)
*.pyc
train.log
data/feature
data/conll05st-release/
data/src.dict
data/test.wsj.props
data/test.wsj.seq_pair
data/test.wsj.words
data/tgt.dict
output
...@@ -26,9 +26,9 @@ def hook(settings, word_dict, label_dict, **kwargs): ...@@ -26,9 +26,9 @@ def hook(settings, word_dict, label_dict, **kwargs):
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)), integer_value_sequence(2),
integer_value_sequence(2), integer_value_sequence(len(label_dict))
integer_value_sequence(len(label_dict))] ]
@provider(init_hook=hook) @provider(init_hook=hook)
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math import math
import os import os
import sys import sys
...@@ -42,7 +41,7 @@ if not is_predict: ...@@ -42,7 +41,7 @@ if not is_predict:
label_dict[w] = i label_dict[w] = i
if is_test: if is_test:
train_list_file = None train_list_file = None
#define data provider #define data provider
define_py_data_sources2( define_py_data_sources2(
......
...@@ -41,22 +41,16 @@ class Prediction(): ...@@ -41,22 +41,16 @@ class Prediction():
len_dict = len(self.dict) len_dict = len(self.dict)
len_label = len(self.labels) len_label = len(self.labels)
conf = parse_config( conf = parse_config(train_conf, 'dict_len=' + str(len_dict) +
train_conf, ',label_len=' + str(len_label) + ',is_predict=True')
'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto( self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config) conf.model_config)
self.network.loadParameters(model_dir) self.network.loadParameters(model_dir)
slots = [ slots = [
integer_value_sequence(len_dict), integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict), integer_value_sequence(2)
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(2)
] ]
self.converter = DataProviderConverter(slots) self.converter = DataProviderConverter(slots)
...@@ -110,8 +104,8 @@ class Prediction(): ...@@ -110,8 +104,8 @@ class Prediction():
len_sen = len(sen.split()) len_sen = len(sen.split())
line_labels = lab[index:index + len_sen] line_labels = lab[index:index + len_sen]
index += len_sen index += len_sen
fout.write(sen + '\t' + ' '.join([self.labels_reverse[ fout.write(sen + '\t' + ' '.join(
i] for i in line_labels]) + '\n') [self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser(): def option_parser():
......
...@@ -18,7 +18,7 @@ set -e ...@@ -18,7 +18,7 @@ set -e
function get_best_pass() { function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \ sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
sort | head -n 1 sort -n | head -n 1
} }
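The switch from plain sort to sort -n in get_best_pass is the substantive part of this change: the first field produced by the sed expression is a floating-point test cost, and lexicographic sorting would rank a cost of 10.5 ahead of 9.8 and pick the wrong pass. A tiny Python illustration with made-up values:

costs = ['10.5 2', '9.8 7']
print sorted(costs)[0]        # lexicographic -> '10.5 2' (wrong pass)
print sorted(costs, key=lambda s: float(s.split()[0]))[0]   # -> '9.8 7'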
log=train.log log=train.log
...@@ -26,7 +26,6 @@ LOG=`get_best_pass $log` ...@@ -26,7 +26,6 @@ LOG=`get_best_pass $log`
LOG=(${LOG}) LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}" best_model_path="output/pass-${LOG[1]}"
config_file=db_lstm.py config_file=db_lstm.py
dict_file=./data/src.dict dict_file=./data/src.dict
label_file=./data/tgt.dict label_file=./data/tgt.dict
......
...@@ -18,7 +18,7 @@ set -e ...@@ -18,7 +18,7 @@ set -e
function get_best_pass() { function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort | head -n 1 sort -n | head -n 1
} }
log=train.log log=train.log
...@@ -36,5 +36,5 @@ paddle train \ ...@@ -36,5 +36,5 @@ paddle train \
--job=test \ --job=test \
--use_gpu=false \ --use_gpu=false \
--config_args=is_test=1 \ --config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log' 2>&1 | tee 'test.log'
...@@ -24,4 +24,3 @@ paddle train \ ...@@ -24,4 +24,3 @@ paddle train \
--show_parameter_stats_period=10 \ --show_parameter_stats_period=10 \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
2>&1 | tee 'train.log' 2>&1 | tee 'train.log'
...@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import * ...@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs): def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [
integer_value_sequence(len(settings.word_dict)), integer_value_sequence(len(settings.word_dict)), integer_value(2)
integer_value(2)] ]
settings.logger.info('dict len : %d' % (len(settings.word_dict))) settings.logger.info('dict len : %d' % (len(settings.word_dict)))
...@@ -29,6 +29,7 @@ def process(settings, file_name): ...@@ -29,6 +29,7 @@ def process(settings, file_name):
label, comment = line.strip().split('\t\t') label, comment = line.strip().split('\t\t')
label = int(label) label = int(label)
words = comment.split() words = comment.split()
word_slot = [settings.word_dict[w] for w in words if w in word_slot = [
settings.word_dict] settings.word_dict[w] for w in words if w in settings.word_dict
]
yield word_slot, label yield word_slot, label
...@@ -18,14 +18,14 @@ from optparse import OptionParser ...@@ -18,14 +18,14 @@ from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config from paddle.trainer.config_parser import parse_config
""" """
Usage: run the following command to show the help message. Usage: run the following command to show the help message.
python predict.py -h python predict.py -h
""" """
class SentimentPrediction(): class SentimentPrediction():
def __init__(self, train_conf, dict_file, model_dir=None, label_file = None): def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
""" """
train_conf: trainer configuration file. train_conf: trainer configuration file.
dict_file: word dictionary file name. dict_file: word dictionary file name.
...@@ -44,10 +44,11 @@ class SentimentPrediction(): ...@@ -44,10 +44,11 @@ class SentimentPrediction():
self.load_label(label_file) self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1") conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(self.model_dir) self.network.loadParameters(self.model_dir)
slots = [integer_value_sequence(self.dict_dim)] input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(slots) self.converter = DataProviderConverter(input_types)
def load_dict(self): def load_dict(self):
""" """
...@@ -61,7 +62,7 @@ class SentimentPrediction(): ...@@ -61,7 +62,7 @@ class SentimentPrediction():
""" """
Load label. Load label.
""" """
self.label={} self.label = {}
for v in open(label_file, 'r'): for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0] self.label[int(v.split('\t')[1])] = v.split('\t')[0]
...@@ -72,7 +73,9 @@ class SentimentPrediction(): ...@@ -72,7 +73,9 @@ class SentimentPrediction():
with open(data_file, 'r') as fdata: with open(data_file, 'r') as fdata:
for line in fdata: for line in fdata:
words = line.strip().split() words = line.strip().split()
word_slot = [self.word_dict[w] for w in words if w in self.word_dict] word_slot = [
self.word_dict[w] for w in words if w in self.word_dict
]
if not word_slot: if not word_slot:
print "all words are not in dictionary: %s", line print "all words are not in dictionary: %s", line
continue continue
...@@ -89,25 +92,48 @@ class SentimentPrediction(): ...@@ -89,25 +92,48 @@ class SentimentPrediction():
if self.label is None: if self.label is None:
print("%s: predicting label is %d" % (data_file, lab[0][0])) print("%s: predicting label is %d" % (data_file, lab[0][0]))
else: else:
print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]])) print("%s: predicting label is %s" %
(data_file, self.label[lab[0][0]]))
def option_parser(): def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage) parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option("-n", "--tconf", action="store", parser.add_option(
dest="train_conf", help="network config") "-n",
parser.add_option("-d", "--dict", action="store", "--tconf",
dest="dict_file",help="dictionary file") action="store",
parser.add_option("-b", "--label", action="store", dest="train_conf",
dest="label", default=None, help="network config")
help="dictionary file") parser.add_option(
parser.add_option("-i", "--data", action="store", "-d",
dest="data", help="data file to predict") "--dict",
parser.add_option("-w", "--model", action="store", action="store",
dest="model_path", default=None, dest="dict_file",
help="model path") help="dictionary file")
parser.add_option(
"-b",
"--label",
action="store",
dest="label",
default=None,
help="dictionary file")
parser.add_option(
"-i",
"--data",
action="store",
dest="data",
help="data file to predict")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
return parser.parse_args() return parser.parse_args()
def main(): def main():
options, args = option_parser() options, args = option_parser()
train_conf = options.train_conf train_conf = options.train_conf
...@@ -119,5 +145,6 @@ def main(): ...@@ -119,5 +145,6 @@ def main():
predict = SentimentPrediction(train_conf, dict_file, model_path, label) predict = SentimentPrediction(train_conf, dict_file, model_path, label)
predict.predict(data) predict.predict(data)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -22,13 +22,13 @@ from os.path import join as join_path ...@@ -22,13 +22,13 @@ from os.path import join as join_path
from optparse import OptionParser from optparse import OptionParser
from paddle.utils.preprocess_util import * from paddle.utils.preprocess_util import *
""" """
Usage: run the following command to show the help message. Usage: run the following command to show the help message.
python preprocess.py -h python preprocess.py -h
""" """
def save_dict(dict, filename, is_reverse = True):
def save_dict(dict, filename, is_reverse=True):
""" """
Save dictionary into file. Save dictionary into file.
dict: input dictionary. dict: input dictionary.
...@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True): ...@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True):
f = open(filename, 'w') f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\ for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse): reverse=is_reverse):
f.write('%s\t%s\n'%(k, v)) f.write('%s\t%s\n' % (k, v))
f.close() f.close()
def tokenize(sentences): def tokenize(sentences):
""" """
Use tokenizer.perl to tokenize input sentences. Use tokenizer.perl to tokenize input sentences.
...@@ -58,6 +59,7 @@ def tokenize(sentences): ...@@ -58,6 +59,7 @@ def tokenize(sentences):
toks = tok_text.split('\n')[:-1] toks = tok_text.split('\n')[:-1]
return toks return toks
def read_lines(path): def read_lines(path):
""" """
path: String, file path. path: String, file path.
...@@ -71,12 +73,17 @@ def read_lines(path): ...@@ -71,12 +73,17 @@ def read_lines(path):
seqs.append(line) seqs.append(line)
return seqs return seqs
class SentimentDataSetCreate(): class SentimentDataSetCreate():
""" """
A class to process data for sentiment analysis task. A class to process data for sentiment analysis task.
""" """
def __init__(self, data_path, output_path,
use_okenizer = True, multi_lines = False): def __init__(self,
data_path,
output_path,
use_okenizer=True,
multi_lines=False):
""" """
data_path: string, traing and testing dataset path data_path: string, traing and testing dataset path
output_path: string, output path, store processed dataset output_path: string, output path, store processed dataset
...@@ -164,23 +171,17 @@ class SentimentDataSetCreate(): ...@@ -164,23 +171,17 @@ class SentimentDataSetCreate():
# Preprocess train data. # Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir) train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..." print "processing train set..."
file_lists = self.save_data(train_data, file_lists = self.save_data(train_data, "train", self.batch_size, True,
"train", True)
self.batch_size,
True,
True)
save_list(file_lists, self.train_list) save_list(file_lists, self.train_list)
# If have test data path, preprocess test data. # If have test data path, preprocess test data.
if os.path.exists(self.test_dir): if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir) test_data, test_lab_set = self.data_list(self.test_dir)
assert(train_lab_set == test_lab_set) assert (train_lab_set == test_lab_set)
print "processing test set..." print "processing test set..."
file_lists = self.save_data(test_data, file_lists = self.save_data(test_data, "test", self.batch_size,
"test", False, self.dict_with_test)
self.batch_size,
False,
self.dict_with_test)
save_list(file_lists, self.test_list) save_list(file_lists, self.test_list)
# save labels set. # save labels set.
...@@ -191,7 +192,9 @@ class SentimentDataSetCreate(): ...@@ -191,7 +192,9 @@ class SentimentDataSetCreate():
save_dict(self.word_count, self.dict_file, True) save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count) self.dict_size = len(self.word_count)
def save_data(self, data, prefix = "", def save_data(self,
data,
prefix="",
batch_size=50000, batch_size=50000,
is_shuffle=False, is_shuffle=False,
build_dict=False): build_dict=False):
...@@ -205,7 +208,8 @@ class SentimentDataSetCreate(): ...@@ -205,7 +208,8 @@ class SentimentDataSetCreate():
return: list of batch names return: list of batch names
""" """
if is_shuffle and self.multi_lines: if is_shuffle and self.multi_lines:
return self.save_data_multi_lines(data, prefix, batch_size, build_dict) return self.save_data_multi_lines(data, prefix, batch_size,
build_dict)
if is_shuffle: if is_shuffle:
random.shuffle(data) random.shuffle(data)
...@@ -213,7 +217,7 @@ class SentimentDataSetCreate(): ...@@ -213,7 +217,7 @@ class SentimentDataSetCreate():
batch_names = [] batch_names = []
for i in range(num_batches): for i in range(num_batches):
batch_name = join_path(self.output_path, batch_name = join_path(self.output_path,
"%s_part_%03d" %(prefix, i)) "%s_part_%03d" % (prefix, i))
begin = i * batch_size begin = i * batch_size
end = min((i + 1) * batch_size, len(data)) end = min((i + 1) * batch_size, len(data))
# read a batch of data # read a batch of data
...@@ -246,7 +250,9 @@ class SentimentDataSetCreate(): ...@@ -246,7 +250,9 @@ class SentimentDataSetCreate():
data_list = tokenize(data_list) data_list = tokenize(data_list)
return label_list, data_list return label_list, data_list
def save_data_multi_lines(self, data, prefix = "", def save_data_multi_lines(self,
data,
prefix="",
batch_size=50000, batch_size=50000,
build_dict=False): build_dict=False):
""" """
...@@ -274,14 +280,14 @@ class SentimentDataSetCreate(): ...@@ -274,14 +280,14 @@ class SentimentDataSetCreate():
self.create_dict(data_list) self.create_dict(data_list)
length = len(label_list) length = len(label_list)
perm_list = np.array([ i for i in xrange(length) ]) perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list) random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size))) num_batches = int(math.ceil(length / float(batch_size)))
batch_names = [] batch_names = []
for i in range(num_batches): for i in range(num_batches):
batch_name = join_path(self.output_path, batch_name = join_path(self.output_path,
"%s_part_%03d" %(prefix, i)) "%s_part_%03d" % (prefix, i))
begin = i * batch_size begin = i * batch_size
end = min((i + 1) * batch_size, length) end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)] sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
...@@ -304,35 +310,50 @@ class SentimentDataSetCreate(): ...@@ -304,35 +310,50 @@ class SentimentDataSetCreate():
f.write('%s\t\t%s\n' % (lab, seq)) f.write('%s\t\t%s\n' % (lab, seq))
f.close() f.close()
def option_parser(): def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\ parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]") "-i data_dir [options]")
parser.add_option("-i", "--data", action="store", parser.add_option(
dest="input", help="Input data directory.") "-i",
parser.add_option("-o", "--output", action="store", "--data",
dest="output", default=None, action="store",
help="Output directory.") dest="input",
parser.add_option("-t", "--tokenizer", action="store", help="Input data directory.")
dest="use_tokenizer", default=True, parser.add_option(
help="Whether to use tokenizer.") "-o",
"--output",
action="store",
dest="output",
default=None,
help="Output directory.")
parser.add_option(
"-t",
"--tokenizer",
action="store",
dest="use_tokenizer",
default=True,
help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store", parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False, dest="multi_lines", default=False,
help="If input text files have multi lines and they "\ help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,") "need to be shuffled, you should set -m True,")
return parser.parse_args() return parser.parse_args()
def main(): def main():
options, args = option_parser() options, args = option_parser()
data_dir=options.input data_dir = options.input
output_dir=options.output output_dir = options.output
use_tokenizer=options.use_tokenizer use_tokenizer = options.use_tokenizer
multi_lines=options.multi_lines multi_lines = options.multi_lines
if output_dir is None: if output_dir is None:
outname = os.path.basename(options.input) outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname) output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
data_creator = SentimentDataSetCreate(data_dir, output_dir, data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
use_tokenizer, multi_lines) multi_lines)
data_creator.create_dataset() data_creator.create_dataset()
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None, ...@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None,
for i, line in enumerate(open(dict_file, 'r')): for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i word_dict[line.split('\t')[0]] = i
define_py_data_sources2(train_list, test_list, define_py_data_sources2(
module="dataprovider", train_list,
obj="process", test_list,
args={'dictionary': word_dict}) module="dataprovider",
obj="process",
args={'dictionary': word_dict})
return dict_dim, class_dim return dict_dim, class_dim
...@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim, ...@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim,
emb = embedding_layer(input=data, size=emb_dim) emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim) bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5) dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(input=dropout, size=class_dim, output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
act=SoftmaxActivation())
if not is_predict: if not is_predict:
lbl = data_layer("label", 1) lbl = data_layer("label", 1)
...@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim, ...@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim,
data = data_layer("word", input_dim) data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim) emb = embedding_layer(input=data, size=emb_dim)
fc1 = fc_layer(input=emb, size=hid_dim, act=linear, fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
bias_attr=bias_attr) lstm1 = lstmemory(
lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr, input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
layer_attr=layer_attr)
inputs = [fc1, lstm1] inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1): for i in range(2, stacked_num + 1):
fc = fc_layer(input=inputs, size=hid_dim, act=linear, fc = fc_layer(
param_attr=para_attr, bias_attr=bias_attr) input=inputs,
lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu, size=hid_dim,
bias_attr=bias_attr, layer_attr=layer_attr) act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm] inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling()) fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling()) lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
output = fc_layer(input=[fc_last, lstm_last], size=class_dim, output = fc_layer(
act=SoftmaxActivation(), input=[fc_last, lstm_last],
bias_attr=bias_attr, param_attr=para_attr) size=class_dim,
act=SoftmaxActivation(),
bias_attr=bias_attr,
param_attr=para_attr)
if is_predict: if is_predict:
outputs(output) outputs(output)
else: else:
outputs( outputs(classification_cost(input=output, label=data_layer('label', 1)))
classification_cost(input=output, label=data_layer('label', 1)))
...@@ -17,7 +17,7 @@ set -e ...@@ -17,7 +17,7 @@ set -e
function get_best_pass() { function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort | head -n 1 sort -n | head -n 1
} }
log=train.log log=train.log
......
...@@ -20,20 +20,19 @@ is_test = get_config_arg('is_test', bool, False) ...@@ -20,20 +20,19 @@ is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction # whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False) is_predict = get_config_arg('is_predict', bool, False)
data_dir = "./data/pre-imdb" data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict) dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config ##################### ################## Algorithm Config #####################
settings( settings(
batch_size=128, batch_size=128,
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
#################### Network Config ###################### #################### Network Config ######################
stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_lstm_net(
stacked_num=3, is_predict=is_predict) dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict) # bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
...@@ -16,9 +16,7 @@ set -e ...@@ -16,9 +16,7 @@ set -e
set -x set -x
# download the in-house paraphrase dataset # download the in-house paraphrase dataset
# following is the google drive address wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/paraphrase.tar.gz --no-check-certificate
# untar the dataset # untar the dataset
tar -zxvf paraphrase.tar.gz tar -zxvf paraphrase.tar.gz
......
...@@ -16,9 +16,7 @@ set -e ...@@ -16,9 +16,7 @@ set -e
set -x set -x
# download the pretrained model # download the pretrained model
# following is the google drive address wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/wmt14_model.tar.gz --no-check-certificate
# untar the model # untar the model
tar -zxvf wmt14_model.tar.gz tar -zxvf wmt14_model.tar.gz
......
...@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs): ...@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs):
if settings.job_mode: if settings.job_mode:
settings.trg_dict = trg_dict settings.trg_dict = trg_dict
settings.slots = [ settings.slots = [
integer_value_sequence(len(settings.src_dict)), integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)), integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict)) integer_value_sequence(len(settings.trg_dict))
] ]
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict))) settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else: else:
settings.slots = [ settings.slots = [
integer_value_sequence(len(settings.src_dict)), integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(open(file_list[0], "r").readlines())) integer_value_sequence(len(open(file_list[0], "r").readlines()))
] ]
...@@ -62,8 +62,7 @@ def process(settings, file_name): ...@@ -62,8 +62,7 @@ def process(settings, file_name):
if settings.job_mode: if settings.job_mode:
trg_seq = line_split[1] # one target sequence trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split() trg_words = trg_seq.split()
trg_ids = [settings.trg_dict.get(w, UNK_IDX) trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
for w in trg_words]
# remove sequence whose length > 80 in training mode # remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80: if len(src_ids) > 80 or len(trg_ids) > 80:
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Example: Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m] python preprocess.py -i INPUT [-d DICTSIZE] [-m]
...@@ -24,12 +23,13 @@ Options: ...@@ -24,12 +23,13 @@ Options:
-m --mergeDict merge source and target dictionary -m --mergeDict merge source and target dictionary
""" """
import os import os
import sys import sys
import string import string
from optparse import OptionParser from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater from paddle.utils.preprocess_util import save_list, DatasetCreater
class SeqToSeqDatasetCreater(DatasetCreater): class SeqToSeqDatasetCreater(DatasetCreater):
""" """
A class to process data for sequence to sequence application. A class to process data for sequence to sequence application.
...@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater):
if not os.path.exists(output): if not os.path.exists(output):
os.system(cmd + '> ' + output) os.system(cmd + '> ' + output)
def build_dict(self, file_path, dict_path, dict_size = -1): def build_dict(self, file_path, dict_path, dict_size=-1):
""" """
Create the dictionary for the file, Note that Create the dictionary for the file, Note that
1. Valid characters include all printable characters 1. Valid characters include all printable characters
...@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater):
for word in words: for word in words:
if word not in dictory: if word not in dictory:
dictory[word] = 1 dictory[word] = 1
else: else:
dictory[word] += 1 dictory[word] += 1
output = open(dict_path, "w+") output = open(dict_path, "w+")
output.write('<s>\n<e>\n<unk>\n') output.write('<s>\n<e>\n<unk>\n')
count = 3 count = 3
for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True): for key, value in sorted(
dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n") output.write(key + "\n")
count += 1 count += 1
if count == dict_size: if count == dict_size:
break break
self.dict_size = count self.dict_size = count
def create_dataset(self, dict_size = -1, mergeDict = False, def create_dataset(self,
suffixes = ['.src', '.trg']): dict_size=-1,
mergeDict=False,
suffixes=['.src', '.trg']):
""" """
Create seqToseq dataset Create seqToseq dataset
""" """
...@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater):
# checkout dataset should be parallel corpora # checkout dataset should be parallel corpora
suffix_len = len(suffixes[0]) suffix_len = len(suffixes[0])
for dataset in dataset_list: for dataset in dataset_list:
file_list = os.listdir(dataset) file_list = os.listdir(dataset)
if len(file_list) % 2 == 1: if len(file_list) % 2 == 1:
raise RuntimeError("dataset should be parallel corpora") raise RuntimeError("dataset should be parallel corpora")
file_list.sort() file_list.sort()
for i in range(0, len(file_list), 2): for i in range(0, len(file_list), 2):
if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
raise RuntimeError("source and target file name should be equal") raise RuntimeError(
"source and target file name should be equal")
# cat all the files with the same suffix in dataset # cat all the files with the same suffix in dataset
for suffix in suffixes: for suffix in suffixes:
...@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater):
list = ['train.list', 'test.list', 'gen.list'] list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list: for dataset in dataset_list:
outname = os.path.basename(dataset) outname = os.path.basename(dataset)
self.concat_file(dataset, outname + suffixes[0], self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname) outname + suffixes[1], dir_list[id], outname)
save_list([os.path.join(dir_list[id], outname)], save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id])) os.path.join(self.output_path, list[id]))
id += 1 id += 1
# build dictionary for train data # build dictionary for train data
dict = ['src.dict', 'trg.dict'] dict = ['src.dict', 'trg.dict']
dict_path = [os.path.join(self.output_path, dict[0]), dict_path = [
os.path.join(self.output_path, dict[1])] os.path.join(self.output_path, dict[0]),
os.path.join(self.output_path, dict[1])
]
if mergeDict: if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1]) outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data' print 'build src dictionary for train data'
...@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater):
os.system('cp ' + dict_path[0] + ' ' + dict_path[1]) os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else: else:
outname = os.path.join(train_dataset, self.train_dir_name) outname = os.path.join(train_dataset, self.train_dir_name)
for id in range(0,2): for id in range(0, 2):
suffix = suffixes[id] suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data' print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size) self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size print 'dictionary size is', self.dict_size
def main(): def main():
usage = "usage: \n" \ usage = "usage: \n" \
"python %prog -i INPUT [-d DICTSIZE] [-m]" "python %prog -i INPUT [-d DICTSIZE] [-m]"
parser = OptionParser(usage) parser = OptionParser(usage)
parser.add_option("-i", action="store", dest="input", parser.add_option(
help="input original dataset path") "-i", action="store", dest="input", help="input original dataset path")
parser.add_option("-d", action="store", dest="dictsize", parser.add_option(
help="specified word count of dictionary") "-d",
parser.add_option("-m", "--mergeDict", action="store_true", dest="mergeDict", action="store",
help="merge source and target dictionary") dest="dictsize",
help="specified word count of dictionary")
parser.add_option(
"-m",
"--mergeDict",
action="store_true",
dest="mergeDict",
help="merge source and target dictionary")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
if options.input[-1] == os.path.sep: if options.input[-1] == os.path.sep:
options.input = options.input[:-1] options.input = options.input[:-1]
...@@ -200,5 +214,6 @@ def main(): ...@@ -200,5 +214,6 @@ def main():
data_creator = SeqToSeqDatasetCreater(options.input, output_path) data_creator = SeqToSeqDatasetCreater(options.input, output_path)
data_creator.create_dataset(dictsize, options.mergeDict) data_creator.create_dataset(dictsize, options.mergeDict)
if __name__ == "__main__": if __name__ == "__main__":
main(); main()
...@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir, ...@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir,
trg_dict = None trg_dict = None
else: else:
train_list = os.path.join(data_dir, train_list) train_list = os.path.join(data_dir, train_list)
test_list = os.path.join(data_dir,test_list) test_list = os.path.join(data_dir, test_list)
define_py_data_sources2(train_list, test_list, define_py_data_sources2(
module = "dataprovider", train_list,
obj = "process", test_list,
args = {"src_dict": src_dict, module="dataprovider",
"trg_dict": trg_dict}) obj="process",
args={"src_dict": src_dict,
"trg_dict": trg_dict})
return {"src_dict_path": src_lang_dict, "trg_dict_path": trg_lang_dict, return {
"gen_result": gen_result} "src_dict_path": src_lang_dict,
"trg_dict_path": trg_lang_dict,
"gen_result": gen_result
}
def gru_encoder_decoder(data_conf, def gru_encoder_decoder(data_conf,
...@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf, ...@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf,
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding')) param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(input=src_embedding, size=encoder_size) src_forward = simple_gru(input=src_embedding, size=encoder_size)
src_backward = simple_gru(input=src_embedding, src_backward = simple_gru(
size=encoder_size, input=src_embedding, size=encoder_size, reverse=True)
reverse=True)
encoded_vector = concat_layer(input=[src_forward, src_backward]) encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj: with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(encoded_vector) encoded_proj += full_matrix_projection(input=encoded_vector)
backward_first = first_seq(input=src_backward) backward_first = first_seq(input=src_backward)
with mixed_layer(size=decoder_size, with mixed_layer(
act=TanhActivation(), ) as decoder_boot: size=decoder_size,
decoder_boot += full_matrix_projection(backward_first) act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word): def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = memory(name='gru_decoder', decoder_mem = memory(
size=decoder_size, name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
boot_layer=decoder_boot)
context = simple_attention(encoded_sequence=enc_vec, context = simple_attention(
encoded_proj=enc_proj, encoded_sequence=enc_vec,
decoder_state=decoder_mem, ) encoded_proj=enc_proj,
decoder_state=decoder_mem, )
with mixed_layer(size=decoder_size * 3) as decoder_inputs: with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(context) decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(current_word) decoder_inputs += full_matrix_projection(input=current_word)
gru_step = gru_step_layer(name='gru_decoder', gru_step = gru_step_layer(
input=decoder_inputs, name='gru_decoder',
output_mem=decoder_mem, input=decoder_inputs,
size=decoder_size) output_mem=decoder_mem,
size=decoder_size)
with mixed_layer(size=target_dict_dim,
bias_attr=True, with mixed_layer(
act=SoftmaxActivation()) as out: size=target_dict_dim, bias_attr=True,
act=SoftmaxActivation()) as out:
out += full_matrix_projection(input=gru_step) out += full_matrix_projection(input=gru_step)
return out return out
decoder_group_name = "decoder_group" decoder_group_name = "decoder_group"
group_inputs=[StaticInput(input=encoded_vector,is_seq=True), group_inputs = [
StaticInput(input=encoded_proj,is_seq=True)] StaticInput(
input=encoded_vector, is_seq=True), StaticInput(
input=encoded_proj, is_seq=True)
]
if not is_generating: if not is_generating:
trg_embedding = embedding_layer( trg_embedding = embedding_layer(
input=data_layer(name='target_language_word', input=data_layer(
size=target_dict_dim), name='target_language_word', size=target_dict_dim),
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding')) param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
...@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf, ...@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf,
# while encoded source sequence is accessed to as an unbounded memory. # while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory # Here, the StaticInput defines a read-only memory
# for the recurrent_group. # for the recurrent_group.
decoder = recurrent_group(name=decoder_group_name, decoder = recurrent_group(
step=gru_decoder_with_attention, name=decoder_group_name,
input=group_inputs) step=gru_decoder_with_attention,
input=group_inputs)
lbl = data_layer(name='target_language_next_word', lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl) cost = classification_cost(input=decoder, label=lbl)
outputs(cost) outputs(cost)
else: else:
...@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf, ...@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf,
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name, beam_gen = beam_search(
step=gru_decoder_with_attention, name=decoder_group_name,
input=group_inputs, step=gru_decoder_with_attention,
bos_id=0, input=group_inputs,
eos_id=1, bos_id=0,
beam_size=beam_size, eos_id=1,
max_length=max_length) beam_size=beam_size,
max_length=max_length)
seqtext_printer_evaluator(input=beam_gen,
id_input=data_layer(name="sent_id", size=1), seqtext_printer_evaluator(
dict_file=trg_dict_path, input=beam_gen,
result_file=gen_trans_file) id_input=data_layer(
name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen) outputs(beam_gen)
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
wget http://www.cnts.ua.ac.be/conll2000/chunking/train.txt.gz
wget http://www.cnts.ua.ac.be/conll2000/chunking/test.txt.gz
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
import math
define_py_data_sources2(
train_list="data/train.list",
test_list="data/test.list",
module="dataprovider",
obj="process")
batch_size = 1
settings(
learning_method=MomentumOptimizer(),
batch_size=batch_size,
regularization=L2Regularization(batch_size * 1e-4),
average_window=0.5,
learning_rate=1e-1,
learning_rate_decay_a=1e-5,
learning_rate_decay_b=0.25, )
num_label_types = 23
def get_simd_size(size):
return int(math.ceil(float(size) / 8)) * 8
# Currently, in order to use sparse_update=True,
# the size has to be aligned.
num_label_types = get_simd_size(num_label_types)
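# For example, get_simd_size(23) == 24: the 23 label types are rounded up
# to the next multiple of 8 so that sparse_update=True can be used.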
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
chunk = data_layer(name="chunk", size=num_label_types)
crf_input = fc_layer(
input=features,
size=num_label_types,
act=LinearActivation(),
bias_attr=False,
param_attr=ParamAttr(
initial_std=0, sparse_update=True))
crf = crf_layer(
input=crf_input,
label=chunk,
param_attr=ParamAttr(
name="crfw", initial_std=0), )
crf_decoding = crf_decoding_layer(
size=num_label_types,
input=crf_input,
label=chunk,
param_attr=ParamAttr(name="crfw"), )
sum_evaluator(
name="error",
input=crf_decoding, )
chunk_evaluator(
name="chunk_f1",
input=[crf_decoding, chunk],
chunk_scheme="IOB",
num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
# Sequence Tagging
This demo is a sequence model that assigns a tag to each token in a sentence. The task is described on the <a href="http://www.cnts.ua.ac.be/conll2000/chunking">CONLL2000 Text Chunking</a> page.
## Download data
```bash
cd demo/sequence_tagging
./data/get_data.sh
```
## Train model
```bash
cd demo/sequence_tagging
./train.sh
```
## Model description
We provide two models. One is a linear CRF model (linear_crf.py), which is equivalent to the one at <a href="http://leon.bottou.org/projects/sgd#stochastic_gradient_crfs">leon.bottou.org/projects/sgd</a>. The second is a stacked bidirectional RNN and CRF model (rnn_crf.py).
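For reference, the essence of the linear CRF configuration is a single sparse fully-connected layer that produces per-label scores, followed by a CRF layer. The sketch below is condensed from linear_crf.py in this demo (all layer calls are taken from that config; data sources and evaluators are omitted):

```python
# Condensed from linear_crf.py: per-label scores from a sparse fc_layer,
# tied together by a crf_layer whose "crfw" parameter holds the transition weights.
crf_input = fc_layer(
    input=features,
    size=num_label_types,
    act=LinearActivation(),
    bias_attr=False,
    param_attr=ParamAttr(initial_std=0, sparse_update=True))

crf = crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw", initial_std=0))

outputs(crf)
```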
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<th scope="col" class="left">Model name</th>
<th scope="col" class="left">Number of parameters</th>
<th scope="col" class="left">F1 score</th>
</thead>
<tbody>
<tr>
<td class="left">linear_crf</td>
<td class="left"> 1.8M </td>
<td class="left"> 0.937</td>
</tr>
<tr>
<td class="left">rnn_crf</td>
<td class="left"> 960K </td>
<td class="left">0.941</td>
</tr>
</tbody>
</table>
</center>
<br>
#!/bin/bash
paddle train \
--config rnn_crf.py \
--parallel_nn=1 \
--use_gpu=1 \
--dot_period=10 \
--log_period=1000 \
--test_period=0 \
--num_passes=10
#!/bin/bash
paddle train \
--config linear_crf.py \
--use_gpu=0 \
--dot_period=100 \
--log_period=10000 \
--test_period=0 \
--num_passes=10