fix conflicts

e749e7b1 · zhangjinchao · 65026140 · d0a908d5 · e749e7b1 · e749e7b1
291 changed files
--- a/.clang-format
+++ b/.clang-format
@@ -13,8 +13,6 @@
 # The document of clang-format is 
 #   http://clang.llvm.org/docs/ClangFormat.html
 #   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
 ---
 Language:        Cpp
 BasedOnStyle:  Google
@@ -22,8 +20,9 @@ IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -2  # The private/protected/public has no indent in class
-PointerAlignment: Left    # int* p/int& p, not int *p/int &p
 Standard:  Cpp11 
 AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
 ...
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
+    sha: c25201a00e6b0514370501050cf2a8538ac12270
+    hooks:
+    -   id: remove-crlf
+-   repo: https://github.com/reyoung/mirrors-yapf.git
+    sha: v0.13.2
+    hooks:
+    -   id: yapf
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: 4ef03c4223ad322c7adaa6c6c0efb26b57df3b71
+    hooks:
+    -   id: check-added-large-files
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+    -   id: end-of-file-fixer
+# TODO(yuyang): the trailing-whitespace hook has some bugs on markdown
+# files for now, so please do not add it to the pre-commit hooks yet
+#    -   id: trailing-whitespace
+#
+# TODO(yuyang): debug-statements is not a fit for Paddle, because
+# not all of our python code is runnable. Some of it is used for
+# documentation
+#    -   id: debug-statements
--- a/.style.yapf
+++ b/.style.yapf
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -109,11 +109,9 @@ else()
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
    if(WITH_AVX)
-        if(AVX_FOUND)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
-            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -mavx")
-        endif(AVX_FOUND)
    else(WITH_AVX)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -msse3")
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
    endif(WITH_AVX)
    if(WITH_DSO)
@@ -138,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)
 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)
 if(WITH_PYTHON)

--- a/cmake/FindAVX.cmake
+++ b/cmake/FindAVX.cmake
@@ -3,36 +3,55 @@
 INCLUDE(CheckCXXSourceRuns)
-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
 IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  SET(CMAKE_REQUIRED_FLAGS "-mavx2")
+    set(MMX_FLAG "-mmmx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)  # reserve for WINDOWS
+    set(SSE2_FLAG "-msse2")
-  SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+    set(SSE3_FLAG "-msse3")
+    SET(AVX_FLAG "-mavx")
+    SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+    set(MMX_FLAG "/arch:MMX")
+    set(SSE2_FLAG "/arch:SSE2")
+    set(SSE3_FLAG "/arch:SSE3")
+    SET(AVX_FLAG "/arch:AVX")
+    SET(AVX2_FLAG "/arch:AVX2")
 ENDIF()
+# Check  MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
 int main()
 {
-    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    _mm_setzero_si64();
-    __m256i result = _mm256_abs_epi32 (a);
    return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)
-# Check AVX
+# Check SSE2
-SET(CMAKE_REQUIRED_FLAGS)
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+CHECK_CXX_SOURCE_RUNS("
-    SET(CMAKE_REQUIRED_FLAGS "-mavx")
+#include <emmintrin.h>
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
+int main()
-    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
+{
-endif()
+    _mm_setzero_si128();
+    return 0;
+}" SSE2_FOUND)
+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+    __m128d a = _mm_set1_pd(6.28);
+    __m128d b = _mm_set1_pd(3.14);
+    __m128d result = _mm_addsub_pd(a, b);
+    result = _mm_movedup_pd(result);
+    return 0;
+}" SSE3_FOUND)
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
@@ -41,25 +60,17 @@ int main()
    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
    __m256 result = _mm256_add_ps (a, b);
    return 0;
-}" FIND_AVX_10)
+}" AVX_FOUND)
-IF(${FIND_AVX_20})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
-    ENDIF()
-ENDIF()
-IF(${FIND_AVX_10})
+# Check AVX 2
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
+CHECK_CXX_SOURCE_RUNS("
-    ELSEIF(MSVC)
+#include <immintrin.h>
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
+int main()
-    ENDIF()
+{
-ENDIF()
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" AVX2_FOUND)
-IF(${FIND_AVX_10})
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
-    SET(AVX_FOUND TRUE)
-    MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
--- a/demo/image_classification/.gitignore
+++ b/demo/image_classification/.gitignore
@@ -5,3 +5,5 @@ plot.png
 train.log
 image_provider_copy_1.py
 *pyc
+train.list
+test.list
--- a/demo/image_classification/data/download_cifar.sh
+++ b/demo/image_classification/data/download_cifar.sh
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@@ -16,7 +16,6 @@ import numpy as np
 import sys
 import os
 import PIL.Image as Image
 """
  Usage: python process_cifar input_dir output_dir
 """
@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
    if not os.path.exists(path):
        os.mkdir(path)
 def create_dir_structure(output_dir):
    """
    Create the directory structure for the directory.
@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
    mkdir_not_exist(os.path.join(output_dir, "train"))
    mkdir_not_exist(os.path.join(output_dir, "test"))
-def convert_batch(batch_path, label_set, label_map,
-                  output_dir, data_split):
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
    """
    Convert CIFAR batch to the structure of Paddle format.
    batch_path: the batch to be converted.
@@ -67,11 +67,23 @@ if __name__ == '__main__':
    output_dir = sys.argv[2]
    num_batch = 5
    create_dir_structure(output_dir)
-    label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
+    label_map = {
-                 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+        0: "airplane",
+        1: "automobile",
+        2: "bird",
+        3: "cat",
+        4: "deer",
+        5: "dog",
+        6: "frog",
+        7: "horse",
+        8: "ship",
+        9: "truck"
+    }
    labels = {}
    for i in range(1, num_batch + 1):
-        convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
+        convert_batch(
-                      label_map, output_dir, "train")
+            os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
-    convert_batch(os.path.join(input_dir, "test_batch"), {},
+            output_dir, "train")
-                  label_map, output_dir, "test")
+    convert_batch(
\ No newline at end of file
+        os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+        "test")
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
    settings.img_mean = image_util.load_meta(settings.meta_path,
                                             settings.mean_img_size,
-                                             settings.img_size,
+                                             settings.img_size, settings.color)
-                                             settings.color)
    settings.logger.info('Image size: %s', settings.img_size)
    settings.logger.info('Meta path: %s', settings.meta_path)
    settings.input_types = [
        dense_vector(settings.img_raw_size),  # image feature
-        integer_value(settings.num_classes)]  # labels
+        integer_value(settings.num_classes)
+    ]  # labels
    settings.logger.info('DataProvider Initialization finished')
-@provider(init_hook=hook)
+@provider(init_hook=hook, min_pool_size=0)
-def processData(settings, file_name):
+def processData(settings, file_list):
    """
    The main function for loading data.
    Load the batch, iterate all the images and labels in this batch.
-    file_name: the batch file name.
+    file_list: the batch file list.
    """
-    data = cPickle.load(io.open(file_name, 'rb'))
+    with open(file_list, 'r') as fdata:
-    indexes = list(range(len(data['images'])))
+        lines = [line.strip() for line in fdata]
-    if settings.is_train:
+        random.shuffle(lines)
-        random.shuffle(indexes)
+        for file_name in lines:
-    for i in indexes:
+            with io.open(file_name.strip(), 'rb') as file:
-        if settings.use_jpeg == 1:
+                data = cPickle.load(file)
-            img = image_util.decode_jpeg(data['images'][i])
+                indexes = list(range(len(data['images'])))
-        else:
+                if settings.is_train:
-            img = data['images'][i]
+                    random.shuffle(indexes)
-        img_feat = image_util.preprocess_img(img, settings.img_mean,
+                for i in indexes:
-                                             settings.img_size, settings.is_train,
+                    if settings.use_jpeg == 1:
-                                             settings.color)
+                        img = image_util.decode_jpeg(data['images'][i])
-        label = data['labels'][i]
+                    else:
-        yield img_feat.tolist(), int(label)
+                        img = data['images'][i]
+                    img_feat = image_util.preprocess_img(
+                        img, settings.img_mean, settings.img_size,
+                        settings.is_train, settings.color)
+                    label = data['labels'][i]
+                    yield img_feat.astype('float32'), int(label)
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@@ -16,17 +16,20 @@ import numpy as np
 from PIL import Image
 from cStringIO import StringIO
 def resize_image(img, target_size):
    """
    Resize an image so that the shorter edge has length target_size.
    img: the input image to be resized.
    target_size: the target resized image size.
    """
-    percent = (target_size/float(min(img.size[0], img.size[1])))
+    percent = (target_size / float(min(img.size[0], img.size[1])))
-    resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+    resized_size = int(round(img.size[0] * percent)), int(
+        round(img.size[1] * percent))
    img = img.resize(resized_size, Image.ANTIALIAS)
    return img
 def flip(im):
    """
    Return the flipped image.
@@ -38,6 +41,7 @@ def flip(im):
    else:
        return im[:, ::-1]
 def crop_img(im, inner_size, color=True, test=True):
    """
    Return cropped image.
@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
      If True, crop the center of images.
    """
    if color:
-        height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+        height, width = max(inner_size, im.shape[1]), max(inner_size,
+                                                          im.shape[2])
        padded_im = np.zeros((3, height, width))
        startY = (height - im.shape[1]) / 2
        startX = (width - im.shape[2]) / 2
        endY, endX = startY + im.shape[1], startX + im.shape[2]
-        padded_im[:, startY: endY, startX: endX] = im
+        padded_im[:, startY:endY, startX:endX] = im
    else:
        im = im.astype('float32')
-        height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+        height, width = max(inner_size, im.shape[0]), max(inner_size,
+                                                          im.shape[1])
        padded_im = np.zeros((height, width))
        startY = (height - im.shape[0]) / 2
        startX = (width - im.shape[1]) / 2
        endY, endX = startY + im.shape[0], startX + im.shape[1]
-        padded_im[startY: endY, startX: endX] = im
+        padded_im[startY:endY, startX:endX] = im
    if test:
        startY = (height - inner_size) / 2
        startX = (width - inner_size) / 2
@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
        startX = np.random.randint(0, width - inner_size + 1)
    endY, endX = startY + inner_size, startX + inner_size
    if color:
-        pic = padded_im[:, startY: endY, startX: endX]
+        pic = padded_im[:, startY:endY, startX:endX]
    else:
-        pic = padded_im[startY: endY, startX: endX]
+        pic = padded_im[startY:endY, startX:endX]
    if (not test) and (np.random.randint(2) == 0):
        pic = flip(pic)
    return pic
 def decode_jpeg(jpeg_string):
    np_array = np.array(Image.open(StringIO(jpeg_string)))
    if len(np_array.shape) == 3:
        np_array = np.transpose(np_array, (2, 0, 1))
    return np_array
 def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    """
    Does data augmentation for images.
@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    pic -= img_mean
    return pic.flatten()
 def load_meta(meta_path, mean_img_size, crop_size, color=True):
    """
    Return the loaded meta file.
@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
    mean = np.load(meta_path)['data_mean']
    border = (mean_img_size - crop_size) / 2
    if color:
-        assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
+        assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
        mean = mean.reshape(3, mean_img_size, mean_img_size)
-        mean = mean[:, border: border + crop_size,
+        mean = mean[:, border:border + crop_size, border:border +
-                       border: border + crop_size].astype('float32')
+                    crop_size].astype('float32')
    else:
-        assert(mean_img_size * mean_img_size == mean.shape[0])
+        assert (mean_img_size * mean_img_size == mean.shape[0])
        mean = mean.reshape(mean_img_size, mean_img_size)
-        mean = mean[border: border + crop_size,
+        mean = mean[border:border + crop_size, border:border +
-                    border: border + crop_size].astype('float32')
+                    crop_size].astype('float32')
    return mean
 def load_image(img_path, is_color=True):
    """
    Load image and return. 
@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
    img.load()
    return img
 def oversample(img, crop_dims):
    """
    image : iterable of (H x W x K) ndarrays
@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
        for j in w_indices:
            crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
            curr += 1
-    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
+    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
-        -crop_dims / 2.0,
+        [-crop_dims / 2.0, crop_dims / 2.0])
-         crop_dims / 2.0
-    ])
    crops_ix = np.tile(crops_ix, (2, 1))
    # Extract crops
-    crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
+    crops = np.empty(
-                      im_shape[-1]), dtype=np.float32)
+        (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+        dtype=np.float32)
    ix = 0
    for im in img:
        for crop in crops_ix:
            crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
            ix += 1
-        crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :]  # flip for mirrors
+        crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :]  # flip for mirrors
    return crops
 class ImageTransformer:
-    def __init__(self, transpose = None,
+    def __init__(self,
-                 channel_swap = None, mean = None, is_color = True):
+                 transpose=None,
+                 channel_swap=None,
+                 mean=None,
+                 is_color=True):
        self.transpose = transpose
        self.channel_swap = None
        self.mean = None
-        self.is_color = is_color 
+        self.is_color = is_color
-    def set_transpose(self, order): 
+    def set_transpose(self, order):
        if self.is_color:
-            assert 3 == len(order) 
+            assert 3 == len(order)
        self.transpose = order
-    def set_channel_swap(self, order): 
+    def set_channel_swap(self, order):
        if self.is_color:
-            assert 3 == len(order) 
+            assert 3 == len(order)
        self.channel_swap = order
    def set_mean(self, mean):
        # mean value, may be one value per channel 
        if mean.ndim == 1:
-            mean = mean[:, np.newaxis, np.newaxis]       
+            mean = mean[:, np.newaxis, np.newaxis]
-        else: 
+        else:
            # elementwise mean
            if self.is_color:
                assert len(mean.shape) == 3
-        self.mean = mean 
+        self.mean = mean
    def transformer(self, data):
        if self.transpose is not None:

--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os,sys
+import os, sys
 import numpy as np
 import logging
 from PIL import Image
@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 class ImageClassifier():
    def __init__(self,
                 train_conf,
@@ -58,18 +60,19 @@ class ImageClassifier():
        self.oversample = oversample
        self.is_color = is_color
-        self.transformer = image_util.ImageTransformer(is_color = is_color)
+        self.transformer = image_util.ImageTransformer(is_color=is_color)
-        self.transformer.set_transpose((2,0,1))
+        self.transformer.set_transpose((2, 0, 1))
        self.mean_file = mean_file
        mean = np.load(self.mean_file)['data_mean']
        mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-        self.transformer.set_mean(mean) # mean pixel
+        self.transformer.set_mean(mean)  # mean pixel
        gpu = 1 if use_gpu else 0
        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)
@@ -90,14 +93,14 @@ class ImageClassifier():
            # image_util.resize_image: short side is self.resize_dim
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
+            input = np.zeros(
-                             dtype=np.float32)
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
            input[0] = image.astype(np.float32)
            input = image_util.oversample(input, self.crop_dims)
        else:
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
+            input = np.zeros(
-                             dtype=np.float32)
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
            input[0] = np.array(image).astype(np.float32)
        data_in = []
@@ -133,22 +136,24 @@ class ImageClassifier():
        lab = np.argsort(-prob)
        logging.info("Label of %s is: %d", image, lab[0])
 if __name__ == '__main__':
-    image_size=32
+    image_size = 32
-    crop_size=32
+    crop_size = 32
-    multi_crop=True
+    multi_crop = True
-    config="vgg_16_cifar.py"
+    config = "vgg_16_cifar.py"
-    output_layer="__fc_layer_1__"
+    output_layer = "__fc_layer_1__"
-    mean_path="data/cifar-out/batches/batches.meta"
+    mean_path = "data/cifar-out/batches/batches.meta"
-    model_path=sys.argv[1]
+    model_path = sys.argv[1]
-    image=sys.argv[2]
+    image = sys.argv[2]
-    use_gpu=bool(int(sys.argv[3]))
+    use_gpu = bool(int(sys.argv[3]))
-    obj = ImageClassifier(train_conf=config,
+    obj = ImageClassifier(
-                          model_dir=model_path,
+        train_conf=config,
-                          resize_dim=image_size,
+        model_dir=model_path,
-                          crop_dim=crop_size,
+        resize_dim=image_size,
-                          mean_file=mean_path,
+        crop_dim=crop_size,
-                          use_gpu=use_gpu,
+        mean_file=mean_path,
-                          oversample=multi_crop)
+        use_gpu=use_gpu,
+        oversample=multi_crop)
    obj.predict(image, output_layer)
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@@ -19,22 +19,36 @@ from optparse import OptionParser
 def option_parser():
    parser = OptionParser(usage="usage: python preprcoess.py "\
                          "-i data_dir [options]")
-    parser.add_option("-i", "--input", action="store",
+    parser.add_option(
-                      dest="input", help="Input data directory.")
+        "-i",
-    parser.add_option("-s", "--size", action="store",
+        "--input",
-                      dest="size", help="Processed image size.")
+        action="store",
-    parser.add_option("-c", "--color", action="store",
+        dest="input",
-                      dest="color", help="whether to use color images.")
+        help="Input data directory.")
+    parser.add_option(
+        "-s",
+        "--size",
+        action="store",
+        dest="size",
+        help="Processed image size.")
+    parser.add_option(
+        "-c",
+        "--color",
+        action="store",
+        dest="color",
+        help="whether to use color images.")
    return parser.parse_args()
 if __name__ == '__main__':
-     options, args = option_parser()
+    options, args = option_parser()
-     data_dir = options.input
+    data_dir = options.input
-     processed_image_size = int(options.size)
+    processed_image_size = int(options.size)
-     color = options.color == "1"
+    color = options.color == "1"
-     data_creator = ImageClassificationDatasetCreater(data_dir,
+    data_creator = ImageClassificationDatasetCreater(
-                                                      processed_image_size,
+        data_dir, processed_image_size, color)
-                                                      color)
+    data_creator.train_list_name = "train.txt"
-     data_creator.num_per_batch = 1000
+    data_creator.test_list_name = "test.txt"
-     data_creator.overwrite = True
+    data_creator.num_per_batch = 1000
-     data_creator.create_batches()
+    data_creator.overwrite = True
+    data_creator.create_batches()
--- a/demo/image_classification/preprocess.sh
+++ b/demo/image_classification/preprocess.sh
@@ -17,3 +17,6 @@ set -e
 data_dir=./data/cifar-out
 python preprocess.py -i $data_dir -s 32 -c 1
+echo "data/cifar-out/batches/train.txt" > train.list
+echo "data/cifar-out/batches/test.txt" > test.list
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
 ####################Data Configuration ##################
 if not is_predict:
-  data_dir='data/cifar-out/batches/'
+    data_dir = 'data/cifar-out/batches/'
-  meta_path=data_dir+'batches.meta'
+    meta_path = data_dir + 'batches.meta'
-  args = {'meta':meta_path,'mean_img_size': 32,
+    args = {
-          'img_size': 32,'num_classes': 10,
+        'meta': meta_path,
-          'use_jpeg': 1,'color': "color"}
+        'mean_img_size': 32,
+        'img_size': 32,
-  define_py_data_sources2(train_list=data_dir+"train.list",
+        'num_classes': 10,
-                          test_list=data_dir+'test.list',
+        'use_jpeg': 1,
-                          module='image_provider',
+        'color': "color"
-                          obj='processData',
+    }
-                          args=args)
+    define_py_data_sources2(
+        train_list="train.list",
+        test_list="train.list",
+        module='image_provider',
+        obj='processData',
+        args=args)
 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
+    batch_size=128,
-    learning_rate = 0.1 / 128.0,
+    learning_rate=0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
+    learning_method=MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
+    regularization=L2Regularization(0.0005 * 128))
-)
 #######################Network Configuration #############
-data_size=3*32*32
+data_size = 3 * 32 * 32
-label_size=10
+label_size = 10
-img = data_layer(name='image',
+img = data_layer(name='image', size=data_size)
-                 size=data_size)
 # small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
-                    num_channels=3,
-                    num_classes=label_size)
 if not is_predict:
    lbl = data_layer(name="label", size=label_size)

--- a/demo/introduction/README.md
+++ b/demo/introduction/README.md
 This folder contains scripts used in PaddlePaddle introduction.
 - use `bash train.sh` to train a simple linear regression model
 - use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@@ -15,10 +15,10 @@
 from paddle.trainer.PyDataProvider2 import *
 import random
 # define data types of input: 2 real numbers
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
 def process(settings, input_file):
    for i in xrange(2000):
        x = random.random()
-        yield [x], [2*x+0.3]
+        yield [x], [2 * x + 0.3]
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@@ -23,14 +23,17 @@ Usage:
 import numpy as np
 import os
 def load(file_name):
    with open(file_name, 'rb') as f:
-        f.read(16) # skip header for float type.
+        f.read(16)  # skip header for float type.
        return np.fromfile(f, dtype=np.float32)
 def main():
    print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
-            load('output/pass-00029/b'))
+                                           load('output/pass-00029/b'))
 if __name__ == '__main__':
    main()
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *
 # 1. read data. Suppose you saved above python code as dataprovider.py
 data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
+with open(data_file, 'w') as f:
-define_py_data_sources2(train_list=data_file, test_list=None, 
+    f.writelines(' ')
-        module='dataprovider', obj='process',args={})
+define_py_data_sources2(
+    train_list=data_file,
+    test_list=None,
+    module='dataprovider',
+    obj='process',
+    args={})
 # 2. learning algorithm
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 # 3. Network configuration
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+    input=x,
+    param_attr=ParamAttr(name='w'),
+    size=1,
+    act=LinearActivation(),
+    bias_attr=ParamAttr(name='b'))
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@@ -13,9 +13,9 @@
 # limitations under the License.
 o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" +"\n")
+o.write("./data/raw_data/train" + "\n")
 o.close()
 o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" +"\n")
+o.write("./data/raw_data/t10k" + "\n")
 o.close()
\ No newline at end of file
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@@ -19,4 +19,3 @@ done
 cd $DIR
 rm -f *.list
 python generate_list.py
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *
 # Define a py data provider
-@provider(input_types={
+@provider(
-    'pixel': dense_vector(28 * 28),
+    input_types={'pixel': dense_vector(28 * 28),
-    'label': integer_value(10)
+                 'label': integer_value(10)})
-})
 def process(settings, filename):  # settings is not used currently.
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"

--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False)
 ####################Data Configuration ##################
 if not is_predict:
-  data_dir='./data/'
+    data_dir = './data/'
-  define_py_data_sources2(train_list= data_dir + 'train.list',
+    define_py_data_sources2(
-                        test_list= data_dir + 'test.list',
+        train_list=data_dir + 'train.list',
-                        module='mnist_provider',
+        test_list=data_dir + 'test.list',
-                        obj='process')
+        module='mnist_provider',
+        obj='process')
 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
+    batch_size=128,
-    learning_rate = 0.1 / 128.0,
+    learning_rate=0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
+    learning_method=MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
+    regularization=L2Regularization(0.0005 * 128))
-)
 #######################Network Configuration #############
-data_size=1*28*28
+data_size = 1 * 28 * 28
-label_size=10
+label_size = 10
 img = data_layer(name='pixel', size=data_size)
 # small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
-                    num_channels=1,
-                    num_classes=label_size)
 if not is_predict:
    lbl = data_layer(name="label", size=label_size)

--- a/demo/model_zoo/embedding/extract_para.py
+++ b/demo/model_zoo/embedding/extract_para.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Example:
    python extract_para.py --preModel PREMODEL --preDict PREDICT \
@@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct
 def get_row_index(preDict, usrDict):
    """
    Get the row positions for all words in user dictionary from pre-trained dictionary.
@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
            pos.append(index[word])
    return pos
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+                                  paraDim):
    """
    Extract desired parameters from a pretrained embedding model based on user dictionary
    """
@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
    print "extract parameters finish, total", len(rowIndex), "lines"
    fi.close()
 def main():
    """
    Main entry for running extract_para.py
@@ -78,19 +81,33 @@ def main():
            "python %prog --preModel PREMODEL --preDict PREDICT" \
            " --usrModel USRMODEL --usrDict USRDICT -d DIM"
    parser = OptionParser(usage)
-    parser.add_option("--preModel", action="store", dest="preModel",
+    parser.add_option(
-                      help="the name of pretrained embedding model")
+        "--preModel",
-    parser.add_option("--preDict", action="store", dest="preDict",
+        action="store",
-                      help="the name of pretrained dictionary")
+        dest="preModel",
-    parser.add_option("--usrModel", action="store", dest="usrModel",
+        help="the name of pretrained embedding model")
-                      help="the name of output usr embedding model")
+    parser.add_option(
-    parser.add_option("--usrDict", action="store", dest="usrDict",
+        "--preDict",
-                      help="the name of user specified dictionary")
+        action="store",
-    parser.add_option("-d", action="store", dest="dim",
+        dest="preDict",
-                      help="dimension of parameter")
+        help="the name of pretrained dictionary")
+    parser.add_option(
+        "--usrModel",
+        action="store",
+        dest="usrModel",
+        help="the name of output usr embedding model")
+    parser.add_option(
+        "--usrDict",
+        action="store",
+        dest="usrDict",
+        help="the name of user specified dictionary")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
-    extract_parameters_by_usrDict(options.preModel, options.preDict, 
+    extract_parameters_by_usrDict(options.preModel, options.preDict,
-                      options.usrModel, options.usrDict, int(options.dim))
+                                  options.usrModel, options.usrDict,
+                                  int(options.dim))
 if __name__ == '__main__':
    main()
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Example:
    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct
 def binary2text(input, output, paraDim):
    """
    Convert a binary parameter file of embedding model to be a text file.  
@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
    fo.close()
    print "binary2text finish, total", line, "lines"
 def get_para_count(input):
    """
    Compute the total number of embedding parameters in input text file. 
    input: the name of input text file
    """
-    numRows = 1 
+    numRows = 1
    paraDim = 0
    with open(input) as f:
        line = f.readline()
@@ -90,6 +91,7 @@ def get_para_count(input):
            numRows += 1
    return numRows * paraDim
 def text2binary(input, output, paddle_head=True):
    """
    Convert a text parameter file of embedding model to be a binary file.
@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
    fo.close()
    print "text2binary finish, total", count, "lines"
 def main():
    """
    Main entry for running paraconvert.py 
@@ -131,21 +134,26 @@ def main():
            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
            "python %prog --t2b -i INPUT -o OUTPUT"
    parser = OptionParser(usage)
-    parser.add_option("--b2t", action="store_true",
+    parser.add_option(
-                      help="convert parameter file of embedding model from binary to text")
+        "--b2t",
-    parser.add_option("--t2b", action="store_true",
+        action="store_true",
-                      help="convert parameter file of embedding model from text to binary")
+        help="convert parameter file of embedding model from binary to text")
-    parser.add_option("-i", action="store", dest="input",
+    parser.add_option(
-                      help="input parameter file name")
+        "--t2b",
-    parser.add_option("-o", action="store", dest="output",
+        action="store_true",
-                      help="output parameter file name")
+        help="convert parameter file of embedding model from text to binary")
-    parser.add_option("-d", action="store", dest="dim",
+    parser.add_option(
-                      help="dimension of parameter")
+        "-i", action="store", dest="input", help="input parameter file name")
+    parser.add_option(
+        "-o", action="store", dest="output", help="output parameter file name")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
    if options.b2t:
        binary2text(options.input, options.output, options.dim)
    if options.t2b:
        text2binary(options.input, options.output)
 if __name__ == '__main__':
    main()
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 class ImageClassifier():
-    def __init__(self, train_conf, model_dir=None,
+    def __init__(self,
-                 resize_dim=256, crop_dim=224,
+                 train_conf,
+                 model_dir=None,
+                 resize_dim=256,
+                 crop_dim=224,
                 use_gpu=True,
                 mean_file=None,
                 output_layer=None,
-                 oversample=False, is_color=True):
+                 oversample=False,
+                 is_color=True):
        """
        train_conf: network configure.
        model_dir: string, directory of model.
@@ -62,24 +68,25 @@ class ImageClassifier():
            assert isinstance(self.output_layer, basestring)
            self.output_layer = self.output_layer.split(",")
-        self.transformer = image_util.ImageTransformer(is_color = is_color)
+        self.transformer = image_util.ImageTransformer(is_color=is_color)
-        self.transformer.set_transpose((2,0,1))
+        self.transformer.set_transpose((2, 0, 1))
-        self.transformer.set_channel_swap((2,1,0))
+        self.transformer.set_channel_swap((2, 1, 0))
        self.mean_file = mean_file
        if self.mean_file is not None:
            mean = np.load(self.mean_file)['data_mean']
            mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-            self.transformer.set_mean(mean) # mean pixel
+            self.transformer.set_mean(mean)  # mean pixel
        else:
            # If you use three mean values, set them like this:
            # these three mean values are calculated from ImageNet.
-            self.transformer.set_mean(np.array([103.939,116.779,123.68]))
+            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)
@@ -105,14 +112,14 @@ class ImageClassifier():
            # image_util.resize_image: short side is self.resize_dim
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
+            input = np.zeros(
-                             dtype=np.float32)
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
            input[0] = image.astype(np.float32)
            input = image_util.oversample(input, self.crop_dims)
        else:
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
+            input = np.zeros(
-                             dtype=np.float32)
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
            input[0] = np.array(image).astype(np.float32)
        data_in = []
@@ -172,7 +179,7 @@ class ImageClassifier():
            logging.info("Label of %s is: %d", image, lab[0])
        return results
-    def extract(self, data_file, output_dir, batch_size = 10000):
+    def extract(self, data_file, output_dir, batch_size=10000):
        """
        extract and save features of output layers, which are
        specified in Outputs() in the network configuration.
@@ -197,7 +204,7 @@ class ImageClassifier():
            image_feature[file_name] = feature
            sample_num += 1
            if sample_num == batch_size:
-                batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+                batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
                self.save_file(image_feature, batch_name)
                logging.info('Finish batch %d', batch_num)
                batch_num += 1
@@ -206,7 +213,7 @@ class ImageClassifier():
            if idx % 1000 == 0:
                logging.info('%d/%d, %s', idx, len(image_files), file_name)
        if sample_num > 0:
-            batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+            batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
            self.save_file(image_feature, batch_name)
            logging.info('Finish batch %d', batch_num)
        logging.info('Done: make image feature batch')
@@ -215,38 +222,64 @@ class ImageClassifier():
        of = open(file, 'wb')
        cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
 def option_parser():
    """
    Main entry for predicting
    """
    usage = "%prog -c config -i data_list -w model_dir [options]"
    parser = OptionParser(usage="usage: %s" % usage)
-    parser.add_option("-j", "--job",
+    parser.add_option(
-                      action="store", dest="job_type",
+        "-j",
-                      help="job type: predict, extract\
+        "--job",
+        action="store",
+        dest="job_type",
+        help="job type: predict, extract\
                            predict: predicting,\
                            extract: extract features")
-    parser.add_option("-c", "--conf",
+    parser.add_option(
-                      action="store", dest="train_conf",
+        "-c",
-                      help="network config")
+        "--conf",
-    parser.add_option("-i", "--data",
+        action="store",
-                      action="store", dest="data_file",
+        dest="train_conf",
-                      help="image list")
+        help="network config")
-    parser.add_option("-w", "--model",
+    parser.add_option(
-                      action="store", dest="model_path",
+        "-i", "--data", action="store", dest="data_file", help="image list")
-                      default=None, help="model path")
+    parser.add_option(
-    parser.add_option("-g", "--use_gpu", action="store",
+        "-w",
-                      dest="use_gpu", default=True,
+        "--model",
-                      help="Whether to use gpu mode.")
+        action="store",
-    parser.add_option("-o", "--output_dir",
+        dest="model_path",
-                      action="store", dest="output_dir",
+        default=None,
-                      default="output", help="output path")
+        help="model path")
-    parser.add_option("-m", "--mean", action="store",
+    parser.add_option(
-                      dest="mean", default=None,
+        "-g",
-                      help="mean file.")
+        "--use_gpu",
-    parser.add_option("-p", "--multi_crop", action="store_true",
+        action="store",
-                      dest="multi_crop", default=False,
+        dest="use_gpu",
-                      help="Wether to use multiple crops on image.")
+        default=True,
+        help="Whether to use gpu mode.")
+    parser.add_option(
+        "-o",
+        "--output_dir",
+        action="store",
+        dest="output_dir",
+        default="output",
+        help="output path")
+    parser.add_option(
+        "-m",
+        "--mean",
+        action="store",
+        dest="mean",
+        default=None,
+        help="mean file.")
+    parser.add_option(
+        "-p",
+        "--multi_crop",
+        action="store_true",
+        dest="multi_crop",
+        default=False,
+        help="Wether to use multiple crops on image.")
    parser.add_option("-l", "--output_layer", action="store",
                      dest="output_layer", default=None,
                      help="--job=extract, specify layers to extract "\
@@ -254,24 +287,26 @@ def option_parser():
                           "classification probability, output in resnet.py.")
    return parser.parse_args()
 def main():
    """
    1. parse input arguments.
    2. predict or extract features according to the job type.
    """
    options, args = option_parser()
-    obj = ImageClassifier(options.train_conf,
+    obj = ImageClassifier(
-                          options.model_path,
+        options.train_conf,
-                          use_gpu=options.use_gpu,
+        options.model_path,
-                          mean_file=options.mean,
+        use_gpu=options.use_gpu,
-                          output_layer=options.output_layer,
+        mean_file=options.mean,
-                          oversample=options.multi_crop)
+        output_layer=options.output_layer,
+        oversample=options.multi_crop)
    if options.job_type == "predict":
        obj.predict(options.data_file)
    elif options.job_type == "extract":
-        obj.extract(options.data_file,
+        obj.extract(options.data_file, options.output_dir)
-                    options.output_dir)
 if __name__ == '__main__':
    main()
--- a/demo/model_zoo/resnet/example/__init__.py
+++ b/demo/model_zoo/resnet/example/__init__.py
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/demo/model_zoo/resnet/example/image_list_provider.py
+++ b/demo/model_zoo/resnet/example/image_list_provider.py
@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
 from paddle.trainer.PyDataProvider2 import *
-def hook(settings, image_size, crop_size, color, file_list,
+def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
-         is_train, **kwargs):
    """
    Description: Init with a list of data file
    file_list is the name list of input files.
@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
        sz = settings.crop_size * settings.crop_size
        settings.img_mean = np.zeros(sz * 3, dtype=np.single)
        for idx, value in enumerate(settings.mean_value):
-            settings.img_mean[idx * sz: (idx + 1) * sz] = value
+            settings.img_mean[idx * sz:(idx + 1) * sz] = value
        settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
                                                      settings.crop_size)
@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
    settings.input_types = [
        dense_vector(settings.img_input_size),  # image feature
-        integer_value(1)]  # labels
+        integer_value(1)
+    ]  # labels
    settings.logger.info('Image short side: %s', settings.img_size)
    settings.logger.info('Crop size: %s', settings.crop_size)
@@ -97,9 +97,6 @@ def processData(settings, file_list):
    # swap channel
    if settings.is_swap_channel:
        img = img[settings.swap_channel, :, :]
-    img_feat = preprocess_img(img,
+    img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
-                              settings.img_mean,
+                              settings.is_train, settings.color)
-                              settings.crop_size,
-                              settings.is_train,
-                              settings.color)
    yield img_feat.tolist(), int(lab.strip())
--- a/demo/model_zoo/resnet/load_feature.py
+++ b/demo/model_zoo/resnet/load_feature.py
@@ -17,9 +17,11 @@ import sys
 import cPickle
 import logging
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 def load_feature_c(file):
    """
    Load feature extracted by C++ interface.
@@ -30,14 +32,15 @@ def load_feature_c(file):
    f = open(file, 'r')
    for line in f:
        sample = []
-        for slot in line.strip().split(";"): 
+        for slot in line.strip().split(";"):
-            fea = [float(val) for val in slot.strip().split()] 
+            fea = [float(val) for val in slot.strip().split()]
            if fea:
                sample.append(fea)
        features.append(sample)
    f.close()
    return features
 def load_feature_py(feature_dir):
    """
    Load feature extracted by python interface.
@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
            logging.info('Load feature file %s', file_name)
    return features
 if __name__ == '__main__':
-    print load_feature_py(sys.argv[1]) 
+    print load_feature_py(sys.argv[1])
    #print load_feature_c(sys.argv[1]) 
--- a/demo/model_zoo/resnet/resnet.py
+++ b/demo/model_zoo/resnet/resnet.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 """
 paper: https://arxiv.org/abs/1512.03385
 """
@@ -28,15 +27,19 @@ if not is_predict and data_provider:
    # mean.meta size : 3 x 224 x 224.
    # If you use three mean values, set them like:
    # "mean_value:103.939,116.779,123.68;"
-    args={
+    args = {
        'mean_meta': "model/mean_meta_224/mean.meta",
-        'image_size': 224, 'crop_size': 224,
+        'image_size': 224,
-        'color': True,'swap_channel:': [2, 1, 0]}
+        'crop_size': 224,
-    define_py_data_sources2(train_list,
+        'color': True,
-                           'example/test.list',
+        'swap_channel:': [2, 1, 0]
-                           module="example.image_list_provider",
+    }
-                           obj="processData",
+    define_py_data_sources2(
-                           args=args)
+        train_list,
+        'example/test.list',
+        module="example.image_list_provider",
+        obj="processData",
+        args=args)
 batch_size = 1
 learning_rate = 0.1 / batch_size
@@ -54,12 +57,16 @@ Settings(
    learning_method='momentum',
    learning_rate_decay_a=0.5,
    learning_rate_decay_b=1200000 * 10,
-    learning_rate_schedule="discexp",
+    learning_rate_schedule="discexp", )
-)
-def conv_bn_layer(name, input, filter_size, num_filters,
+def conv_bn_layer(name,
-                  stride, padding, channels=None,
+                  input,
+                  filter_size,
+                  num_filters,
+                  stride,
+                  padding,
+                  channels=None,
                  active_type=ReluActivation()):
    """
    A wrapper for conv layer with batch normalization layers.
@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
    conv layer has no activation.
    """
-    tmp = img_conv_layer(name=name + "_conv",
+    tmp = img_conv_layer(
-                         input=input,
+        name=name + "_conv",
-                         filter_size=filter_size,
+        input=input,
-                         num_channels=channels,
+        filter_size=filter_size,
-                         num_filters=num_filters,
+        num_channels=channels,
-                         stride=stride,
+        num_filters=num_filters,
-                         padding=padding,
+        stride=stride,
-                         act=LinearActivation(),
+        padding=padding,
-                         bias_attr=False)
+        act=LinearActivation(),
-    return batch_norm_layer(name=name + "_bn",
+        bias_attr=False)
-                            input=tmp,
+    return batch_norm_layer(
-                            act=active_type,
+        name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
-                            use_global_stats=is_test)
 def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
    Last conv_bn_layer has no activation.
    Addto layer has activation of relu.
    """
-    last_name = conv_bn_layer(name=name + '_branch2a',
+    last_name = conv_bn_layer(
-                              input=input,
+        name=name + '_branch2a',
-                              filter_size=1,
+        input=input,
-                              num_filters=num_filters1,
+        filter_size=1,
-                              stride=1,
+        num_filters=num_filters1,
-                              padding=0)
+        stride=1,
-    last_name = conv_bn_layer(name=name + '_branch2b',
+        padding=0)
-                              input=last_name,
+    last_name = conv_bn_layer(
-                              filter_size=3,
+        name=name + '_branch2b',
-                              num_filters=num_filters1,
+        input=last_name,
-                              stride=1,
+        filter_size=3,
-                              padding=1)
+        num_filters=num_filters1,
-    last_name = conv_bn_layer(name=name + '_branch2c',
+        stride=1,
-                              input=last_name,
+        padding=1)
-                              filter_size=1,
+    last_name = conv_bn_layer(
-                              num_filters=num_filters2,
+        name=name + '_branch2c',
-                              stride=1,
+        input=last_name,
-                              padding=0,
+        filter_size=1,
-                              active_type=LinearActivation())
+        num_filters=num_filters2,
+        stride=1,
-    return addto_layer(name=name + "_addto",
+        padding=0,
-                       input=[input, last_name],
+        active_type=LinearActivation())
-                       act=ReluActivation())
+    return addto_layer(
+        name=name + "_addto", input=[input, last_name], act=ReluActivation())
 def mid_projection(name, input, num_filters1, num_filters2, stride=2):
@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
    branch2x: bottleneck building block, shortcuts are identity.
    """
    # stride = 2
-    branch1 = conv_bn_layer(name=name + '_branch1',
+    branch1 = conv_bn_layer(
-                            input=input,
+        name=name + '_branch1',
-                            filter_size=1,
+        input=input,
-                            num_filters=num_filters2,
+        filter_size=1,
-                            stride=stride,
+        num_filters=num_filters2,
-                            padding=0,
+        stride=stride,
-                            active_type=LinearActivation())
+        padding=0,
+        active_type=LinearActivation())
-    last_name = conv_bn_layer(name=name + '_branch2a',
-                              input=input,
+    last_name = conv_bn_layer(
-                              filter_size=1,
+        name=name + '_branch2a',
-                              num_filters=num_filters1,
+        input=input,
-                              stride=stride,
+        filter_size=1,
-                              padding=0)
+        num_filters=num_filters1,
-    last_name = conv_bn_layer(name=name + '_branch2b',
+        stride=stride,
-                              input=last_name,
+        padding=0)
-                              filter_size=3,
+    last_name = conv_bn_layer(
-                              num_filters=num_filters1,
+        name=name + '_branch2b',
-                              stride=1,
+        input=last_name,
-                              padding=1)
+        filter_size=3,
+        num_filters=num_filters1,
-    last_name = conv_bn_layer(name=name + '_branch2c',
+        stride=1,
-                              input=last_name,
+        padding=1)
-                              filter_size=1,
-                              num_filters=num_filters2,
+    last_name = conv_bn_layer(
-                              stride=1,
+        name=name + '_branch2c',
-                              padding=0,
+        input=last_name,
-                              active_type=LinearActivation())
+        filter_size=1,
+        num_filters=num_filters2,
-    return addto_layer(name=name + "_addto",
+        stride=1,
-                       input=[branch1, last_name],
+        padding=0,
-                       act=ReluActivation())
+        active_type=LinearActivation())
+    return addto_layer(
+        name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
 def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
    # For ImageNet
    # conv1: 112x112
    img = data_layer(name='input', size=224 * 224 * 3)
-    tmp = conv_bn_layer("conv1", img,
+    tmp = conv_bn_layer(
-                        filter_size=7,
+        "conv1",
-                        channels=3,
+        img,
-                        num_filters=64,
+        filter_size=7,
-                        stride=2,
+        channels=3,
-                        padding=3)
+        num_filters=64,
+        stride=2,
+        padding=3)
    tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
    # conv2_x: 56x56
-    tmp = mid_projection(name="res2_1",
+    tmp = mid_projection(
-                         input=tmp,
+        name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
-                         num_filters1=64,
-                         num_filters2=256,
-                         stride=1)
    for i in xrange(2, res2_num + 1, 1):
-        tmp = bottleneck_block(name="res2_" + str(i),
+        tmp = bottleneck_block(
-                               input=tmp,
+            name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
-                               num_filters1=64,
-                               num_filters2=256)
    # conv3_x: 28x28
-    tmp = mid_projection(name="res3_1",
+    tmp = mid_projection(
-                         input=tmp,
+        name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
-                         num_filters1=128,
-                         num_filters2=512)
    for i in xrange(2, res3_num + 1, 1):
-        tmp = bottleneck_block(name="res3_" + str(i),
+        tmp = bottleneck_block(
-                               input=tmp, num_filters1=128,
+            name="res3_" + str(i),
-                               num_filters2=512)
+            input=tmp,
+            num_filters1=128,
+            num_filters2=512)
    # conv4_x: 14x14
-    tmp = mid_projection(name="res4_1", input=tmp,
+    tmp = mid_projection(
-                         num_filters1=256, num_filters2=1024)
+        name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
    for i in xrange(2, res4_num + 1, 1):
-        tmp = bottleneck_block(name="res4_" + str(i),
+        tmp = bottleneck_block(
-                               input=tmp,
+            name="res4_" + str(i),
-                               num_filters1=256,
+            input=tmp,
-                               num_filters2=1024)
+            num_filters1=256,
+            num_filters2=1024)
    # conv5_x: 7x7
-    tmp = mid_projection(name="res5_1", input=tmp,
+    tmp = mid_projection(
-                         num_filters1=512, num_filters2=2048)
+        name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
    for i in xrange(2, res5_num + 1, 1):
-        tmp = bottleneck_block(name="res5_" + str(i),
+        tmp = bottleneck_block(
-                               input=tmp, num_filters1=512,
+            name="res5_" + str(i),
-                               num_filters2=2048)
+            input=tmp,
+            num_filters1=512,
-    tmp = img_pool_layer(name='avgpool',
+            num_filters2=2048)
-                         input=tmp,
-                         pool_size=7,
+    tmp = img_pool_layer(
-                         stride=1,
+        name='avgpool',
-                         pool_type=AvgPooling())
+        input=tmp,
+        pool_size=7,
-    output = fc_layer(name='output',
+        stride=1,
-                      input=tmp,
+        pool_type=AvgPooling())
-                      size=1000,
-                      act=SoftmaxActivation())
+    output = fc_layer(
+        name='output', input=tmp, size=1000, act=SoftmaxActivation())
    if not is_predict:
-        classification_cost(input=output, label=data_layer(name='label',
+        classification_cost(
-                                                           size=1))
+            input=output, label=data_layer(
+                name='label', size=1))
 def res_net_50():

--- a/demo/quick_start/api_train.py
+++ b/demo/quick_start/api_train.py
@@ -22,27 +22,32 @@ from py_paddle import DataProviderConverter
 from paddle.trainer.PyDataProvider2 \
    import integer_value, integer_value_sequence, sparse_binary_vector
 def parse_arguments():
    parser = argparse.ArgumentParser()
-    parser.add_argument("--train_data",
+    parser.add_argument(
-                        type=str, required=False, help="train data file")
+        "--train_data", type=str, required=False, help="train data file")
    parser.add_argument("--test_data", type=str, help="test data file")
-    parser.add_argument("--config",
+    parser.add_argument(
-                        type=str, required=True, help="config file name")
+        "--config", type=str, required=True, help="config file name")
    parser.add_argument("--dict_file", required=True, help="dictionary file")
-    parser.add_argument("--seq",
+    parser.add_argument(
-                        default=1, type=int,
+        "--seq", default=1, type=int, help="whether use sequence training")
-                        help="whether use sequence training")
+    parser.add_argument(
-    parser.add_argument("--use_gpu", default=0, type=int,
+        "--use_gpu", default=0, type=int, help="whether use GPU for training")
-                        help="whether use GPU for training")
+    parser.add_argument(
-    parser.add_argument("--trainer_count", default=1, type=int,
+        "--trainer_count",
-                        help="Number of threads for training")
+        default=1,
-    parser.add_argument("--num_passes", default=5, type=int,
+        type=int,
-                        help="Number of training passes")
+        help="Number of threads for training")
+    parser.add_argument(
+        "--num_passes", default=5, type=int, help="Number of training passes")
    return parser.parse_args()
 UNK_IDX = 0
 def load_data(file_name, word_dict):
    with open(file_name, 'r') as f:
        for line in f:
@@ -51,6 +56,7 @@ def load_data(file_name, word_dict):
            word_slot = [word_dict.get(w, UNK_IDX) for w in words]
            yield word_slot, int(label)
 def load_dict(dict_file):
    word_dict = dict()
    with open(dict_file, 'r') as f:
@@ -59,6 +65,7 @@ def load_dict(dict_file):
            word_dict[w] = i
    return word_dict
 def main():
    options = parse_arguments()
    api.initPaddle("--use_gpu=%s" % options.use_gpu,
@@ -86,9 +93,9 @@ def main():
    # create a data converter which converts data to PaddlePaddle
    # internal format
    input_types = [
-        integer_value_sequence(len(word_dict)) if options.seq
+        integer_value_sequence(len(word_dict)) if options.seq else
-            else sparse_binary_vector(len(word_dict)),
+        sparse_binary_vector(len(word_dict)), integer_value(2)
-        integer_value(2)]
+    ]
    converter = DataProviderConverter(input_types)
    batch_size = trainer_config.opt_config.batch_size
@@ -102,7 +109,7 @@ def main():
            trainer.trainOneDataBatch(size, converter(batch))
        trainer.finishTrainPass()
        if test_dataset:
-            trainer.startTestPeriod();
+            trainer.startTestPeriod()
            for pos in xrange(0, len(test_dataset), batch_size):
                batch = itertools.islice(test_dataset, pos, pos + batch_size)
                size = min(batch_size, len(test_dataset) - pos)
@@ -110,5 +117,6 @@ def main():
            trainer.finishTestPeriod()
    trainer.finishTrain()
 if __name__ == '__main__':
    main()
--- a/demo/quick_start/dataprovider_bow.py
+++ b/demo/quick_start/dataprovider_bow.py
@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import *
 # id of the word not in dictionary
 UNK_IDX = 0
 # initializer is called by the framework during initialization.
 # It allows the user to describe the data types and setup the
 # necessary data structure for later use.
@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs):
        # The second input is an integer. It represents the category id of the
        # sample. 2 means there are two labels in the dataset.
        # (1 for positive and 0 for negative)
-        integer_value(2)]
+        integer_value(2)
+    ]
 # Delaring a data provider. It has an initializer 'data_initialzer'.
 # It will cache the generated data of the first pass in memory, so that
@@ -69,9 +72,8 @@ def process(settings, file_name):
 def predict_initializer(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
-    settings.input_types = [
+    settings.input_types = [sparse_binary_vector(len(dictionary))]
-        sparse_binary_vector(len(dictionary))
-    ]
 # Declaring a data provider for prediction. The difference with process
 # is that label is not generated.

--- a/demo/quick_start/dataprovider_emb.py
+++ b/demo/quick_start/dataprovider_emb.py
@@ -24,7 +24,8 @@ def initializer(settings, dictionary, **kwargs):
        # The value of the integers range from 0 to len(dictrionary)-1
        integer_value_sequence(len(dictionary)),
        # Define the second input for label id
-        integer_value(2)]
+        integer_value(2)
+    ]
 @provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
@@ -40,7 +41,8 @@ def process(settings, file_name):
 def predict_initializer(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    settings.input_types = [
-        integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE)
+        integer_value(
+            len(dictionary), seq_type=SequenceType.SEQUENCE)
    ]

--- a/demo/quick_start/preprocess.py
+++ b/demo/quick_start/preprocess.py
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 1. (remove HTML before or not)tokensizing
 2. pos sample : rating score 5; neg sample: rating score 1-2.
@@ -35,7 +34,8 @@ import multiprocessing
 batch_size = 5000
 word_count = {}
-num_tokenize = max(1, multiprocessing.cpu_count() - 2)  # parse + tokenize + save
+num_tokenize = max(1,
+                   multiprocessing.cpu_count() - 2)  # parse + tokenize + save
 max_queue_size = 8
 parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
 tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)

--- a/demo/quick_start/preprocess.sh
+++ b/demo/quick_start/preprocess.sh
@@ -23,7 +23,7 @@ set -e
 export LC_ALL=C
 UNAME_STR=`uname`
-if [[ ${UNAME_STR} == 'Linux' ]]; then
+if [ "${UNAME_STR}" = 'Linux' ]; then  # POSIX test: '=' and a quoted operand
  SHUF_PROG='shuf'
 else
  SHUF_PROG='gshuf'

--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -20,6 +20,7 @@ cfg=trainer_config.lr.py
 #cfg=trainer_config.lstm.py
 #cfg=trainer_config.bidi-lstm.py
 #cfg=trainer_config.db-lstm.py
+#cfg=trainer_config.resnet-lstm.py
 paddle train \
  --config=$cfg \
  --save_dir=./output \

--- a/demo/quick_start/trainer_config.bidi-lstm.py
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
+define_py_data_sources2(
-                        test_list=tst,
+    train_list=trn,
-                        module="dataprovider_emb",
+    test_list=tst,
-                        obj=process,
+    module="dataprovider_emb",
-                        args={"dictionary": word_dict})
+    obj=process,
+    args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
@@ -39,19 +40,17 @@ settings(
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
+    gradient_clipping_threshold=25)
-)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
 bi_lstm = bidirectional_lstm(input=emb, size=128)
 dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-output = fc_layer(input=dropout, size=2,
+output = fc_layer(
-                  bias_attr=bias_attr,
+    input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-                  act=SoftmaxActivation())
 if is_predict:
    maxid = maxid_layer(output)

--- a/demo/quick_start/trainer_config.cnn.py
+++ b/demo/quick_start/trainer_config.cnn.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
+define_py_data_sources2(
-                        test_list=tst,
+    train_list=trn,
-                        module="dataprovider_emb",
+    test_list=tst,
-                        obj=process,
+    module="dataprovider_emb",
-                        args={"dictionary": word_dict})
+    obj=process,
+    args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
@@ -39,8 +40,7 @@ settings(
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
+    gradient_clipping_threshold=25)
-)
 data = data_layer(name="word", size=len(word_dict))
 embedding = embedding_layer(input=data, size=128)

--- a/demo/quick_start/trainer_config.db-lstm.py
+++ b/demo/quick_start/trainer_config.db-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
+define_py_data_sources2(
-                        test_list=tst,
+    train_list=trn,
-                        module="dataprovider_emb",
+    test_list=tst,
-                        obj=process,
+    module="dataprovider_emb",
-                        args={"dictionary": word_dict})
+    obj=process,
+    args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
@@ -39,10 +40,9 @@ settings(
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
+    gradient_clipping_threshold=25)
-)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
@@ -52,17 +52,18 @@ lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
 input_layers = [hidden_0, lstm_0]
-for i in range(1,8):
+for i in range(1, 8):
    fc = fc_layer(input=input_layers, size=128)
-    lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
+    lstm = lstmemory(
-                    reverse=(i % 2) == 1,)
+        input=fc,
+        layer_attr=ExtraAttr(drop_rate=0.1),
+        reverse=(i % 2) == 1, )
    input_layers = [fc, lstm]
 lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
+output = fc_layer(
-                  bias_attr=bias_attr,
+    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-                  act=SoftmaxActivation())
 if is_predict:
    maxid = maxid_layer(output)

--- a/demo/quick_start/trainer_config.emb.py
+++ b/demo/quick_start/trainer_config.emb.py
@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
+define_py_data_sources2(
-                        test_list=tst,
+    train_list=trn,
-                        module="dataprovider_emb",
+    test_list=tst,
-                        obj=process,
+    module="dataprovider_emb",
-                        args={"dictionary": word_dict})
+    obj=process,
+    args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
-    batch_size=batch_size,
+    batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer()
-)
 data = data_layer(name="word", size=len(word_dict))
 embedding = embedding_layer(input=data, size=128)

--- a/demo/quick_start/trainer_config.lr.py
+++ b/demo/quick_start/trainer_config.lr.py
@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict'
 # We need to use different process for training and prediction.
 # For training, the input data includes both word IDs and labels.
 # For prediction, the input data only includs word Ids.
-define_py_data_sources2(train_list=trn,
+define_py_data_sources2(
-                        test_list=tst,
+    train_list=trn,
-                        module="dataprovider_bow",
+    test_list=tst,
-                        obj=process,
+    module="dataprovider_bow",
-                        args={"dictionary": word_dict})
+    obj=process,
+    args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
@@ -44,8 +45,7 @@ settings(
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
+    gradient_clipping_threshold=25)
-)
 # Define the data for text features. The size of the data layer is the number
 # of words in the dictionary.

--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
+define_py_data_sources2(
-                        test_list=tst,
+    train_list=trn,
-                        module="dataprovider_emb",
+    test_list=tst,
-                        obj=process,
+    module="dataprovider_emb",
-                        args={"dictionary": word_dict})
+    obj=process,
+    args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
@@ -39,17 +40,14 @@ settings(
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
+    gradient_clipping_threshold=25)
-)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(input=emb, size=128,
+lstm = simple_lstm(
-                   lstm_cell_attr=ExtraAttr(drop_rate=0.25))
+    input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
 lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_max, size=2,
+output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
-                  act=SoftmaxActivation())
 if is_predict:
    maxid = maxid_layer(output)
    outputs([maxid, output])

--- a/demo/quick_start/trainer_config.resnet-lstm.py
+++ b/demo/quick_start/trainer_config.resnet-lstm.py
+# edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This configuration is a demonstration of how to implement the stacked LSTM
+with residual connections, i.e. an LSTM layer takes the sum of the hidden states
+and inputs of the previous LSTM layer instead of only the hidden states.
+This architecture is from:
+Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
+Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
+Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
+Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
+George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
+Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
+Google's Neural Machine Translation System: Bridging the Gap between Human and
+Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
+Different from the architecture described in the paper, we use a
+single-direction LSTM layer as the first layer instead of a bi-directional
+LSTM. Also, since this is demo code, to reduce computation time, we stacked
+4 layers instead of 8 layers.
+"""
+from paddle.trainer_config_helpers import *
+# Build the word -> id vocabulary: the id of a word is the 0-based index of
+# its line in the dictionary file; only the first token of a line is used.
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+    for i, line in enumerate(f):
+        w = line.strip().split()[0]
+        word_dict[w] = i
+is_predict = get_config_arg('is_predict', bool, False)
+# In prediction mode there is no training list, the test list points at
+# 'data/pred.list' and the 'process_predict' provider object is used.
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(
+    train_list=trn,
+    test_list=tst,
+    module="dataprovider_emb",
+    obj=process,
+    args={"dictionary": word_dict})
+batch_size = 128 if not is_predict else 1
+settings(
+    batch_size=batch_size,
+    learning_rate=2e-3,
+    learning_method=AdamOptimizer(),
+    regularization=L2Regularization(8e-4),
+    gradient_clipping_threshold=25)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+# First LSTM layer; together with the 3 residual layers below this gives the
+# 4 stacked layers mentioned in the module docstring.
+lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+previous_input, previous_hidden_state = emb, lstm
+for i in range(3):
+    # The input to the current layer is the sum of the hidden state
+    # and input of the previous layer.
+    current_input = addto_layer(input=[previous_input, previous_hidden_state])
+    hidden_state = simple_lstm(
+        input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+    previous_input, previous_hidden_state = current_input, hidden_state
+lstm = previous_hidden_state
+# Max-pool the top LSTM layer's output, then classify into 2 classes.
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(
+    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
+if is_predict:
+    # Prediction outputs both the arg-max class id and the softmax output.
+    maxid = maxid_layer(output)
+    outputs([maxid, output])
+else:
+    # Training/testing outputs the classification cost against the label.
+    label = data_layer(name="label", size=2)
+    cls = classification_cost(input=output, label=label)
+    outputs(cls)
--- a/demo/recommendation/common_utils.py
+++ b/demo/recommendation/common_utils.py
@@ -21,8 +21,9 @@ def meta_to_header(meta, name):
            yield integer_value(each_meta['max'])
        elif each_meta['type'] == 'embedding':
            is_seq = each_meta['seq'] == 'sequence'
-            yield integer_value(len(each_meta['dict']),
+            yield integer_value(
-                                seq_type=SequenceType.SEQUENCE if is_seq
+                len(each_meta['dict']),
-                                else SequenceType.NO_SEQUENCE)
+                seq_type=SequenceType.SEQUENCE
+                if is_seq else SequenceType.NO_SEQUENCE)
        elif each_meta['type'] == 'one_hot_dense':
            yield dense_vector(len(each_meta['dict']))
--- a/demo/recommendation/data/config.json
+++ b/demo/recommendation/data/config.json
@@ -14,4 +14,3 @@
    "fields": ["id", "title", "genres"]
  }
 }
--- a/demo/recommendation/data/config_generator.py
+++ b/demo/recommendation/data/config_generator.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 config_generator.py
@@ -29,10 +28,7 @@ import json
 import docopt
 import copy
-DEFAULT_FILE = {
+DEFAULT_FILE = {"type": "split", "delimiter": ","}
-    "type": "split",
-    "delimiter": ","
-}
 DEFAULT_FIELD = {
    "id": {
@@ -107,19 +103,16 @@ def main(filename, fmt):
                field = copy.deepcopy(DEFAULT_FIELD[field_key])
                field['pos'] = pos
                fields.append(field)
-            obj[k] = {
+            obj[k] = {"file": file_dict, "fields": fields}
-                "file": file_dict,
+    meta = {"meta": obj}
-                "fields": fields
-            }
-    meta = {
-        "meta": obj
-    }
    # print meta
    if fmt == 'json':
        def formatter(x):
            import json
            return json.dumps(x, indent=2)
    elif fmt == 'yaml':
        def formatter(x):
            import yaml
            return yaml.safe_dump(x, default_flow_style=False)

--- a/demo/recommendation/data/meta_generator.py
+++ b/demo/recommendation/data/meta_generator.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Preprocess Movielens dataset, to get movie/user object.
@@ -66,8 +65,8 @@ class SortedIDGenerator(object):
        self.__key_set__.add(key)
    def finish_scan(self, compare=None, key=None, reverse=False):
-        self.__key_set__ = sorted(list(self.__key_set__), cmp=compare,
+        self.__key_set__ = sorted(
-                                  key=key, reverse=reverse)
+            list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
        self.dict = dict()
        for idx, each_key in enumerate(self.__key_set__):
            self.dict[each_key] = idx
@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object):
            self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
                self.seq_type == EmbeddingFieldParser.SEQUENCE)
        elif config['dict']['type'] == 'split':
-            self.dict = SplitEmbeddingDict(
+            self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
-                config['dict'].get('delimiter', ','))
        elif config['dict']['type'] == 'whole_content':
-            self.dict = EmbeddingFieldParser.WholeContentDict(
+            self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
-                config['dict']['sort'])
+                'sort'])
        else:
            print config
            assert False
@@ -333,8 +331,8 @@ class ContentExtractorFactory(object):
                return PositionContentExtractor(config['pos'])
            else:
                extra_args = config['regex']
-                return RegexPositionContentExtractor(pos=config['pos'],
+                return RegexPositionContentExtractor(
-                                                     **extra_args)
+                    pos=config['pos'], **extra_args)
 class MetaFile(object):
@@ -364,9 +362,10 @@ class MetaFile(object):
            metas = map(lambda x: x.meta_field(), field_parsers)
            # print metas
-            key_index = filter(lambda x: x is not None, map(
+            key_index = filter(
-                lambda (idx, meta): idx if 'is_key' in meta and meta['is_key']
+                lambda x: x is not None,
-                else None, enumerate(metas)))[0]
+                map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
+                    enumerate(metas)))[0]
            key_map = []
            for i in range(min(key_index, len(metas))):
@@ -374,12 +373,7 @@ class MetaFile(object):
            for i in range(key_index + 1, len(metas)):
                key_map.append(i)
-            obj = {
+            obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
-                '__meta__': {
-                    'raw_meta': metas,
-                    'feature_map': key_map
-                }
-            }
            for each_block in reader.read():
                idx = field_parsers[key_index].parse(each_block)

--- a/demo/recommendation/data/split.py
+++ b/demo/recommendation/data/split.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Separate movielens 1m dataset to train/test file.

--- a/demo/recommendation/dataprovider.py
+++ b/demo/recommendation/dataprovider.py
@@ -15,6 +15,7 @@
 from paddle.trainer.PyDataProvider2 import *
 import common_utils  # parse
 def hook(settings, meta, **kwargs):
    """
    Init hook is invoked before process data. It will set obj.slots and store
@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs):
    settings.input_types = headers
    settings.meta = meta
 @provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, filename):
    with open(filename, 'r') as f:

--- a/demo/recommendation/prediction.py
+++ b/demo/recommendation/prediction.py
@@ -28,7 +28,8 @@ if __name__ == '__main__':
    model_path = sys.argv[1]
    swig_paddle.initPaddle('--use_gpu=0')
    conf = parse_config("trainer_config.py", "is_predict=1")
-    network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+    network = swig_paddle.GradientMachine.createFromConfigProto(
+        conf.model_config)
    assert isinstance(network, swig_paddle.GradientMachine)
    network.loadParameters(model_path)
    with open('./data/meta.bin', 'rb') as f:
@@ -39,11 +40,12 @@ if __name__ == '__main__':
        while True:
            movie_id = int(raw_input("Input movie_id: "))
            user_id = int(raw_input("Input user_id: "))
-            movie_meta = meta['movie'][movie_id]    # Query Data From Meta.
+            movie_meta = meta['movie'][movie_id]  # Query Data From Meta.
            user_meta = meta['user'][user_id]
            data = [movie_id - 1]
            data.extend(movie_meta)
            data.append(user_id - 1)
            data.extend(user_meta)
-            print "Prediction Score is %.2f" % ((network.forwardTest(
+            print "Prediction Score is %.2f" % (
-                cvt.convert([data]))[0]['value'][0][0] + 5) / 2)
+                (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
+                / 2)
--- a/demo/recommendation/trainer_config.py
+++ b/demo/recommendation/trainer_config.py
@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f:
    # load meta file
    meta = pickle.load(f)
-settings(batch_size=1600, learning_rate=1e-3,
+settings(
-         learning_method=RMSPropOptimizer())
+    batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
 def construct_feature(name):
@@ -59,11 +59,10 @@ def construct_feature(name):
        slot_name = each_meta.get('name', '%s_id' % name)
        if type_name == 'id':
            slot_dim = each_meta['max']
-            embedding = embedding_layer(input=data_layer(slot_name,
+            embedding = embedding_layer(
-                                                          size=slot_dim),
+                input=data_layer(
-                                        size=256)
+                    slot_name, size=slot_dim), size=256)
-            fusion.append(fc_layer(input=embedding,
+            fusion.append(fc_layer(input=embedding, size=256))
-                                   size=256))
        elif type_name == 'embedding':
            is_seq = each_meta['seq'] == 'sequence'
            slot_dim = len(each_meta['dict'])
@@ -71,17 +70,14 @@ def construct_feature(name):
            embedding = embedding_layer(input=din, size=256)
            if is_seq:
                fusion.append(
-                    text_conv_pool(input=embedding, context_len=5,
+                    text_conv_pool(
-                                   hidden_size=256))
+                        input=embedding, context_len=5, hidden_size=256))
            else:
-                fusion.append(fc_layer(input=embedding,
+                fusion.append(fc_layer(input=embedding, size=256))
-                                       size=256))
        elif type_name == 'one_hot_dense':
            slot_dim = len(each_meta['dict'])
-            hidden = fc_layer(input=data_layer(slot_name, slot_dim),
+            hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
-                              size=256)
+            fusion.append(fc_layer(input=hidden, size=256))
-            fusion.append(fc_layer(input=hidden,
-                                   size=256))
    return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie")
 user_feature = construct_feature("user")
 similarity = cos_sim(a=movie_feature, b=user_feature)
 if not is_predict:
-    outputs(regression_cost(input=similarity,
+    outputs(
-                            label=data_layer('rating', size=1)))
+        regression_cost(
+            input=similarity, label=data_layer(
-    define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider',
+                'rating', size=1)))
-                           obj='process', args={'meta': meta})
+    define_py_data_sources2(
+        'data/train.list',
+        'data/test.list',
+        module='dataprovider',
+        obj='process',
+        args={'meta': meta})
 else:
    outputs(similarity)
--- a/demo/semantic_role_labeling/dataprovider.py
+++ b/demo/semantic_role_labeling/dataprovider.py
@@ -30,9 +30,9 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
        integer_value_sequence(len(word_dict)),
        integer_value_sequence(len(word_dict)),
        integer_value_sequence(len(word_dict)),
-        integer_value_sequence(len(word_dict)),
+        integer_value_sequence(len(word_dict)), integer_value_sequence(2),
-        integer_value_sequence(2),
+        integer_value_sequence(len(label_dict))
-        integer_value_sequence(len(label_dict))]
+    ]
 def get_batch_size(yeild_data):

--- a/demo/semantic_role_labeling/db_lstm.py
+++ b/demo/semantic_role_labeling/db_lstm.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
 import os
 import sys
@@ -50,7 +49,7 @@ if not is_predict:
    if is_test:
-        train_list_file = None 
+        train_list_file = None
    #define data provider
    define_py_data_sources2(

--- a/demo/semantic_role_labeling/predict.py
+++ b/demo/semantic_role_labeling/predict.py
@@ -63,6 +63,10 @@ class Prediction():
            integer_value_sequence(len_dict), 
            integer_value_sequence(2)
            ]
+            integer_value_sequence(len_dict), integer_value_sequence(len_dict),
+            integer_value_sequence(len_dict), integer_value_sequence(len_dict),
+            integer_value_sequence(len_dict), integer_value_sequence(2)
+        ]
        self.converter = DataProviderConverter(slots)
    def load_dict_label(self, dict_file, label_file, predicate_dict_file):
@@ -118,8 +122,8 @@ class Prediction():
                len_sen = len(sen.split())
                line_labels = lab[index:index + len_sen]
                index += len_sen
-                fout.write(sen + '\t' + ' '.join([self.labels_reverse[
+                fout.write(sen + '\t' + ' '.join(
-                    i] for i in line_labels]) + '\n')
+                    [self.labels_reverse[i] for i in line_labels]) + '\n')
 def option_parser():

--- a/demo/semantic_role_labeling/test.sh
+++ b/demo/semantic_role_labeling/test.sh
@@ -38,4 +38,3 @@ paddle train \
  --config_args=is_test=1 \
  --test_all_data_in_one_period=1 \
 2>&1 | tee 'test.log'
--- a/demo/semantic_role_labeling/train.sh
+++ b/demo/semantic_role_labeling/train.sh
@@ -26,4 +26,3 @@ paddle train \
  --init_model_path=./data \
  --load_missing_parameter_strategy=rand \
 2>&1 | tee 'train.log'
--- a/demo/sentiment/data/get_imdb.sh
+++ b/demo/sentiment/data/get_imdb.sh
@@ -38,11 +38,11 @@ unzip master.zip
 mkdir -p imdb/train
 mkdir -p imdb/test
-cp -r aclImdb/train/pos/ imdb/train/
+cp -r aclImdb/train/pos/ imdb/train/pos
-cp -r aclImdb/train/neg/ imdb/train/
+cp -r aclImdb/train/neg/ imdb/train/neg
-cp -r aclImdb/test/pos/ imdb/test/
+cp -r aclImdb/test/pos/ imdb/test/pos
-cp -r aclImdb/test/neg/ imdb/test/
+cp -r aclImdb/test/neg/ imdb/test/neg
 #remove compressed package
 rm aclImdb_v1.tar.gz

--- a/demo/sentiment/dataprovider.py
+++ b/demo/sentiment/dataprovider.py
@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import *
 def hook(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    settings.input_types = [
-        integer_value_sequence(len(settings.word_dict)),
+        integer_value_sequence(len(settings.word_dict)), integer_value(2)
-        integer_value(2)]
+    ]
    settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@@ -29,6 +29,7 @@ def process(settings, file_name):
            label, comment = line.strip().split('\t\t')
            label = int(label)
            words = comment.split()
-            word_slot = [settings.word_dict[w] for w in words if w in
+            word_slot = [
-                         settings.word_dict]
+                settings.word_dict[w] for w in words if w in settings.word_dict
+            ]
            yield word_slot, label
--- a/demo/sentiment/predict.py
+++ b/demo/sentiment/predict.py
@@ -18,14 +18,14 @@ from optparse import OptionParser
 from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import integer_value_sequence
 from paddle.trainer.config_parser import parse_config
 """
 Usage: run following command to show help message.
  python predict.py -h
 """
 class SentimentPrediction():
-    def __init__(self, train_conf, dict_file, model_dir=None, label_file = None):
+    def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
        """
        train_conf: trainer configure.
        dict_file: word dictionary file name.
@@ -44,7 +44,8 @@ class SentimentPrediction():
            self.load_label(label_file)
        conf = parse_config(train_conf, "is_predict=1")
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
        self.network.loadParameters(self.model_dir)
        input_types = [integer_value_sequence(self.dict_dim)]
        self.converter = DataProviderConverter(input_types)
@@ -61,7 +62,7 @@ class SentimentPrediction():
        """
        Load label.
        """
-        self.label={}
+        self.label = {}
        for v in open(label_file, 'r'):
            self.label[int(v.split('\t')[1])] = v.split('\t')[0]
@@ -72,7 +73,9 @@ class SentimentPrediction():
        with open(data_file, 'r') as fdata:
            for line in fdata:
                words = line.strip().split()
-                word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
+                word_slot = [
+                    self.word_dict[w] for w in words if w in self.word_dict
+                ]
                if not word_slot:
                    print "all words are not in dictionary: %s", line
                    continue
@@ -89,25 +92,48 @@ class SentimentPrediction():
        if self.label is None:
            print("%s: predicting label is %d" % (data_file, lab[0][0]))
        else:
-            print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]]))
+            print("%s: predicting label is %s" %
+                  (data_file, self.label[lab[0][0]]))
 def option_parser():
    usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
    parser = OptionParser(usage="usage: %s [options]" % usage)
-    parser.add_option("-n", "--tconf", action="store",
+    parser.add_option(
-                      dest="train_conf", help="network config")
+        "-n",
-    parser.add_option("-d", "--dict", action="store",
+        "--tconf",
-                      dest="dict_file",help="dictionary file")
+        action="store",
-    parser.add_option("-b", "--label", action="store",
+        dest="train_conf",
-                      dest="label", default=None,
+        help="network config")
-                      help="dictionary file")
+    parser.add_option(
-    parser.add_option("-i", "--data", action="store",
+        "-d",
-                      dest="data", help="data file to predict")
+        "--dict",
-    parser.add_option("-w", "--model", action="store",
+        action="store",
-                      dest="model_path", default=None,
+        dest="dict_file",
-                      help="model path")
+        help="dictionary file")
+    parser.add_option(
+        "-b",
+        "--label",
+        action="store",
+        dest="label",
+        default=None,
+        help="dictionary file")
+    parser.add_option(
+        "-i",
+        "--data",
+        action="store",
+        dest="data",
+        help="data file to predict")
+    parser.add_option(
+        "-w",
+        "--model",
+        action="store",
+        dest="model_path",
+        default=None,
+        help="model path")
    return parser.parse_args()
 def main():
    options, args = option_parser()
    train_conf = options.train_conf
@@ -119,5 +145,6 @@ def main():
    predict = SentimentPrediction(train_conf, dict_file, model_path, label)
    predict.predict(data)
 if __name__ == '__main__':
    main()
--- a/demo/sentiment/preprocess.py
+++ b/demo/sentiment/preprocess.py
@@ -22,13 +22,13 @@ from os.path import join as join_path
 from optparse import OptionParser
 from paddle.utils.preprocess_util import *
 """
 Usage: run following command to show help message.
  python preprocess.py -h 
 """
-def save_dict(dict, filename, is_reverse = True):
+def save_dict(dict, filename, is_reverse=True):
    """
    Save dictionary into file.
    dict:   input dictionary.
@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True):
    f = open(filename, 'w')
    for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
                       reverse=is_reverse):
-        f.write('%s\t%s\n'%(k, v))
+        f.write('%s\t%s\n' % (k, v))
    f.close()
 def tokenize(sentences):
    """
    Use tokenizer.perl to tokenize input sentences.
@@ -58,6 +59,7 @@ def tokenize(sentences):
    toks = tok_text.split('\n')[:-1]
    return toks
 def read_lines(path):
    """
    path: String, file path.
@@ -71,12 +73,17 @@ def read_lines(path):
                seqs.append(line)
    return seqs
 class SentimentDataSetCreate():
    """
    A class to process data for sentiment analysis task.
    """
-    def __init__(self, data_path, output_path,
-                 use_okenizer = True, multi_lines = False):
+    def __init__(self,
+                 data_path,
+                 output_path,
+                 use_okenizer=True,
+                 multi_lines=False):
        """
        data_path: string, traing and testing dataset path
        output_path: string, output path, store processed dataset
@@ -164,23 +171,17 @@ class SentimentDataSetCreate():
        # Preprocess train data.
        train_data, train_lab_set = self.data_list(self.train_dir)
        print "processing train set..."
-        file_lists = self.save_data(train_data,
+        file_lists = self.save_data(train_data, "train", self.batch_size, True,
-                                     "train",
+                                    True)
-                                     self.batch_size,
-                                     True,
-                                     True)
        save_list(file_lists, self.train_list)
        # If have test data path, preprocess test data.
        if os.path.exists(self.test_dir):
            test_data, test_lab_set = self.data_list(self.test_dir)
-            assert(train_lab_set == test_lab_set)
+            assert (train_lab_set == test_lab_set)
            print "processing test set..."
-            file_lists = self.save_data(test_data,
+            file_lists = self.save_data(test_data, "test", self.batch_size,
-                                        "test",
+                                        False, self.dict_with_test)
-                                        self.batch_size,
-                                        False,
-                                        self.dict_with_test)
            save_list(file_lists, self.test_list)
        # save labels set.
@@ -191,7 +192,9 @@ class SentimentDataSetCreate():
        save_dict(self.word_count, self.dict_file, True)
        self.dict_size = len(self.word_count)
-    def save_data(self, data, prefix = "",
+    def save_data(self,
+                  data,
+                  prefix="",
                  batch_size=50000,
                  is_shuffle=False,
                  build_dict=False):
@@ -205,7 +208,8 @@ class SentimentDataSetCreate():
        return: list of batch names
        """
        if is_shuffle and self.multi_lines:
-           return self.save_data_multi_lines(data, prefix, batch_size, build_dict)
+            return self.save_data_multi_lines(data, prefix, batch_size,
+                                              build_dict)
        if is_shuffle:
            random.shuffle(data)
@@ -213,7 +217,7 @@ class SentimentDataSetCreate():
        batch_names = []
        for i in range(num_batches):
            batch_name = join_path(self.output_path,
-                                   "%s_part_%03d" %(prefix, i))
+                                   "%s_part_%03d" % (prefix, i))
            begin = i * batch_size
            end = min((i + 1) * batch_size, len(data))
            # read a batch of data
@@ -246,7 +250,9 @@ class SentimentDataSetCreate():
            data_list = tokenize(data_list)
        return label_list, data_list
-    def save_data_multi_lines(self, data, prefix = "",
+    def save_data_multi_lines(self,
+                              data,
+                              prefix="",
                              batch_size=50000,
                              build_dict=False):
        """
@@ -274,14 +280,14 @@ class SentimentDataSetCreate():
            self.create_dict(data_list)
        length = len(label_list)
-        perm_list = np.array([ i for i in xrange(length) ])
+        perm_list = np.array([i for i in xrange(length)])
        random.shuffle(perm_list)
        num_batches = int(math.ceil(length / float(batch_size)))
        batch_names = []
        for i in range(num_batches):
            batch_name = join_path(self.output_path,
-                                   "%s_part_%03d" %(prefix, i))
+                                   "%s_part_%03d" % (prefix, i))
            begin = i * batch_size
            end = min((i + 1) * batch_size, length)
            sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
@@ -304,35 +310,50 @@ class SentimentDataSetCreate():
            f.write('%s\t\t%s\n' % (lab, seq))
        f.close()
 def option_parser():
    parser = OptionParser(usage="usage: python preprcoess.py "\
                                "-i data_dir [options]")
-    parser.add_option("-i", "--data", action="store",
+    parser.add_option(
-                      dest="input", help="Input data directory.")
+        "-i",
-    parser.add_option("-o", "--output", action="store",
+        "--data",
-                      dest="output", default=None,
+        action="store",
-                      help="Output directory.")
+        dest="input",
-    parser.add_option("-t", "--tokenizer", action="store",
+        help="Input data directory.")
-                      dest="use_tokenizer", default=True,
+    parser.add_option(
-                      help="Whether to use tokenizer.")
+        "-o",
+        "--output",
+        action="store",
+        dest="output",
+        default=None,
+        help="Output directory.")
+    parser.add_option(
+        "-t",
+        "--tokenizer",
+        action="store",
+        dest="use_tokenizer",
+        default=True,
+        help="Whether to use tokenizer.")
    parser.add_option("-m", "--multi_lines", action="store",
                      dest="multi_lines", default=False,
                      help="If input text files have multi lines and they "\
                           "need to be shuffled, you should set -m True,")
    return parser.parse_args()
 def main():
    options, args = option_parser()
-    data_dir=options.input
+    data_dir = options.input
-    output_dir=options.output
+    output_dir = options.output
-    use_tokenizer=options.use_tokenizer
+    use_tokenizer = options.use_tokenizer
-    multi_lines=options.multi_lines
+    multi_lines = options.multi_lines
    if output_dir is None:
        outname = os.path.basename(options.input)
        output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
-    data_creator = SentimentDataSetCreate(data_dir, output_dir,
+    data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
-                                          use_tokenizer, multi_lines)
+                                          multi_lines)
    data_creator.create_dataset()
 if __name__ == '__main__':
    main()
--- a/demo/sentiment/sentiment_net.py
+++ b/demo/sentiment/sentiment_net.py
@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None,
        for i, line in enumerate(open(dict_file, 'r')):
            word_dict[line.split('\t')[0]] = i
-    define_py_data_sources2(train_list, test_list,
+    define_py_data_sources2(
-                           module="dataprovider",
+        train_list,
-                           obj="process",
+        test_list,
-                           args={'dictionary': word_dict})
+        module="dataprovider",
+        obj="process",
+        args={'dictionary': word_dict})
    return dict_dim, class_dim
@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim,
    emb = embedding_layer(input=data, size=emb_dim)
    bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
    dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-    output = fc_layer(input=dropout, size=class_dim,
+    output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
-                      act=SoftmaxActivation())
    if not is_predict:
        lbl = data_layer("label", 1)
@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim,
    data = data_layer("word", input_dim)
    emb = embedding_layer(input=data, size=emb_dim)
-    fc1 = fc_layer(input=emb, size=hid_dim, act=linear,
+    fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
-                   bias_attr=bias_attr)
+    lstm1 = lstmemory(
-    lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr,
+        input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
-                      layer_attr=layer_attr)
    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
-        fc = fc_layer(input=inputs, size=hid_dim, act=linear,
+        fc = fc_layer(
-                      param_attr=para_attr, bias_attr=bias_attr)
+            input=inputs,
-        lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu,
+            size=hid_dim,
-                         bias_attr=bias_attr, layer_attr=layer_attr)
+            act=linear,
+            param_attr=para_attr,
+            bias_attr=bias_attr)
+        lstm = lstmemory(
+            input=fc,
+            reverse=(i % 2) == 0,
+            act=relu,
+            bias_attr=bias_attr,
+            layer_attr=layer_attr)
        inputs = [fc, lstm]
    fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
    lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
-    output = fc_layer(input=[fc_last, lstm_last], size=class_dim,
+    output = fc_layer(
-                      act=SoftmaxActivation(),
+        input=[fc_last, lstm_last],
-                      bias_attr=bias_attr, param_attr=para_attr)
+        size=class_dim,
+        act=SoftmaxActivation(),
+        bias_attr=bias_attr,
+        param_attr=para_attr)
    if is_predict:
        outputs(output)
    else:
-        outputs(
+        outputs(classification_cost(input=output, label=data_layer('label', 1)))
-            classification_cost(input=output, label=data_layer('label', 1)))
--- a/demo/sentiment/trainer_config.py
+++ b/demo/sentiment/trainer_config.py
@@ -20,20 +20,19 @@ is_test = get_config_arg('is_test', bool, False)
 # whether this config is used for prediction
 is_predict = get_config_arg('is_predict', bool, False)
-data_dir  = "./data/pre-imdb"
+data_dir = "./data/pre-imdb"
 dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
 ################## Algorithm Config #####################
 settings(
-  batch_size=128,
+    batch_size=128,
-  learning_rate=2e-3,
+    learning_rate=2e-3,
-  learning_method=AdamOptimizer(),
+    learning_method=AdamOptimizer(),
-  regularization=L2Regularization(8e-4),
+    regularization=L2Regularization(8e-4),
-  gradient_clipping_threshold=25
+    gradient_clipping_threshold=25)
-)
 #################### Network Config ######################
-stacked_lstm_net(dict_dim, class_dim=class_dim,
+stacked_lstm_net(
-                 stacked_num=3, is_predict=is_predict)
+    dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
 # bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
--- a/demo/seqToseq/dataprovider.py
+++ b/demo/seqToseq/dataprovider.py
@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs):
    if settings.job_mode:
        settings.trg_dict = trg_dict
        settings.slots = [
-            integer_value_sequence(len(settings.src_dict)), 
+            integer_value_sequence(len(settings.src_dict)),
-            integer_value_sequence(len(settings.trg_dict)), 
+            integer_value_sequence(len(settings.trg_dict)),
            integer_value_sequence(len(settings.trg_dict))
        ]
        settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
    else:
        settings.slots = [
-            integer_value_sequence(len(settings.src_dict)), 
+            integer_value_sequence(len(settings.src_dict)),
            integer_value_sequence(len(open(file_list[0], "r").readlines()))
        ]
@@ -62,8 +62,7 @@ def process(settings, file_name):
            if settings.job_mode:
                trg_seq = line_split[1]  # one target sequence
                trg_words = trg_seq.split()
-                trg_ids = [settings.trg_dict.get(w, UNK_IDX)
+                trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
-                           for w in trg_words]
                # remove sequence whose length > 80 in training mode
                if len(src_ids) > 80 or len(trg_ids) > 80:

--- a/demo/seqToseq/preprocess.py
+++ b/demo/seqToseq/preprocess.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Example:
    python preprocess.py -i INPUT [-d DICTSIZE] [-m]
@@ -24,12 +23,13 @@ Options:
    -m --mergeDict merge source and target dictionary
 """
 import os
-import sys 
+import sys
 import string
 from optparse import OptionParser
 from paddle.utils.preprocess_util import save_list, DatasetCreater
 class SeqToSeqDatasetCreater(DatasetCreater):
    """
    A class to process data for sequence to sequence application.
@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater):
        if not os.path.exists(output):
            os.system(cmd + '> ' + output)
-    def build_dict(self, file_path, dict_path, dict_size = -1):
+    def build_dict(self, file_path, dict_path, dict_size=-1):
        """ 
        Create the dictionary for the file, Note that
        1. Valid characters include all printable characters
@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater):
                        for word in words:
                            if word not in dictory:
                                dictory[word] = 1
-                            else: 
+                            else:
                                dictory[word] += 1
            output = open(dict_path, "w+")
            output.write('<s>\n<e>\n<unk>\n')
            count = 3
-            for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True):
+            for key, value in sorted(
+                    dictory.items(), key=lambda d: d[1], reverse=True):
                output.write(key + "\n")
                count += 1
                if count == dict_size:
                    break
            self.dict_size = count
-    def create_dataset(self, dict_size = -1, mergeDict = False,
+    def create_dataset(self,
-                       suffixes = ['.src', '.trg']):
+                       dict_size=-1,
+                       mergeDict=False,
+                       suffixes=['.src', '.trg']):
        """
        Create seqToseq dataset 
        """
@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater):
        # checkout dataset should be parallel corpora
        suffix_len = len(suffixes[0])
        for dataset in dataset_list:
-          file_list = os.listdir(dataset)
+            file_list = os.listdir(dataset)
-          if len(file_list) % 2 == 1:
+            if len(file_list) % 2 == 1:
-              raise RuntimeError("dataset should be parallel corpora")
+                raise RuntimeError("dataset should be parallel corpora")
-          file_list.sort()
+            file_list.sort()
-          for i in range(0, len(file_list), 2):
+            for i in range(0, len(file_list), 2):
-              if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
+                if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
-                  raise RuntimeError("source and target file name should be equal")
+                    raise RuntimeError(
+                        "source and target file name should be equal")
        # cat all the files with the same suffix in dataset
        for suffix in suffixes:
@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater):
        list = ['train.list', 'test.list', 'gen.list']
        for dataset in dataset_list:
            outname = os.path.basename(dataset)
-            self.concat_file(dataset, outname + suffixes[0], 
+            self.concat_file(dataset, outname + suffixes[0],
                             outname + suffixes[1], dir_list[id], outname)
-            save_list([os.path.join(dir_list[id], outname)], 
+            save_list([os.path.join(dir_list[id], outname)],
                      os.path.join(self.output_path, list[id]))
            id += 1
        # build dictionary for train data
        dict = ['src.dict', 'trg.dict']
-        dict_path = [os.path.join(self.output_path, dict[0]), 
+        dict_path = [
-                     os.path.join(self.output_path, dict[1])]
+            os.path.join(self.output_path, dict[0]),
+            os.path.join(self.output_path, dict[1])
+        ]
        if mergeDict:
            outname = os.path.join(train_dir, train_dataset.split('/')[-1])
            print 'build src dictionary for train data'
@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater):
            os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
        else:
            outname = os.path.join(train_dataset, self.train_dir_name)
-            for id in range(0,2):
+            for id in range(0, 2):
                suffix = suffixes[id]
                print 'build ' + suffix[1:] + ' dictionary for train data'
                self.build_dict(outname + suffix, dict_path[id], dict_size)
        print 'dictionary size is', self.dict_size
 def main():
    usage = "usage: \n" \
            "python %prog -i INPUT [-d DICTSIZE] [-m]"
    parser = OptionParser(usage)
-    parser.add_option("-i", action="store", dest="input",
+    parser.add_option(
-                      help="input original dataset path")
+        "-i", action="store", dest="input", help="input original dataset path")
-    parser.add_option("-d", action="store", dest="dictsize",
+    parser.add_option(
-                      help="specified word count of dictionary")
+        "-d",
-    parser.add_option("-m", "--mergeDict", action="store_true", dest="mergeDict",
+        action="store",
-                      help="merge source and target dictionary")
+        dest="dictsize",
+        help="specified word count of dictionary")
+    parser.add_option(
+        "-m",
+        "--mergeDict",
+        action="store_true",
+        dest="mergeDict",
+        help="merge source and target dictionary")
    (options, args) = parser.parse_args()
    if options.input[-1] == os.path.sep:
        options.input = options.input[:-1]
@@ -200,5 +214,6 @@ def main():
        data_creator = SeqToSeqDatasetCreater(options.input, output_path)
        data_creator.create_dataset(dictsize, options.mergeDict)
 if __name__ == "__main__":
-    main(); 
+    main()
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir,
        trg_dict = None
    else:
        train_list = os.path.join(data_dir, train_list)
-        test_list = os.path.join(data_dir,test_list)
+        test_list = os.path.join(data_dir, test_list)
-    define_py_data_sources2(train_list, test_list,
+    define_py_data_sources2(
-                           module = "dataprovider",
+        train_list,
-                           obj = "process",
+        test_list,
-                           args = {"src_dict": src_dict,
+        module="dataprovider",
-                                   "trg_dict": trg_dict})
+        obj="process",
+        args={"src_dict": src_dict,
+              "trg_dict": trg_dict})
-    return {"src_dict_path": src_lang_dict, "trg_dict_path": trg_lang_dict,
+    return {
-            "gen_result": gen_result}
+        "src_dict_path": src_lang_dict,
+        "trg_dict_path": trg_lang_dict,
+        "gen_result": gen_result
+    }
 def gru_encoder_decoder(data_conf,
@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf,
        size=word_vector_dim,
        param_attr=ParamAttr(name='_source_language_embedding'))
    src_forward = simple_gru(input=src_embedding, size=encoder_size)
-    src_backward = simple_gru(input=src_embedding,
+    src_backward = simple_gru(
-                              size=encoder_size,
+        input=src_embedding, size=encoder_size, reverse=True)
-                              reverse=True)
    encoded_vector = concat_layer(input=[src_forward, src_backward])
    with mixed_layer(size=decoder_size) as encoded_proj:
        encoded_proj += full_matrix_projection(input=encoded_vector)
    backward_first = first_seq(input=src_backward)
-    with mixed_layer(size=decoder_size,
+    with mixed_layer(
-                     act=TanhActivation(), ) as decoder_boot:
+            size=decoder_size,
+            act=TanhActivation(), ) as decoder_boot:
        decoder_boot += full_matrix_projection(input=backward_first)
    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
-        decoder_mem = memory(name='gru_decoder',
+        decoder_mem = memory(
-                             size=decoder_size,
+            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
-                             boot_layer=decoder_boot)
-        context = simple_attention(encoded_sequence=enc_vec,
+        context = simple_attention(
-                                   encoded_proj=enc_proj,
+            encoded_sequence=enc_vec,
-                                   decoder_state=decoder_mem, )
+            encoded_proj=enc_proj,
+            decoder_state=decoder_mem, )
        with mixed_layer(size=decoder_size * 3) as decoder_inputs:
            decoder_inputs += full_matrix_projection(input=context)
            decoder_inputs += full_matrix_projection(input=current_word)
-        gru_step = gru_step_layer(name='gru_decoder',
+        gru_step = gru_step_layer(
-                                  input=decoder_inputs,
+            name='gru_decoder',
-                                  output_mem=decoder_mem,
+            input=decoder_inputs,
-                                  size=decoder_size)
+            output_mem=decoder_mem,
+            size=decoder_size)
-        with mixed_layer(size=target_dict_dim,
+        with mixed_layer(
-                         bias_attr=True,
+                size=target_dict_dim, bias_attr=True,
-                         act=SoftmaxActivation()) as out:
+                act=SoftmaxActivation()) as out:
            out += full_matrix_projection(input=gru_step)
        return out
    decoder_group_name = "decoder_group"
-    group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
+    group_inputs = [
-                  StaticInput(input=encoded_proj,is_seq=True)]
+        StaticInput(
+            input=encoded_vector, is_seq=True), StaticInput(
+                input=encoded_proj, is_seq=True)
+    ]
    if not is_generating:
        trg_embedding = embedding_layer(
-            input=data_layer(name='target_language_word',
+            input=data_layer(
-                             size=target_dict_dim),
+                name='target_language_word', size=target_dict_dim),
            size=word_vector_dim,
            param_attr=ParamAttr(name='_target_language_embedding'))
        group_inputs.append(trg_embedding)
@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf,
        # while encoded source sequence is accessed to as an unbounded memory.
        # Here, the StaticInput defines a read-only memory
        # for the recurrent_group.
-        decoder = recurrent_group(name=decoder_group_name,
+        decoder = recurrent_group(
-                                  step=gru_decoder_with_attention,
+            name=decoder_group_name,
-                                  input=group_inputs)
+            step=gru_decoder_with_attention,
+            input=group_inputs)
-        lbl = data_layer(name='target_language_next_word',
+        lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
-                         size=target_dict_dim)
        cost = classification_cost(input=decoder, label=lbl)
        outputs(cost)
    else:
@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf,
            embedding_size=word_vector_dim)
        group_inputs.append(trg_embedding)
-        beam_gen = beam_search(name=decoder_group_name,
+        beam_gen = beam_search(
-                               step=gru_decoder_with_attention,
+            name=decoder_group_name,
-                               input=group_inputs,
+            step=gru_decoder_with_attention,
-                               bos_id=0,
+            input=group_inputs,
-                               eos_id=1,
+            bos_id=0,
-                               beam_size=beam_size,
+            eos_id=1,
-                               max_length=max_length)
+            beam_size=beam_size,
+            max_length=max_length)
-        seqtext_printer_evaluator(input=beam_gen,
-                                  id_input=data_layer(name="sent_id", size=1),
+        seqtext_printer_evaluator(
-                                  dict_file=trg_dict_path,
+            input=beam_gen,
-                                  result_file=gen_trans_file)
+            id_input=data_layer(
+                name="sent_id", size=1),
+            dict_file=trg_dict_path,
+            result_file=gen_trans_file)
        outputs(beam_gen)
--- a/demo/sequence_tagging/dataprovider.py
+++ b/demo/sequence_tagging/dataprovider.py
@@ -17,8 +17,7 @@ import gzip
 import logging
 logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s',
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
-)
 logger = logging.getLogger('paddle')
 logger.setLevel(logging.INFO)
@@ -32,59 +31,58 @@ num_original_columns = 3
 # [[-1,0], [0,0]]  means previous token at column 0 and current token at 
 # column 0 are combined as one feature.
 patterns = [
-    [[-2,0]],
+    [[-2, 0]],
-    [[-1,0]],
+    [[-1, 0]],
-    [[0,0]],
+    [[0, 0]],
-    [[1,0]],
+    [[1, 0]],
-    [[2,0]],
+    [[2, 0]],
+    [[-1, 0], [0, 0]],
-    [[-1,0], [0,0]],
+    [[0, 0], [1, 0]],
-    [[0,0], [1,0]],
+    [[-2, 1]],
+    [[-1, 1]],
-    [[-2,1]],
+    [[0, 1]],
-    [[-1,1]],
+    [[1, 1]],
-    [[0,1]],
+    [[2, 1]],
-    [[1,1]],
+    [[-2, 1], [-1, 1]],
-    [[2,1]],
+    [[-1, 1], [0, 1]],
-    [[-2,1], [-1,1]],
+    [[0, 1], [1, 1]],
-    [[-1,1], [0,1]],
+    [[1, 1], [2, 1]],
-    [[0,1], [1,1]],
+    [[-2, 1], [-1, 1], [0, 1]],
-    [[1,1], [2,1]],
+    [[-1, 1], [0, 1], [1, 1]],
+    [[0, 1], [1, 1], [2, 1]],
-    [[-2,1], [-1,1], [0,1]],
-    [[-1,1], [0,1], [1,1]],
-    [[0,1], [1,1], [2,1]],
 ]
 dict_label = {
- 'B-ADJP': 0,
+    'B-ADJP': 0,
- 'I-ADJP': 1,
+    'I-ADJP': 1,
- 'B-ADVP': 2,
+    'B-ADVP': 2,
- 'I-ADVP': 3,
+    'I-ADVP': 3,
- 'B-CONJP': 4,
+    'B-CONJP': 4,
- 'I-CONJP': 5,
+    'I-CONJP': 5,
- 'B-INTJ': 6,
+    'B-INTJ': 6,
- 'I-INTJ': 7,
+    'I-INTJ': 7,
- 'B-LST': 8,
+    'B-LST': 8,
- 'I-LST': 9,
+    'I-LST': 9,
- 'B-NP': 10,
+    'B-NP': 10,
- 'I-NP': 11,
+    'I-NP': 11,
- 'B-PP': 12,
+    'B-PP': 12,
- 'I-PP': 13,
+    'I-PP': 13,
- 'B-PRT': 14,
+    'B-PRT': 14,
- 'I-PRT': 15,
+    'I-PRT': 15,
- 'B-SBAR': 16,
+    'B-SBAR': 16,
- 'I-SBAR': 17,
+    'I-SBAR': 17,
- 'B-UCP': 18,
+    'B-UCP': 18,
- 'I-UCP': 19,
+    'I-UCP': 19,
- 'B-VP': 20,
+    'B-VP': 20,
- 'I-VP': 21,
+    'I-VP': 21,
- 'O': 22
+    'O': 22
 }
 def make_features(sequence):
    length = len(sequence)
    num_features = len(sequence[0])
    def get_features(pos):
        if pos < 0:
            return ['#B%s' % -pos] * num_features
@@ -94,9 +92,10 @@ def make_features(sequence):
    for i in xrange(length):
        for pattern in patterns:
-            fname = '/'.join([get_features(i+pos)[f] for pos, f in pattern])
+            fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
            sequence[i].append(fname)
 '''
 Source file format:
 Each line is for one timestep. The features are separated by space.
@@ -109,6 +108,8 @@ i-th column.
 return a list of dict for each column
 '''
 def create_dictionaries(filename, cutoff, oov_policy):
    def add_to_dict(sequence, dicts):
        num_features = len(dicts)
@@ -140,7 +141,6 @@ def create_dictionaries(filename, cutoff, oov_policy):
        features = line.split(' ')
        sequence.append(features)
    for i in xrange(num_features):
        dct = dicts[i]
        n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
@@ -151,7 +151,7 @@ def create_dictionaries(filename, cutoff, oov_policy):
            else:
                dct[k] = n
                n += 1
        if oov_policy[i] == OOV_POLICY_USE:
            # placeholder so that len(dct) will be the number of features
            # including OOV
@@ -187,12 +187,15 @@ def initializer(settings, **xargs):
        logger.info("feature size=%s" % dim)
    settings.input_types = input_types
 '''
 if oov_policy[i] == OOV_POLICY_USE, features in the i-th column which do not
 exist in dicts[i] will be assigned to id 0.
 if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
 in dicts[i].
 '''
 @provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, filename):
    input_file = filename
@@ -231,7 +234,7 @@ def process(settings, filename):
                        logger.fatal("Unknown token: %s" % features[i])
                    else:
                        vec.ids.append(dim + 0)
                    dim += len(dicts[i])
                sample[-1].append(vec)
        return sample
@@ -255,4 +258,3 @@ def process(settings, filename):
    f.close()
    logger.info("num_sequences=%s" % num_sequences)
--- a/demo/sequence_tagging/linear_crf.py
+++ b/demo/sequence_tagging/linear_crf.py
@@ -16,11 +16,11 @@ from paddle.trainer_config_helpers import *
 import math
-define_py_data_sources2(train_list="data/train.list",
+define_py_data_sources2(
-                        test_list="data/test.list",
+    train_list="data/train.list",
-                        module="dataprovider",
+    test_list="data/test.list",
-                        obj="process")
+    module="dataprovider",
+    obj="process")
 batch_size = 1
 settings(
@@ -30,14 +30,15 @@ settings(
    average_window=0.5,
    learning_rate=1e-1,
    learning_rate_decay_a=1e-5,
-    learning_rate_decay_b=0.25,
+    learning_rate_decay_b=0.25, )
-)
+num_label_types = 23
-num_label_types=23
 def get_simd_size(size):
    return int(math.ceil(float(size) / 8)) * 8
 # Currently, in order to use sparse_update=True,
 # the size has to be aligned.
 num_label_types = get_simd_size(num_label_types)
@@ -45,40 +46,37 @@ num_label_types = get_simd_size(num_label_types)
 features = data_layer(name="features", size=76328)
 word = data_layer(name="word", size=6778)
 pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk",
+chunk = data_layer(name="chunk", size=num_label_types)
-                   size=num_label_types)
 crf_input = fc_layer(
    input=features,
    size=num_label_types,
    act=LinearActivation(),
    bias_attr=False,
-    param_attr=ParamAttr(initial_std=0, sparse_update=True))
+    param_attr=ParamAttr(
+        initial_std=0, sparse_update=True))
-crf=crf_layer(
+crf = crf_layer(
    input=crf_input,
    label=chunk,
-    param_attr=ParamAttr(name="crfw", initial_std=0),
+    param_attr=ParamAttr(
-)
+        name="crfw", initial_std=0), )
-crf_decoding=crf_decoding_layer(
+crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
-    param_attr=ParamAttr(name="crfw"),
+    param_attr=ParamAttr(name="crfw"), )
-)
 sum_evaluator(
    name="error",
-    input=crf_decoding,
+    input=crf_decoding, )
-)
 chunk_evaluator(
    name="chunk_f1",
-    input =[crf_decoding, chunk],
+    input=[crf_decoding, chunk],
    chunk_scheme="IOB",
-    num_chunk_types=11,
+    num_chunk_types=11, )
-)
 inputs(word, pos, chunk, features)
 outputs(crf)
--- a/demo/sequence_tagging/rnn_crf.py
+++ b/demo/sequence_tagging/rnn_crf.py
@@ -16,10 +16,11 @@ from paddle.trainer_config_helpers import *
 import math
-define_py_data_sources2(train_list="data/train.list",
+define_py_data_sources2(
-                        test_list="data/test.list",
+    train_list="data/train.list",
-                        module="dataprovider",
+    test_list="data/test.list",
-                        obj="process")
+    module="dataprovider",
+    obj="process")
 batch_size = 16
 settings(
@@ -27,29 +28,27 @@ settings(
    batch_size=batch_size,
    regularization=L2Regularization(batch_size * 1e-5),
    average_window=0.5,
-    learning_rate = 2e-3,
+    learning_rate=2e-3,
-    learning_rate_decay_a = 5e-7,
+    learning_rate_decay_a=5e-7,
-    learning_rate_decay_b = 0.5,
+    learning_rate_decay_b=0.5, )
-)
-word_dim=128
+word_dim = 128
 hidden_dim = 128
 with_rnn = True
-initial_std=1/math.sqrt(hidden_dim)
+initial_std = 1 / math.sqrt(hidden_dim)
-param_attr=ParamAttr(initial_std=initial_std)
+param_attr = ParamAttr(initial_std=initial_std)
-cpu_layer_attr=ExtraLayerAttribute(device=-1)
+cpu_layer_attr = ExtraLayerAttribute(device=-1)
 default_device(0)
-num_label_types=23
+num_label_types = 23
 features = data_layer(name="features", size=76328)
 word = data_layer(name="word", size=6778)
 pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk",
+chunk = data_layer(
-                   size=num_label_types,
+    name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
-                   layer_attr=cpu_layer_attr)
 emb = embedding_layer(
    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
@@ -58,73 +57,64 @@ hidden1 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
-    input=[full_matrix_projection(emb),
+    input=[
-           table_projection(pos, param_attr=param_attr)]
+        full_matrix_projection(emb), table_projection(
-)
+            pos, param_attr=param_attr)
+    ])
 if with_rnn:
    rnn1 = recurrent_layer(
        act=ReluActivation(),
        bias_attr=True,
        input=hidden1,
-        param_attr=ParamAttr(initial_std=0),
+        param_attr=ParamAttr(initial_std=0), )
-    )
 hidden2 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
-    input=[full_matrix_projection(hidden1)
+    input=[full_matrix_projection(hidden1)] +
-    ] + ([
+    ([full_matrix_projection(
-        full_matrix_projection(rnn1, param_attr=ParamAttr(initial_std=0))
+        rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
-    ] if with_rnn else []),
-)
 if with_rnn:
-    rnn2=recurrent_layer(
+    rnn2 = recurrent_layer(
        reverse=True,
        act=ReluActivation(),
        bias_attr=True,
        input=hidden2,
-        param_attr=ParamAttr(initial_std=0),
+        param_attr=ParamAttr(initial_std=0), )
-    )
 crf_input = mixed_layer(
    size=num_label_types,
    bias_attr=False,
-    input=[
+    input=[full_matrix_projection(hidden2), ] +
-        full_matrix_projection(hidden2),
+    ([full_matrix_projection(
-    ] + ([
+        rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
-        full_matrix_projection(rnn2, param_attr=ParamAttr(initial_std=0))
-    ] if with_rnn else []),
-)
 crf = crf_layer(
    input=crf_input,
    label=chunk,
-    param_attr=ParamAttr(name="crfw", initial_std=0),
+    param_attr=ParamAttr(
-    layer_attr=cpu_layer_attr,
+        name="crfw", initial_std=0),
-)
+    layer_attr=cpu_layer_attr, )
 crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"),
-    layer_attr=cpu_layer_attr,
+    layer_attr=cpu_layer_attr, )
-)
 sum_evaluator(
    name="error",
-    input=crf_decoding,
+    input=crf_decoding, )
-)
 chunk_evaluator(
    name="chunk_f1",
-    input =[crf_decoding, chunk],
+    input=[crf_decoding, chunk],
    chunk_scheme="IOB",
-    num_chunk_types=11,
+    num_chunk_types=11, )
-)
 inputs(word, pos, chunk, features)
 outputs(crf)
--- a/doc/algorithm/index.rst
+++ b/doc/algorithm/index.rst
+Algorithm Tutorial
+==================
+..  toctree::
+  :maxdepth: 1
+  rnn/rnn.rst
--- a/doc/algorithm/rnn/rnn.rst
+++ b/doc/algorithm/rnn/rnn.rst
-Recurrent Neural Network Configuration
+RNN Configuration
-======================================
+=================
 This tutorial will guide you through configuring recurrent neural networks in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configuration. In this tutorial, you will learn how to:

--- a/doc/build/contribute_to_paddle.md
+++ b/doc/build/contribute_to_paddle.md
-# Contribute to PaddlePaddle
+# Contribute Code
 We sincerely appreciate your contributions. You can use the fork and pull request
 workflow to merge your code. 

--- a/doc/build/index.rst
+++ b/doc/build/index.rst
-Build And Install PaddlePaddle
+Install and Build
-================================
+=================
 Install PaddlePaddle
 ----------------------
@@ -18,11 +18,7 @@ Build from Source
 ..  warning::
-    Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing to PaddlePaddle.
+    Please use the :code:`deb` package or the :code:`docker` image to install PaddlePaddle. The build guide is intended for hacking on or contributing to the PaddlePaddle source code.
-If you want to hack and contribute PaddlePaddle source code, following guides can help you\:
 ..  toctree::
    :maxdepth: 1
@@ -30,4 +26,3 @@ If you want to hack and contribute PaddlePaddle source code, following guides ca
    build_from_source.md
    contribute_to_paddle.md
--- a/doc/cluster/opensource/cluster_train.md
+++ b/doc/cluster/opensource/cluster_train.md
-# Cluster Training
+# Distributed Training
-We provide some simple scripts ```paddle/scripts/cluster_train``` to help you to launch cluster training Job to harness PaddlePaddle's distributed trainning. For MPI and other cluster scheduler refer this naive script to implement more robust cluster training platform by yourself.
+In this article, we explain how to run distributed Paddle training jobs on clusters.  We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
-The following cluster demo is based on RECOMMENDATION local training demo in PaddlePaddle ```demo/recommendation``` directory.  Assuming you enter the ```paddle/scripts/cluster_train/``` directory.
+[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH.  They also work as a reference for users running more sophisticated cluster management systems like MPI and Kubernetes.
-## Pre-requirements
+## Prerequisite
-Firstly,
+1. The aforementioned scripts use the Python library [fabric](http://www.fabfile.org/) to run SSH commands.  We can use `pip` to install fabric:
-```bash
+   ```bash
 pip install fabric
-```
+   ```
-Secondly, go through installing scripts to install PaddlePaddle at all nodes to make sure demo can run as local mode. For CUDA enabled training, we assume that CUDA is installed in ```/usr/local/cuda```, otherwise missed cuda runtime libraries error could be reported at cluster runtime. In one word, the local training environment should be well prepared for the simple scripts.
-Then you should prepare same ROOT_DIR directory in all nodes. ROOT_DIR is from in cluster_train/conf.py. Assuming that the ROOT_DIR = /home/paddle, you can create ```paddle``` user account as well, at last ```paddle.py``` can ssh connections to all nodes with ```paddle``` user automatically.
+1. We need to install PaddlePaddle on all nodes in the cluster.  To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle would report errors at runtime.
-At last you can create ssh mutual trust relationship between all nodes for easy ssh login, otherwise ```password``` should be provided at runtime from ```paddle.py```.
+1. Set the `ROOT_DIR` variable in `cluster_train/conf.py` on all nodes.  For convenience, we often create a Unix user `paddle` on all nodes and set `ROOT_DIR=/home/paddle`.  In this way, we can write public SSH keys into `/home/paddle/.ssh/authorized_keys` so that user `paddle` can SSH to all nodes without a password.
 ## Prepare Job Workspace
-```Job workspace``` is defined as one package directory which contains dependency libraries, train data, test data, model config file and all other related file dependencies.
+We refer to the directory where we put dependent libraries, config files, etc., as *workspace*.
 The ```train/test``` data should be prepared before launching the cluster job. To allow the train/test data to be placed in a directory different from the workspace, PADDLE locates the train/test data through index files named ```train.list/test.list```, which are referenced in the model config file. The train/test data therefore also includes the two list files, train.list and test.list. All local training demos already provide scripts to help you create these two files, and under normal conditions all nodes in a cluster job handle these files with the same code logic.

--- a/doc/demo/quick_start/index_en.md
+++ b/doc/demo/quick_start/index_en.md
-# Quick Start Tutorial
+# Quick Start
 This tutorial will teach the basics of deep learning (DL), including how to implement many different models in PaddlePaddle. You will learn how to:
  - Prepare data into the standardized format that PaddlePaddle accepts.

--- a/doc/dev/index.rst
+++ b/doc/dev/index.rst
+Development Guide
+=================
+..  toctree::
+  :maxdepth: 1
+  layer.md
+  new_layer/new_layer.rst
+  ../source/index.md
--- a/doc/dev/layer.md
+++ b/doc/dev/layer.md
+# Layer Documents
+* [Layer Source Code Document](../source/gserver/layers/index.rst)
+* [Layer Python API Document](../ui/api/trainer_config_helpers/index.rst)
--- a/doc/dev/new_layer/index.rst
+++ b/doc/dev/new_layer/index.rst
-Writing New Layers
-==================
-.. toctree::
-  :maxdepth: 3
-  new_layer.rst
--- a/doc/dev/new_layer/new_layer.rst
+++ b/doc/dev/new_layer/new_layer.rst
+==================
 Writing New Layers
 ==================

--- a/doc/index.md
+++ b/doc/index.md
-PaddlePaddle Documentation
-==========================
-User Guide
----------
-* [Introduction](introduction/index.md)
-* [Quick Start](demo/quick_start/index_en.md)
-* [Build and Installation](build/index.rst)
-* [Contribute Code](build/contribute_to_paddle.md)
-* [User Interface](ui/index.md)
-* [Model Config Interface](ui/api/trainer_config_helpers/index.md)
-* [Example and Demo](demo/index.md)
-* [Cluster Train](cluster/index.md)
-Development Guide
-----------------
-* [Layer Documents](layer.md)
-* [Writing New Layers](dev/new_layer/index.rst)
-* [Source Code Documents](source/index.md)
-Algorithm Tutorial
------------------
-* [RNN Configuration](algorithm/rnn/rnn.rst)
--- a/doc/index.rst
+++ b/doc/index.rst
+PaddlePaddle Documentation
+==========================
+..  toctree::
+  :maxdepth: 1
+  introduction/index.md
+  user_guide.rst
+  dev/index.rst
+  algorithm/index.rst
--- a/doc/introduction/index.md
+++ b/doc/introduction/index.md
@@ -98,4 +98,3 @@ There, you have recovered the underlying pattern between `X` and `Y` only from o
 - <a href="../build/index.html"> Build and Installation </a>
 - <a href="../demo/quick_start/index_en.html">Quick Start</a>
 - <a href="../demo/index.html">Example and Demo</a>
--- a/doc/layer.md
+++ b/doc/layer.md
-# Layer Documents
-* [Layer Source Code Document](source/gserver/layers/index.rst)
-* [Layer Python API Document](ui/api/trainer_config_helpers/layers_index.rst)
--- a/doc/source/gserver/layers/layer.rst
+++ b/doc/source/gserver/layers/layer.rst
@@ -465,6 +465,11 @@ SumOfSquaresCostLayer
 ..  doxygenclass:: paddle::SumOfSquaresCostLayer
    :members:
+SumCostLayer
+`````````````````````
+..  doxygenclass:: paddle::SumCostLayer
+    :members:
 CosSimLayer
 -----------
 ..  doxygenclass:: paddle::CosSimLayer

--- a/doc/ui/api/trainer_config_helpers/activations.rst
+++ b/doc/ui/api/trainer_config_helpers/activations.rst
+===========
+Activations
+===========
 BaseActivation
 ==============
@@ -102,4 +106,3 @@ STanhActivation
 ..  automodule:: paddle.trainer_config_helpers.activations
    :members: STanhActivation
    :noindex:
--- a/doc/ui/api/trainer_config_helpers/activations_index.rst
+++ b/doc/ui/api/trainer_config_helpers/activations_index.rst
-Activations
-===========
-.. toctree::
-  :maxdepth: 3
-  activations.rst
--- a/doc/ui/api/trainer_config_helpers/evaluators.rst
+++ b/doc/ui/api/trainer_config_helpers/evaluators.rst
+==========
+Evaluators
+==========
 Base
 ====
 ..  automodule:: paddle.trainer_config_helpers.evaluators

--- a/doc/ui/api/trainer_config_helpers/evaluators_index.rst
+++ b/doc/ui/api/trainer_config_helpers/evaluators_index.rst
-Evaluators
-==========
-.. toctree::
-  :maxdepth: 3
-  evaluators.rst
--- a/doc/ui/api/trainer_config_helpers/index.md
+++ b/doc/ui/api/trainer_config_helpers/index.md
-# Model Config Interface
-* [Optimizer](optimizers_index.rst)
-* [Data Source](data_sources.rst)
-* [Layers](layers_index.rst)
-* [Activations](activations_index.rst)
-* [Poolings](poolings_index.rst)
-* [Networks](networks_index.rst)
-* [Evaluators](evaluators_index.rst)
-* [Parameter and Extra Layer Attribute](attrs.rst)
--- a/doc/ui/api/trainer_config_helpers/index.rst
+++ b/doc/ui/api/trainer_config_helpers/index.rst
+Model Config Interface
+======================
+.. toctree::
+  :maxdepth: 1
+  optimizers.rst
+  data_sources.rst
+  layers.rst
+  activations.rst 
+  poolings.rst
+  networks.rst
+  evaluators.rst
+  attrs.rst
--- a/doc/ui/api/trainer_config_helpers/layers.rst
+++ b/doc/ui/api/trainer_config_helpers/layers.rst
+======
+Layers
+======
 Base
 ======
@@ -46,6 +50,12 @@ conv_operator
    :members: conv_operator
    :noindex:
+conv_projection
+---------------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: conv_projection
+    :noindex:
 conv_shift_layer
 ------------------
 ..  automodule:: paddle.trainer_config_helpers.layers
@@ -71,6 +81,12 @@ img_pool_layer
 --------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: img_pool_layer
+    :noindex:   
+spp_layer
+--------------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: spp_layer
    :noindex:
 maxout_layer
@@ -175,6 +191,12 @@ embedding_layer
    :members: embedding_layer
    :noindex:
+scaling_projection
+------------------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: scaling_projection
+    :noindex:
 dotmul_projection
 -----------------
 ..  automodule:: paddle.trainer_config_helpers.layers
@@ -254,6 +276,12 @@ expand_layer
    :members: expand_layer
    :noindex:
+repeat_layer
+------------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: repeat_layer
+    :noindex:
 Math Layers
 ===========
@@ -275,6 +303,12 @@ interpolation_layer
    :members: interpolation_layer
    :noindex:
+bilinear_interp_layer
+----------------------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: bilinear_interp_layer
+    :noindex:
 power_layer
 -----------
 ..  automodule:: paddle.trainer_config_helpers.layers
@@ -395,6 +429,12 @@ hsigmoid
    :members: hsigmoid
    :noindex:
+sum_cost
+---------
+..  automodule:: paddle.trainer_config_helpers.layers
+    :members: sum_cost
+    :noindex:
 Check Layer 
 ============

--- a/doc/ui/api/trainer_config_helpers/layers_index.rst
+++ b/doc/ui/api/trainer_config_helpers/layers_index.rst
-Layers
-======
-.. toctree::
-  :maxdepth: 3
-  layers.rst
--- a/doc/ui/api/trainer_config_helpers/networks.rst
+++ b/doc/ui/api/trainer_config_helpers/networks.rst
+========
+Networks
+========
+The networks module contains pieces of neural network that combine multiple layers.
 NLP
 ===
@@ -111,4 +117,3 @@ outputs
 ..  automodule:: paddle.trainer_config_helpers.networks
    :members: outputs
    :noindex:
--- a/doc/ui/api/trainer_config_helpers/networks_index.rst
+++ b/doc/ui/api/trainer_config_helpers/networks_index.rst
-Networks
-========
-The networks module contains pieces of neural network that combine multiple layers.
-.. toctree::
-  :maxdepth: 3
-  networks.rst
--- a/doc/ui/api/trainer_config_helpers/optimizers.rst
+++ b/doc/ui/api/trainer_config_helpers/optimizers.rst
+==========
+Optimizers
+==========
 BaseSGDOptimizer
 ================
 ..  automodule:: paddle.trainer_config_helpers.optimizers
@@ -51,4 +55,3 @@ settings
 ..  automodule:: paddle.trainer_config_helpers.optimizers
    :members: settings
    :noindex:
--- a/doc/ui/api/trainer_config_helpers/optimizers_index.rst
+++ b/doc/ui/api/trainer_config_helpers/optimizers_index.rst
-Optimizers
-==========
-.. toctree::
-  :maxdepth: 3
-  optimizers.rst
--- a/doc/ui/api/trainer_config_helpers/poolings.rst
+++ b/doc/ui/api/trainer_config_helpers/poolings.rst
+========
+Poolings
+========
 BasePoolingType
 ===============
 ..  automodule:: paddle.trainer_config_helpers.poolings
@@ -27,4 +31,3 @@ SquareRootNPooling
 ..  automodule:: paddle.trainer_config_helpers.poolings
    :members: SquareRootNPooling
    :noindex:
--- a/doc/ui/api/trainer_config_helpers/poolings_index.rst
+++ b/doc/ui/api/trainer_config_helpers/poolings_index.rst
-Poolings
-========
-These pooling types are used for sequence input, not for images.
-.. toctree::
-  :maxdepth: 3
-  poolings.rst
--- a/doc/ui/predict/predict_sample.py
+++ b/doc/ui/predict/predict_sample.py
@@ -16,82 +16,113 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config
-TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+TEST_DATA = [[[
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.215686,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0.533333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67451,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.886275,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192157, 0.070588, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.215686, 0.533333, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0.670588, 0.992157, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.117647, 0.933333, 0.858824, 0.313725, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67451, 0.992157, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0.090196, 0.858824, 0.992157, 0.831373, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.141176,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0.992157, 0.992157, 0.611765, 0.054902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.258824, 0.992157, 0.992157,
+    0.070588, 0.886275, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192157,
-               0.529412, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.368627, 0.992157, 0.992157, 0.419608, 0.003922, 0, 0, 0, 0, 0, 0,
+    0.070588, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.670588, 0.992157,
-               0, 0, 0, 0.094118, 0.835294, 0.992157, 0.992157, 0.517647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.603922, 0.992157,
+    0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.117647, 0.933333, 0.858824, 0.313725,
-               0.992157, 0.992157, 0.603922, 0.545098, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0.447059, 0.992157, 0.992157,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.090196, 0.858824, 0.992157, 0.831373, 0,
-               0.956863, 0.062745, 0, 0, 0, 0, 0, 0, 0, 0, 0.011765, 0.666667, 0.992157, 0.992157, 0.992157, 0.992157,
+    0, 0, 0, 0, 0, 0, 0, 0, 0.141176, 0.992157, 0.992157, 0.611765, 0.054902, 0,
-               0.992157, 0.745098, 0.137255, 0, 0, 0, 0, 0, 0.152941, 0.866667, 0.992157, 0.992157, 0.521569, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0.258824, 0.992157, 0.992157, 0.529412, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0.070588, 0.992157, 0.992157, 0.992157, 0.803922, 0.352941, 0.745098, 0.992157,
+    0, 0, 0, 0, 0, 0, 0.368627, 0.992157, 0.992157, 0.419608, 0.003922, 0, 0, 0,
-               0.945098, 0.317647, 0, 0, 0, 0, 0.580392, 0.992157, 0.992157, 0.764706, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0.094118, 0.835294, 0.992157, 0.992157, 0.517647, 0, 0, 0,
-               0, 0.070588, 0.992157, 0.992157, 0.776471, 0.043137, 0, 0.007843, 0.27451, 0.882353, 0.941176, 0.176471,
+    0, 0, 0, 0, 0, 0, 0.603922, 0.992157, 0.992157, 0.992157, 0.603922,
-               0, 0, 0.180392, 0.898039, 0.992157, 0.992157, 0.313725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.992157,
+    0.545098, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0.447059, 0.992157, 0.992157,
-               0.992157, 0.713725, 0, 0, 0, 0, 0.627451, 0.992157, 0.729412, 0.062745, 0, 0.509804, 0.992157, 0.992157,
+    0.956863, 0.062745, 0, 0, 0, 0, 0, 0, 0, 0, 0.011765, 0.666667, 0.992157,
-               0.776471, 0.035294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.494118, 0.992157, 0.992157, 0.968627, 0.168627, 0, 0,
+    0.992157, 0.992157, 0.992157, 0.992157, 0.745098, 0.137255, 0, 0, 0, 0, 0,
-               0, 0.423529, 0.992157, 0.992157, 0.364706, 0, 0.717647, 0.992157, 0.992157, 0.317647, 0, 0, 0, 0, 0, 0,
+    0.152941, 0.866667, 0.992157, 0.992157, 0.521569, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0.533333, 0.992157, 0.984314, 0.945098, 0.603922, 0, 0, 0, 0.003922, 0.466667, 0.992157,
+    0.070588, 0.992157, 0.992157, 0.992157, 0.803922, 0.352941, 0.745098,
-               0.988235, 0.976471, 0.992157, 0.992157, 0.788235, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.686275,
+    0.992157, 0.945098, 0.317647, 0, 0, 0, 0, 0.580392, 0.992157, 0.992157,
-               0.882353, 0.364706, 0, 0, 0, 0, 0, 0, 0.098039, 0.588235, 0.992157, 0.992157, 0.992157, 0.980392,
+    0.764706, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.992157, 0.992157,
-               0.305882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.101961, 0.67451, 0.321569, 0, 0, 0, 0, 0, 0, 0, 0.105882,
+    0.776471, 0.043137, 0, 0.007843, 0.27451, 0.882353, 0.941176, 0.176471, 0,
-               0.733333, 0.976471, 0.811765, 0.713725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65098, 0.992157,
+    0, 0.180392, 0.898039, 0.992157, 0.992157, 0.313725, 0, 0, 0, 0, 0, 0, 0, 0,
-               0.321569, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25098, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+    0, 0, 0.070588, 0.992157, 0.992157, 0.713725, 0, 0, 0, 0, 0.627451,
-               0.94902, 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.968627,
+    0.992157, 0.729412, 0.062745, 0, 0.509804, 0.992157, 0.992157, 0.776471,
-               0.764706, 0.152941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.498039,
+    0.035294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.494118, 0.992157, 0.992157,
-               0.25098, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.968627, 0.168627, 0, 0, 0, 0.423529, 0.992157, 0.992157, 0.364706, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.717647, 0.992157, 0.992157, 0.317647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.533333, 0.992157, 0.984314, 0.945098, 0.603922, 0, 0, 0, 0.003922,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [
+    0.466667, 0.992157, 0.988235, 0.976471, 0.992157, 0.992157, 0.788235,
-                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.686275, 0.882353, 0.364706, 0,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0.098039, 0.588235, 0.992157, 0.992157, 0.992157, 0.980392,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.305882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.101961, 0.67451, 0.321569,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0.105882, 0.733333, 0.976471, 0.811765, 0.713725, 0, 0,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65098, 0.992157, 0.321569, 0, 0, 0, 0, 0,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.298039, 0.333333, 0.333333, 0.333333, 0.337255, 0.333333,
+    0, 0, 0, 0, 0.25098, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-                  0.333333, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.027451, 0.223529, 0.776471,
+    0.94902, 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.964706, 0.988235, 0.988235, 0.988235, 0.992157, 0.988235, 0.988235, 0.780392, 0.098039, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0.968627, 0.764706, 0.152941, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.14902, 0.698039, 0.988235, 0.992157, 0.988235, 0.901961, 0.87451,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.498039, 0.25098, 0, 0, 0,
-                  0.568627, 0.882353, 0.976471, 0.988235, 0.988235, 0.501961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.188235, 0.647059, 0.988235, 0.988235, 0.745098, 0.439216, 0.098039, 0, 0, 0, 0.572549, 0.988235,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.933333, 0.992157, 0.941176,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.247059, 0, 0, 0, 0, 0, 0, 0.188235, 0.898039, 0.992157, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.039216, 0.639216, 0.933333, 0.988235, 0.913725, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0.113725, 0.843137,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.235294, 0.988235, 0.992157, 0.988235, 0.815686,
+    0, 0, 0, 0, 0
-                  0.07451, 0, 0, 0, 0, 0, 0, 0, 0.333333, 0.988235, 0.988235, 0.552941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+]], [[
-                  0.211765, 0.878431, 0.988235, 0.992157, 0.701961, 0.329412, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0.698039,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.988235, 0.913725, 0.145098, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.890196, 0.988235, 0.988235,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.745098, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.882353, 0.988235, 0.568627, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0, 0.2, 0.933333, 0.992157, 0.992157, 0.992157, 0.447059, 0.294118, 0, 0, 0, 0, 0, 0, 0, 0, 0.447059,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.992157, 0.768627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.988235, 0.988235, 0.988235,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.992157, 0.47451, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.933333, 0.87451, 0.509804, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0, 0, 0.992157, 0.988235, 0.937255, 0.792157, 0.988235, 0.894118, 0.082353, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.027451, 0.647059, 0.992157, 0.654902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.913725,
+    0, 0, 0, 0, 0, 0, 0, 0, 0.298039, 0.333333, 0.333333, 0.333333, 0.337255,
-                  0.329412, 0.376471, 0.184314, 0, 0, 0, 0, 0, 0, 0.027451, 0.513725, 0.988235, 0.635294, 0.219608, 0,
+    0.333333, 0.333333, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.929412, 0.988235, 0.988235, 0.741176, 0.309804, 0, 0, 0, 0,
+    0, 0, 0.027451, 0.223529, 0.776471, 0.964706, 0.988235, 0.988235, 0.988235,
-                  0, 0, 0.529412, 0.988235, 0.678431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.223529, 0.992157,
+    0.992157, 0.988235, 0.988235, 0.780392, 0.098039, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                  0.992157, 1, 0.992157, 0.992157, 0.992157, 0.992157, 1, 0.992157, 0.992157, 0.882353, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0.14902, 0.698039, 0.988235, 0.992157, 0.988235, 0.901961,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023529, 0.478431, 0.654902, 0.658824, 0.952941, 0.988235, 0.988235,
+    0.87451, 0.568627, 0.882353, 0.976471, 0.988235, 0.988235, 0.501961, 0, 0,
-                  0.988235, 0.992157, 0.988235, 0.729412, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.647059, 0.988235, 0.988235,
-                  0, 0, 0, 0.196078, 0.647059, 0.764706, 0.764706, 0.768627, 0.580392, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.745098, 0.439216, 0.098039, 0, 0, 0, 0.572549, 0.988235, 0.988235,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.933333, 0.992157,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.941176, 0.247059, 0, 0, 0, 0, 0, 0, 0.188235, 0.898039, 0.992157,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.039216, 0.639216, 0.933333,
-                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.988235, 0.913725, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0.113725, 0.843137,
-                  0, 0, 0, 0, 0, 0, 0]]]
+    0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.235294, 0.988235,
+    0.992157, 0.988235, 0.815686, 0.07451, 0, 0, 0, 0, 0, 0, 0, 0.333333,
+    0.988235, 0.988235, 0.552941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.211765,
+    0.878431, 0.988235, 0.992157, 0.701961, 0.329412, 0.109804, 0, 0, 0, 0, 0,
+    0, 0, 0.698039, 0.988235, 0.913725, 0.145098, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.188235, 0.890196, 0.988235, 0.988235, 0.745098, 0.047059, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0.882353, 0.988235, 0.568627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2,
+    0.933333, 0.992157, 0.992157, 0.992157, 0.447059, 0.294118, 0, 0, 0, 0, 0,
+    0, 0, 0, 0.447059, 0.992157, 0.768627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0.623529, 0.988235, 0.988235, 0.988235, 0.988235, 0.992157, 0.47451, 0, 0,
+    0, 0, 0, 0, 0, 0.188235, 0.933333, 0.87451, 0.509804, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0.992157, 0.988235, 0.937255, 0.792157, 0.988235, 0.894118,
+    0.082353, 0, 0, 0, 0, 0, 0, 0.027451, 0.647059, 0.992157, 0.654902, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.913725, 0.329412, 0.376471,
+    0.184314, 0, 0, 0, 0, 0, 0, 0.027451, 0.513725, 0.988235, 0.635294,
+    0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.929412, 0.988235,
+    0.988235, 0.741176, 0.309804, 0, 0, 0, 0, 0, 0, 0.529412, 0.988235,
+    0.678431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.223529, 0.992157,
+    0.992157, 1, 0.992157, 0.992157, 0.992157, 0.992157, 1, 0.992157, 0.992157,
+    0.882353, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023529,
+    0.478431, 0.654902, 0.658824, 0.952941, 0.988235, 0.988235, 0.988235,
+    0.992157, 0.988235, 0.729412, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.647059, 0.764706, 0.764706, 0.768627,
+    0.580392, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0
+]]]
 def main():
    conf = parse_config("./mnist_model/trainer_config.py", "")
    print conf.data_config.load_data_args
-    network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+    network = swig_paddle.GradientMachine.createFromConfigProto(
+        conf.model_config)
    assert isinstance(network, swig_paddle.GradientMachine)  # For code hint.
    network.loadParameters("./mnist_model/")
    converter = DataProviderConverter([dense_vector(784)])

--- a/doc/user_guide.rst
+++ b/doc/user_guide.rst
+User Guide
+==========
+..  toctree::
+  :maxdepth: 1
+  demo/quick_start/index_en.md
+  build/index.rst
+  build/contribute_to_paddle.md
+  ui/index.md
+  ui/api/trainer_config_helpers/index.rst
+  demo/index.md
+  cluster/index.md
--- a/doc_cn/algorithm/rnn/hierarchical-layer.md
+++ b/doc_cn/algorithm/rnn/hierarchical-layer.md
 # 支持双层序列作为输入的Layer
 ## 概述
 在自然语言处理任务中，序列是一种常见的数据类型。一个独立的词语，可以看作是一个非序列输入，或者，我们称之为一个0层的序列；由词语构成的句子，是一个单层序列；若干个句子构成一个段落，是一个双层的序列。
 双层序列是一个嵌套的序列，它的每一个元素，又是一个单层的序列。这是一种非常灵活的数据组织方式，帮助我们构造一些复杂的输入信息。
 我们可以按照如下层次定义非序列，单层序列，以及双层序列。
 + 0层序列：一个独立的元素，类型可以是PaddlePaddle支持的任意输入数据类型
 + 单层序列：排成一列的多个元素，每个元素是一个0层序列，元素之间的顺序是重要的输入信息
 + 双层序列：排成一列的多个元素，每个元素是一个单层序列，称之为双层序列的一个子序列（subseq），subseq的每个元素是一个0层序列
 在 PaddlePaddle中，下面这些Layer能够接受双层序列作为输入，完成相应的计算。
 ## pooling_layer
 pooling_layer的使用示例如下，详细见<a href = "../../../doc/ui/api/trainer_config_helpers/layers.html#pooling-layer">配置API</a>。
 ```python
 seq_pool = pooling_layer(input=layer,
                         pooling_type=AvgPooling(),
                         agg_level=AggregateLevel.EACH_SEQUENCE)
 ```
 - `pooling_type` 目前支持两种，分别是：MaxPooling()和AvgPooling()。
 - `agg_level=AggregateLevel.TIMESTEP`时（默认值）：
  - 作用：双层序列经过运算变成一个0层序列，或单层序列经过运算变成一个0层序列
  - 输入：一个双层序列，或一个单层序列
  - 输出：一个0层序列，即整个输入序列（单层或双层）的平均值（或最大值）
 - `agg_level=AggregateLevel.EACH_SEQUENCE`时：
  - 作用：一个双层序列经过运算变成一个单层序列
  - 输入：必须是一个双层序列
  - 输出：一个单层序列，序列的每个元素是原来双层序列每个subseq元素的平均值（或最大值）
 ## last_seq 和 first_seq
 last_seq的使用示例如下（first_seq类似），详细见<a href = "../../../doc/ui/api/trainer_config_helpers/layers.html#last-seq">配置API</a>。
 ```python
 last = last_seq(input=layer,
                agg_level=AggregateLevel.EACH_SEQUENCE)
 ```
 - `agg_level=AggregateLevel.TIMESTEP`时（默认值）：
  - 作用：一个双层序列经过运算变成一个0层序列，或一个单层序列经过运算变成一个0层序列
  - 输入：一个双层序列或一个单层序列
  - 输出：一个0层序列，即整个输入序列（双层或者单层）的最后一个（或第一个）元素。
 - `agg_level=AggregateLevel.EACH_SEQUENCE`时：
  - 作用：一个双层序列经过运算变成一个单层序列
  - 输入：必须是一个双层序列
  - 输出：一个单层序列，其中每个元素是双层序列中每个subseq最后一个（或第一个）元素。
 ## expand_layer
 expand_layer的使用示例如下，详细见<a href = "../../../doc/ui/api/trainer_config_helpers/layers.html#expand-layer">配置API</a>。
 ```python
 expand = expand_layer(input=layer1,
                      expand_as=layer2,
                      expand_level=ExpandLevel.FROM_TIMESTEP)
 ```
 - `expand_level=ExpandLevel.FROM_TIMESTEP`时（默认值）：
  - 作用：一个0层序列经过运算扩展成一个单层序列，或者一个双层序列
  - 输入：layer1必须是一个0层序列，是待扩展的数据；layer2可以是一个单层序列，或者是一个双层序列，提供扩展的长度信息
  - 输出：一个单层序列，或一个双层序列，输出序列的类型（双层序列，或单层序列）和序列中含有元素的数目同 layer2一致。若输出是单层序列，单层序列的每个元素（0层序列），都是对layer1元素的拷贝；若输出是双层序列，双层序列每个subseq中每个元素（0层序列），都是对layer1元素的拷贝
 - `expand_level=ExpandLevel.FROM_SEQUENCE`时：
  - 作用：一个单层序列经过运算扩展成一个双层序列
  - 输入：layer1必须是一个单层序列，是待扩展的数据；layer2必须是一个双层序列，提供扩展的长度信息
  - 输出：一个双层序列，序列中含有元素的数目同layer2一致。要求单层序列含有元素（0层序列）的数目，和双层序列含有subseq的数目一致。单层序列第i个元素（0层序列），被扩展为一个单层序列，构成了输出双层序列的第i个subseq。
\ No newline at end of file
--- a/doc_cn/algorithm/rnn/hierarchical-rnn.md
+++ b/doc_cn/algorithm/rnn/hierarchical-rnn.md
--- a/doc_cn/algorithm/rnn/rnn-tutorial.md
+++ b/doc_cn/algorithm/rnn/rnn-tutorial.md
--- a/doc_cn/build_and_install/install/paddle_version.txt
+++ b/doc_cn/build_and_install/install/paddle_version.txt
--- a/doc_cn/concepts/trainer_config.py
+++ b/doc_cn/concepts/trainer_config.py
--- a/doc_cn/demo/index.rst
+++ b/doc_cn/demo/index.rst
--- a/doc_cn/demo/sentiment_analysis/index.rst
+++ b/doc_cn/demo/sentiment_analysis/index.rst
--- a/doc_cn/demo/sentiment_analysis/sentiment_analysis.md
+++ b/doc_cn/demo/sentiment_analysis/sentiment_analysis.md
--- a/doc_cn/faq/reduce_min_pool_size.py
+++ b/doc_cn/faq/reduce_min_pool_size.py
--- a/doc_cn/faq/word2vec_config.py
+++ b/doc_cn/faq/word2vec_config.py
--- a/doc_cn/faq/word2vec_dataprovider.py
+++ b/doc_cn/faq/word2vec_dataprovider.py
--- a/doc_cn/ui/data_provider/mnist_config.py
+++ b/doc_cn/ui/data_provider/mnist_config.py
--- a/doc_cn/ui/data_provider/mnist_provider.dict.py
+++ b/doc_cn/ui/data_provider/mnist_provider.dict.py
--- a/doc_cn/ui/data_provider/mnist_provider.py
+++ b/doc_cn/ui/data_provider/mnist_provider.py
--- a/doc_cn/ui/data_provider/sentimental_config.py
+++ b/doc_cn/ui/data_provider/sentimental_config.py
--- a/doc_cn/ui/data_provider/sentimental_provider.py
+++ b/doc_cn/ui/data_provider/sentimental_provider.py
--- a/paddle/.common_test_util.sh
+++ b/paddle/.common_test_util.sh
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
--- a/paddle/api/Matrix.cpp
+++ b/paddle/api/Matrix.cpp
--- a/paddle/api/Paddle.swig
+++ b/paddle/api/Paddle.swig
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
--- a/paddle/api/PaddleAPIPrivate.h
+++ b/paddle/api/PaddleAPIPrivate.h
--- a/paddle/api/Util.cpp
+++ b/paddle/api/Util.cpp
--- a/paddle/api/Vector.cpp
+++ b/paddle/api/Vector.cpp
--- a/paddle/api/__init__.py
+++ b/paddle/api/__init__.py
--- a/paddle/api/paddle_ld_flags.py
+++ b/paddle/api/paddle_ld_flags.py
--- a/paddle/api/test/CMakeLists.txt
+++ b/paddle/api/test/CMakeLists.txt
--- a/paddle/api/test/testArguments.py
+++ b/paddle/api/test/testArguments.py
--- a/paddle/api/test/testGradientMachine.py
+++ b/paddle/api/test/testGradientMachine.py
--- a/paddle/api/test/testMatrix.py
+++ b/paddle/api/test/testMatrix.py
--- a/paddle/api/test/testTrain.py
+++ b/paddle/api/test/testTrain.py
--- a/paddle/api/test/testTrainConfig.py
+++ b/paddle/api/test/testTrainConfig.py
--- a/paddle/api/test/testTrainer.py
+++ b/paddle/api/test/testTrainer.py
--- a/paddle/api/test/testVector.py
+++ b/paddle/api/test/testVector.py
--- a/paddle/cuda/include/hl_base.h
+++ b/paddle/cuda/include/hl_base.h
--- a/paddle/cuda/include/hl_cnn.h
+++ b/paddle/cuda/include/hl_cnn.h
--- a/paddle/cuda/include/hl_matrix.h
+++ b/paddle/cuda/include/hl_matrix.h
--- a/paddle/cuda/include/stub/hl_cnn_stub.h
+++ b/paddle/cuda/include/stub/hl_cnn_stub.h
--- a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h
+++ b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h
--- a/paddle/cuda/include/stub/hl_matrix_stub.h
+++ b/paddle/cuda/include/stub/hl_matrix_stub.h
--- a/paddle/cuda/src/avx_mathfun.h
+++ b/paddle/cuda/src/avx_mathfun.h
--- a/paddle/cuda/src/hl_cuda_cnn.cu
+++ b/paddle/cuda/src/hl_cuda_cnn.cu
--- a/paddle/cuda/src/hl_cuda_matrix.cu
+++ b/paddle/cuda/src/hl_cuda_matrix.cu
--- a/paddle/cuda/src/hl_dso_loader.cc
+++ b/paddle/cuda/src/hl_dso_loader.cc
--- a/paddle/gserver/layers/BilinearInterpLayer.cpp
+++ b/paddle/gserver/layers/BilinearInterpLayer.cpp
--- a/paddle/gserver/layers/BilinearInterpLayer.h
+++ b/paddle/gserver/layers/BilinearInterpLayer.h
--- a/paddle/gserver/layers/ConcatenateLayer.cpp
+++ b/paddle/gserver/layers/ConcatenateLayer.cpp
--- a/paddle/gserver/layers/ConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ConvBaseLayer.cpp
--- a/paddle/gserver/layers/ConvBaseLayer.h
+++ b/paddle/gserver/layers/ConvBaseLayer.h
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
--- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp
--- a/paddle/gserver/layers/ExpandConvBaseLayer.h
+++ b/paddle/gserver/layers/ExpandConvBaseLayer.h
--- a/paddle/gserver/layers/ExpandConvLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvLayer.cpp
--- a/paddle/gserver/layers/ExpandConvLayer.h
+++ b/paddle/gserver/layers/ExpandConvLayer.h
--- a/paddle/gserver/layers/ExpandConvTransLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvTransLayer.cpp
--- a/paddle/gserver/layers/ExpandConvTransLayer.h
+++ b/paddle/gserver/layers/ExpandConvTransLayer.h
--- a/paddle/gserver/layers/FullMatrixProjection.cpp
+++ b/paddle/gserver/layers/FullMatrixProjection.cpp
--- a/paddle/gserver/layers/FullyConnectedLayer.h
+++ b/paddle/gserver/layers/FullyConnectedLayer.h
--- a/paddle/gserver/layers/PoolLayer.cpp
+++ b/paddle/gserver/layers/PoolLayer.cpp
--- a/paddle/gserver/layers/PoolProjection.cpp
+++ b/paddle/gserver/layers/PoolProjection.cpp
--- a/paddle/gserver/layers/PoolProjection.h
+++ b/paddle/gserver/layers/PoolProjection.h
--- a/paddle/gserver/layers/PoolProjectionLayer.cpp
+++ b/paddle/gserver/layers/PoolProjectionLayer.cpp
--- a/paddle/gserver/layers/PoolProjectionLayer.h
+++ b/paddle/gserver/layers/PoolProjectionLayer.h
--- a/paddle/gserver/layers/Projection.h
+++ b/paddle/gserver/layers/Projection.h
--- a/paddle/gserver/layers/ScalingProjection.cpp
+++ b/paddle/gserver/layers/ScalingProjection.cpp
--- a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
+++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
--- a/paddle/gserver/layers/SpatialPyramidPoolLayer.h
+++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.h
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
--- a/paddle/gserver/tests/__init__.py
+++ b/paddle/gserver/tests/__init__.py
--- a/paddle/gserver/tests/pyDataProvider.py
+++ b/paddle/gserver/tests/pyDataProvider.py
--- a/paddle/gserver/tests/rnn_data_provider.py
+++ b/paddle/gserver/tests/rnn_data_provider.py
--- a/paddle/gserver/tests/sequenceGen.py
+++ b/paddle/gserver/tests/sequenceGen.py
--- a/paddle/gserver/tests/sequence_layer_group.conf
+++ b/paddle/gserver/tests/sequence_layer_group.conf
--- a/paddle/gserver/tests/sequence_nest_layer_group.conf
+++ b/paddle/gserver/tests/sequence_nest_layer_group.conf
--- a/paddle/gserver/tests/test_ConvTrans.cpp
+++ b/paddle/gserver/tests/test_ConvTrans.cpp
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
--- a/paddle/gserver/tests/test_PyDataProvider2.py
+++ b/paddle/gserver/tests/test_PyDataProvider2.py
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
--- a/paddle/math/CpuSparseMatrix.cpp
+++ b/paddle/math/CpuSparseMatrix.cpp
--- a/paddle/math/MathFunctions.h
+++ b/paddle/math/MathFunctions.h
--- a/paddle/math/MathUtils.cpp
+++ b/paddle/math/MathUtils.cpp
--- a/paddle/math/MathUtils.h
+++ b/paddle/math/MathUtils.h
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
--- a/paddle/math/Vector.cpp
+++ b/paddle/math/Vector.cpp
--- a/paddle/math/Vector.h
+++ b/paddle/math/Vector.h
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
--- a/paddle/parameter/CMakeLists.txt
+++ b/paddle/parameter/CMakeLists.txt
--- a/paddle/parameter/tests/CMakeLists.txt
+++ b/paddle/parameter/tests/CMakeLists.txt
--- a/paddle/py_paddle/__init__.py
+++ b/paddle/py_paddle/__init__.py
--- a/paddle/py_paddle/dataprovider_converter.py
+++ b/paddle/py_paddle/dataprovider_converter.py
--- a/paddle/py_paddle/util.py
+++ b/paddle/py_paddle/util.py
--- a/paddle/scripts/CMakeLists.txt
+++ b/paddle/scripts/CMakeLists.txt
--- a/paddle/scripts/cluster_train/conf.py
+++ b/paddle/scripts/cluster_train/conf.py
--- a/paddle/scripts/cluster_train/paddle.py
+++ b/paddle/scripts/cluster_train/paddle.py
--- a/paddle/scripts/cpplint.py
+++ b/paddle/scripts/cpplint.py
--- a/paddle/scripts/deb/build_scripts/build.sh
+++ b/paddle/scripts/deb/build_scripts/build.sh
--- a/paddle/scripts/docker/Dockerfile.cpu
+++ b/paddle/scripts/docker/Dockerfile.cpu
--- a/paddle/scripts/docker/Dockerfile.cpu-demo
+++ b/paddle/scripts/docker/Dockerfile.cpu-demo
--- a/paddle/scripts/docker/Dockerfile.cpu-devel
+++ b/paddle/scripts/docker/Dockerfile.cpu-devel
--- a/paddle/scripts/docker/Dockerfile.cpu-noavx
+++ b/paddle/scripts/docker/Dockerfile.cpu-noavx
--- a/paddle/scripts/docker/Dockerfile.cpu-noavx-demo
+++ b/paddle/scripts/docker/Dockerfile.cpu-noavx-demo
--- a/paddle/scripts/docker/Dockerfile.cpu-noavx-devel
+++ b/paddle/scripts/docker/Dockerfile.cpu-noavx-devel
--- a/paddle/scripts/docker/Dockerfile.gpu
+++ b/paddle/scripts/docker/Dockerfile.gpu
--- a/paddle/scripts/docker/Dockerfile.gpu-demo
+++ b/paddle/scripts/docker/Dockerfile.gpu-demo
--- a/paddle/scripts/docker/Dockerfile.gpu-devel
+++ b/paddle/scripts/docker/Dockerfile.gpu-devel
--- a/paddle/scripts/docker/Dockerfile.gpu-noavx
+++ b/paddle/scripts/docker/Dockerfile.gpu-noavx
--- a/paddle/scripts/docker/Dockerfile.gpu-noavx-demo
+++ b/paddle/scripts/docker/Dockerfile.gpu-noavx-demo
--- a/paddle/scripts/docker/Dockerfile.gpu-noavx-devel
+++ b/paddle/scripts/docker/Dockerfile.gpu-noavx-devel
--- a/paddle/scripts/docker/Dockerfile.m4
+++ b/paddle/scripts/docker/Dockerfile.m4
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
--- a/paddle/scripts/docker/generate.sh
+++ b/paddle/scripts/docker/generate.sh
--- a/paddle/scripts/travis/common.sh
+++ b/paddle/scripts/travis/common.sh
--- a/paddle/trainer/tests/__init__.py
+++ b/paddle/trainer/tests/__init__.py
--- a/paddle/trainer/tests/config_parser_test.py
+++ b/paddle/trainer/tests/config_parser_test.py
--- a/paddle/trainer/tests/gen_proto_data.py
+++ b/paddle/trainer/tests/gen_proto_data.py
--- a/paddle/trainer/tests/test.txt
+++ b/paddle/trainer/tests/test.txt
--- a/paddle/trainer/tests/testPyDataWrapper.py
+++ b/paddle/trainer/tests/testPyDataWrapper.py
--- a/paddle/trainer/tests/test_gen_dict.txt
+++ b/paddle/trainer/tests/test_gen_dict.txt
--- a/paddle/trainer/tests/train.txt
+++ b/paddle/trainer/tests/train.txt
--- a/paddle/utils/Util.cpp
+++ b/paddle/utils/Util.cpp
--- a/paddle/utils/enable_virtualenv.py
+++ b/paddle/utils/enable_virtualenv.py
--- a/paddle/utils/tests/test_CommandLineParser.cpp
+++ b/paddle/utils/tests/test_CommandLineParser.cpp
--- a/proto/ModelConfig.proto.m4
+++ b/proto/ModelConfig.proto.m4
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
--- a/python/paddle/trainer/PyDataProvider2.py
+++ b/python/paddle/trainer/PyDataProvider2.py
--- a/python/paddle/trainer/PyDataProviderWrapper.py
+++ b/python/paddle/trainer/PyDataProviderWrapper.py
--- a/python/paddle/trainer/__init__.py
+++ b/python/paddle/trainer/__init__.py
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
--- a/python/paddle/trainer/config_parser_extension.py
+++ b/python/paddle/trainer/config_parser_extension.py
--- a/python/paddle/trainer/recurrent_units.py
+++ b/python/paddle/trainer/recurrent_units.py
--- a/python/paddle/trainer_config_helpers/__init__.py
+++ b/python/paddle/trainer_config_helpers/__init__.py
--- a/python/paddle/trainer_config_helpers/activations.py
+++ b/python/paddle/trainer_config_helpers/activations.py
--- a/python/paddle/trainer_config_helpers/attrs.py
+++ b/python/paddle/trainer_config_helpers/attrs.py
--- a/python/paddle/trainer_config_helpers/data_sources.py
+++ b/python/paddle/trainer_config_helpers/data_sources.py
--- a/python/paddle/trainer_config_helpers/default_decorators.py
+++ b/python/paddle/trainer_config_helpers/default_decorators.py
--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
--- a/python/paddle/trainer_config_helpers/math.py
+++ b/python/paddle/trainer_config_helpers/math.py
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
--- a/python/paddle/trainer_config_helpers/optimizers.py
+++ b/python/paddle/trainer_config_helpers/optimizers.py
--- a/python/paddle/trainer_config_helpers/poolings.py
+++ b/python/paddle/trainer_config_helpers/poolings.py
--- a/python/paddle/trainer_config_helpers/tests/configs/.gitignore
+++ b/python/paddle/trainer_config_helpers/tests/configs/.gitignore
--- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
--- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
--- a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
--- a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
--- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
--- a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
--- a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
--- a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
--- a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
--- a/python/paddle/trainer_config_helpers/utils.py
+++ b/python/paddle/trainer_config_helpers/utils.py
--- a/python/paddle/utils/__init__.py
+++ b/python/paddle/utils/__init__.py
--- a/python/paddle/utils/dump_config.py
+++ b/python/paddle/utils/dump_config.py
--- a/python/paddle/utils/image_util.py
+++ b/python/paddle/utils/image_util.py
--- a/python/paddle/utils/make_model_diagram.py
+++ b/python/paddle/utils/make_model_diagram.py
--- a/python/paddle/utils/plotcurve.py
+++ b/python/paddle/utils/plotcurve.py
--- a/python/paddle/utils/predefined_net.py
+++ b/python/paddle/utils/predefined_net.py
--- a/python/paddle/utils/preprocess_img.py
+++ b/python/paddle/utils/preprocess_img.py
--- a/python/paddle/utils/preprocess_util.py
+++ b/python/paddle/utils/preprocess_util.py
--- a/python/paddle/utils/show_pb.py
+++ b/python/paddle/utils/show_pb.py
--- a/python/paddle/utils/torch2paddle.py
+++ b/python/paddle/utils/torch2paddle.py