diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake
index 30027a336c0063e46e49420aaa1ac0c20c6e4dea..a0d0a892c4b3cc3743ac725f3cd90444f18abf34 100644
--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@@ -1,11 +1,11 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -44,7 +44,7 @@ ExternalProject_Add(
     -DCMAKE_BUILD_TYPE:STRING=Release
 )
 
-ADD_LIBRARY(gflags STATIC IMPORTED)
+ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
 ADD_DEPENDENCIES(gflags extern_gflags)
 
diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake
index fa9a5092877666f9c477fa1724d5e4282c89345d..b70e94a170f17cc61f61673609e6eb941662ea62 100644
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -48,7 +48,7 @@ ExternalProject_Add(
     -DCMAKE_BUILD_TYPE:STRING=Release
 )
 
-ADD_LIBRARY(glog STATIC IMPORTED)
+ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
 ADD_DEPENDENCIES(glog extern_glog)
 
diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake
index 386204dc37d258187ed4edce68b16ad916a1e7e7..77e06e983e9f8bfaf6320e3c67b85b692ed877fc 100644
--- a/cmake/external/gtest.cmake
+++ b/cmake/external/gtest.cmake
@@ -1,11 +1,11 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -56,11 +56,11 @@ IF(WITH_TESTING)
     -DCMAKE_BUILD_TYPE:STRING=Release
   )
 
-  ADD_LIBRARY(gtest STATIC IMPORTED)
+  ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
   SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES})
   ADD_DEPENDENCIES(gtest extern_gtest)
 
-  ADD_LIBRARY(gtest_main STATIC IMPORTED)
+  ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL)
   SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
   ADD_DEPENDENCIES(gtest_main extern_gtest)
 
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 367d5b98c7f97e119039ab1b349f9610d735311d..2d7daed9bcd5b8d854ffae6dc1ea191d154c16fe 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -1,11 +1,11 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -65,7 +65,7 @@ ExternalProject_Add( -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} ) -ADD_LIBRARY(warpctc STATIC IMPORTED) +ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) ADD_DEPENDENCIES(warpctc extern_warpctc) diff --git a/demo/image_classification/.gitignore b/demo/image_classification/.gitignore deleted file mode 100644 index 6a05b8f6632db0977fceade8b48a89b9f7f6e6cc..0000000000000000000000000000000000000000 --- a/demo/image_classification/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -data/cifar-10-batches-py -data/cifar-out -cifar_vgg_model/* -plot.png -train.log -image_provider_copy_1.py -*pyc -train.list -test.list diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py deleted file mode 100644 index 19d20540780becf504973a23b50445d4b65dc2ef..0000000000000000000000000000000000000000 --- a/demo/image_classification/api_v2_resnet.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2 as paddle - -__all__ = ['resnet_cifar10'] - - -def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - active_type=paddle.activation.Relu(), - ch_in=None): - tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=ch_in, - num_filters=ch_out, - stride=stride, - padding=padding, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.batch_norm(input=tmp, act=active_type) - - -def shortcut(ipt, n_in, n_out, stride): - if n_in != n_out: - return conv_bn_layer(ipt, n_out, 1, stride, 0, - paddle.activation.Linear()) - else: - return ipt - - -def basicblock(ipt, ch_out, stride): - ch_in = ch_out * 2 - tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) - tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) - short = shortcut(ipt, ch_in, ch_out, stride) - return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) - - -def layer_warp(block_func, ipt, features, count, stride): - tmp = block_func(ipt, features, stride) - for i in range(1, count): - tmp = block_func(tmp, features, 1) - return tmp - - -def resnet_cifar10(ipt, depth=32): - # depth should be one of 20, 32, 44, 56, 110, 1202 - assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 - nStages = {16, 64, 128} - conv1 = conv_bn_layer( - ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) - res1 = layer_warp(basicblock, conv1, 16, n, 1) - res2 = layer_warp(basicblock, res1, 32, n, 2) - res3 = layer_warp(basicblock, res2, 64, n, 2) - pool = paddle.layer.img_pool( - input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) - return pool diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py deleted file mode 100644 index 53cffa6fb4e8b2e19725f4f44bf7b9ffffb25232..0000000000000000000000000000000000000000 --- a/demo/image_classification/api_v2_train.py +++ /dev/null @@ -1,92 +0,0 @@ -# 
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License - -import sys - -import paddle.v2 as paddle - -from api_v2_vgg import vgg_bn_drop - - -def main(): - datadim = 3 * 32 * 32 - classdim = 10 - - # PaddlePaddle init - paddle.init(use_gpu=False, trainer_count=1) - - image = paddle.layer.data( - name="image", type=paddle.data_type.dense_vector(datadim)) - - # Add neural network config - # option 1. resnet - # net = resnet_cifar10(image, depth=32) - # option 2. vgg - net = vgg_bn_drop(image) - - out = paddle.layer.fc(input=net, - size=classdim, - act=paddle.activation.Softmax()) - - lbl = paddle.layer.data( - name="label", type=paddle.data_type.integer_value(classdim)) - cost = paddle.layer.classification_cost(input=out, label=lbl) - - # Create parameters - parameters = paddle.parameters.create(cost) - - # Create optimizer - momentum_optimizer = paddle.optimizer.Momentum( - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), - learning_rate=0.1 / 128.0, - learning_rate_decay_a=0.1, - learning_rate_decay_b=50000 * 100, - learning_rate_schedule='discexp', - batch_size=128) - - # End batch and end pass event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.batch( - paddle.dataset.cifar.test10(), batch_size=128), - feeding={'image': 0, - 'label': 1}) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - # Create trainer - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=momentum_optimizer) - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=50000), - batch_size=128), - num_passes=5, - event_handler=event_handler, - feeding={'image': 0, - 'label': 1}) - - -if __name__ == '__main__': - main() diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py deleted file mode 100644 index 1e0e6b93adde30425f17aa9cd07542275f4fec37..0000000000000000000000000000000000000000 --- a/demo/image_classification/api_v2_vgg.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle.v2 as paddle - -__all__ = ['vgg_bn_drop'] - - -def vgg_bn_drop(input): - def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): - return paddle.networks.img_conv_group( - input=ipt, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type=paddle.pooling.Max()) - - conv1 = conv_block(input, 64, 2, [0.3, 0], 3) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) - fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) - bn = paddle.layer.batch_norm( - input=fc1, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) - return fc2 diff --git a/demo/image_classification/data/download_cifar.sh b/demo/image_classification/data/download_cifar.sh deleted file mode 100755 index 532178d627fe19ab8ea79ecae73e5328b5294bea..0000000000000000000000000000000000000000 --- a/demo/image_classification/data/download_cifar.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -tar zxf cifar-10-python.tar.gz -rm cifar-10-python.tar.gz -rm -rf cifar-out/* -echo Converting CIFAR data to images..... -python process_cifar.py ./cifar-10-batches-py ./cifar-out diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py deleted file mode 100644 index db6666189e5b8008a6b66fb64afcdf98980e72bb..0000000000000000000000000000000000000000 --- a/demo/image_classification/data/process_cifar.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import sys -import os -import PIL.Image as Image -""" - Usage: python process_cifar input_dir output_dir -""" - - -def mkdir_not_exist(path): - """ - Make dir if the path does not exist. - path: the path to be created. 
- """ - if not os.path.exists(path): - os.mkdir(path) - - -def create_dir_structure(output_dir): - """ - Create the directory structure for the directory. - output_dir: the direcotry structure path. - """ - mkdir_not_exist(os.path.join(output_dir)) - mkdir_not_exist(os.path.join(output_dir, "train")) - mkdir_not_exist(os.path.join(output_dir, "test")) - - -def convert_batch(batch_path, label_set, label_map, output_dir, data_split): - """ - Convert CIFAR batch to the structure of Paddle format. - batch_path: the batch to be converted. - label_set: the set of labels. - output_dir: the output path. - data_split: whether it is training or testing data. - """ - data = np.load(batch_path) - for data, label, filename in zip(data['data'], data['labels'], - data['filenames']): - data = data.reshape((3, 32, 32)) - data = np.transpose(data, (1, 2, 0)) - label = label_map[label] - output_dir_this = os.path.join(output_dir, data_split, str(label)) - output_filename = os.path.join(output_dir_this, filename) - if not label in label_set: - label_set[label] = True - mkdir_not_exist(output_dir_this) - Image.fromarray(data).save(output_filename) - - -if __name__ == '__main__': - input_dir = sys.argv[1] - output_dir = sys.argv[2] - num_batch = 5 - create_dir_structure(output_dir) - label_map = { - 0: "airplane", - 1: "automobile", - 2: "bird", - 3: "cat", - 4: "deer", - 5: "dog", - 6: "frog", - 7: "horse", - 8: "ship", - 9: "truck" - } - labels = {} - for i in range(1, num_batch + 1): - convert_batch( - os.path.join(input_dir, "data_batch_%d" % i), labels, label_map, - output_dir, "train") - convert_batch( - os.path.join(input_dir, "test_batch"), {}, label_map, output_dir, - "test") diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py deleted file mode 100644 index 6a315ff094c1af5f8250d8a22ff96740dddd9808..0000000000000000000000000000000000000000 --- a/demo/image_classification/image_provider.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import io -import random - -import paddle.utils.image_util as image_util -from paddle.trainer.PyDataProvider2 import * - - -# -# {'img_size': 32, -# 'settings': a global object, -# 'color': True, -# 'mean_img_size': 32, -# 'meta': './data/cifar-out/batches/batches.meta', -# 'num_classes': 10, -# 'file_list': ('./data/cifar-out/batches/train_batch_000',), -# 'use_jpeg': True} -def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg, - is_train, **kwargs): - settings.mean_img_size = mean_img_size - settings.img_size = img_size - settings.num_classes = num_classes - settings.color = color - settings.is_train = is_train - - if settings.color: - settings.img_raw_size = settings.img_size * settings.img_size * 3 - else: - settings.img_raw_size = settings.img_size * settings.img_size - - settings.meta_path = meta - settings.use_jpeg = use_jpeg - - settings.img_mean = image_util.load_meta(settings.meta_path, - settings.mean_img_size, - settings.img_size, settings.color) - - settings.logger.info('Image size: %s', settings.img_size) - settings.logger.info('Meta path: %s', settings.meta_path) - settings.input_types = { - 'image': dense_vector(settings.img_raw_size), - 'label': integer_value(settings.num_classes) - } - - settings.logger.info('DataProvider Initialization finished') - - -@provider(init_hook=hook, min_pool_size=0) -def processData(settings, file_list): - """ - The main function for loading data. - Load the batch, iterate all the images and labels in this batch. - file_list: the batch file list. - """ - with open(file_list, 'r') as fdata: - lines = [line.strip() for line in fdata] - random.shuffle(lines) - for file_name in lines: - with io.open(file_name.strip(), 'rb') as file: - data = cPickle.load(file) - indexes = list(range(len(data['images']))) - if settings.is_train: - random.shuffle(indexes) - for i in indexes: - if settings.use_jpeg == 1: - img = image_util.decode_jpeg(data['images'][i]) - else: - img = data['images'][i] - img_feat = image_util.preprocess_img( - img, settings.img_mean, settings.img_size, - settings.is_train, settings.color) - label = data['labels'][i] - yield { - 'image': img_feat.astype('float32'), - 'label': int(label) - } diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py deleted file mode 100644 index f09605394a19e09d92e555eeefb0b5646625b618..0000000000000000000000000000000000000000 --- a/demo/image_classification/image_util.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -from PIL import Image -from cStringIO import StringIO - - -def resize_image(img, target_size): - """ - Resize an image so that the shorter edge has length target_size. - img: the input image to be resized. - target_size: the target resized image size. 
- """ - percent = (target_size / float(min(img.size[0], img.size[1]))) - resized_size = int(round(img.size[0] * percent)), int( - round(img.size[1] * percent)) - img = img.resize(resized_size, Image.ANTIALIAS) - return img - - -def flip(im): - """ - Return the flipped image. - Flip an image along the horizontal direction. - im: input image, (H x W x K) ndarrays - """ - if len(im.shape) == 3: - return im[:, :, ::-1] - else: - return im[:, ::-1] - - -def crop_img(im, inner_size, color=True, test=True): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - im: (K x H x W) ndarrays - inner_size: the cropped image size. - color: whether it is color image. - test: whether in test mode. - If False, does random cropping and flipping. - If True, crop the center of images. - """ - if color: - height, width = max(inner_size, im.shape[1]), max(inner_size, - im.shape[2]) - padded_im = np.zeros((3, height, width)) - startY = (height - im.shape[1]) / 2 - startX = (width - im.shape[2]) / 2 - endY, endX = startY + im.shape[1], startX + im.shape[2] - padded_im[:, startY:endY, startX:endX] = im - else: - im = im.astype('float32') - height, width = max(inner_size, im.shape[0]), max(inner_size, - im.shape[1]) - padded_im = np.zeros((height, width)) - startY = (height - im.shape[0]) / 2 - startX = (width - im.shape[1]) / 2 - endY, endX = startY + im.shape[0], startX + im.shape[1] - padded_im[startY:endY, startX:endX] = im - if test: - startY = (height - inner_size) / 2 - startX = (width - inner_size) / 2 - else: - startY = np.random.randint(0, height - inner_size + 1) - startX = np.random.randint(0, width - inner_size + 1) - endY, endX = startY + inner_size, startX + inner_size - if color: - pic = padded_im[:, startY:endY, startX:endX] - else: - pic = padded_im[startY:endY, startX:endX] - if (not test) and (np.random.randint(2) == 0): - pic = flip(pic) - return pic - - -def decode_jpeg(jpeg_string): - np_array = np.array(Image.open(StringIO(jpeg_string))) - if len(np_array.shape) == 3: - np_array = np.transpose(np_array, (2, 0, 1)) - return np_array - - -def preprocess_img(im, img_mean, crop_size, is_train, color=True): - """ - Does data augmentation for images. - If is_train is false, cropping the center region from the image. - If is_train is true, randomly crop a region from the image, - and randomy does flipping. - im: (K x H x W) ndarrays - """ - im = im.astype('float32') - test = not is_train - pic = crop_img(im, crop_size, color, test) - pic -= img_mean - return pic.flatten() - - -def load_meta(meta_path, mean_img_size, crop_size, color=True): - """ - Return the loaded meta file. - Load the meta image, which is the mean of the images in the dataset. - The mean image is subtracted from every input image so that the expected mean - of each input image is zero. - """ - mean = np.load(meta_path)['data_mean'] - border = (mean_img_size - crop_size) / 2 - if color: - assert (mean_img_size * mean_img_size * 3 == mean.shape[0]) - mean = mean.reshape(3, mean_img_size, mean_img_size) - mean = mean[:, border:border + crop_size, border:border + - crop_size].astype('float32') - else: - assert (mean_img_size * mean_img_size == mean.shape[0]) - mean = mean.reshape(mean_img_size, mean_img_size) - mean = mean[border:border + crop_size, border:border + - crop_size].astype('float32') - return mean - - -def load_image(img_path, is_color=True): - """ - Load image and return. - img_path: image path. - is_color: is color image or not. 
- """ - img = Image.open(img_path) - img.load() - return img - - -def oversample(img, crop_dims): - """ - image : iterable of (H x W x K) ndarrays - crop_dims: (height, width) tuple for the crops. - Returned data contains ten crops of input image, namely, - four corner patches and the center patch as well as their - horizontal reflections. - """ - # Dimensions and center. - im_shape = np.array(img[0].shape) - crop_dims = np.array(crop_dims) - im_center = im_shape[:2] / 2.0 - - # Make crop coordinates - h_indices = (0, im_shape[0] - crop_dims[0]) - w_indices = (0, im_shape[1] - crop_dims[1]) - crops_ix = np.empty((5, 4), dtype=int) - curr = 0 - for i in h_indices: - for j in w_indices: - crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) - curr += 1 - crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate( - [-crop_dims / 2.0, crop_dims / 2.0]) - crops_ix = np.tile(crops_ix, (2, 1)) - - # Extract crops - crops = np.empty( - (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]), - dtype=np.float32) - ix = 0 - for im in img: - for crop in crops_ix: - crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] - ix += 1 - crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors - return crops - - -class ImageTransformer: - def __init__(self, - transpose=None, - channel_swap=None, - mean=None, - is_color=True): - self.transpose = transpose - self.channel_swap = None - self.mean = None - self.is_color = is_color - - def set_transpose(self, order): - if self.is_color: - assert 3 == len(order) - self.transpose = order - - def set_channel_swap(self, order): - if self.is_color: - assert 3 == len(order) - self.channel_swap = order - - def set_mean(self, mean): - # mean value, may be one value per channel - if mean.ndim == 1: - mean = mean[:, np.newaxis, np.newaxis] - else: - # elementwise mean - if self.is_color: - assert len(mean.shape) == 3 - self.mean = mean - - def transformer(self, data): - if self.transpose is not None: - data = data.transpose(self.transpose) - if self.channel_swap is not None: - data = data[self.channel_swap, :, :] - if self.mean is not None: - data -= self.mean - return data diff --git a/demo/image_classification/predict.sh b/demo/image_classification/predict.sh deleted file mode 100755 index 9d5785c9a1a4dac12f7940fa708b1a79c6ec8a93..0000000000000000000000000000000000000000 --- a/demo/image_classification/predict.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -model=cifar_vgg_model/pass-00299/ -image=data/cifar-out/test/airplane/seaplane_s_000978.png -use_gpu=1 -python prediction.py $model $image $use_gpu diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py deleted file mode 100755 index 49c0ff600c40e0222093ff0a8a2f7e8e38ccba29..0000000000000000000000000000000000000000 --- a/demo/image_classification/prediction.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -import logging -from PIL import Image -from optparse import OptionParser - -import paddle.utils.image_util as image_util - -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import dense_vector -from paddle.trainer.config_parser import parse_config - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') -logging.getLogger().setLevel(logging.INFO) - - -class ImageClassifier(): - def __init__(self, - train_conf, - use_gpu=True, - model_dir=None, - resize_dim=None, - crop_dim=None, - mean_file=None, - oversample=False, - is_color=True): - """ - train_conf: network configure. - model_dir: string, directory of model. - resize_dim: int, resized image size. - crop_dim: int, crop size. - mean_file: string, image mean file. - oversample: bool, oversample means multiple crops, namely five - patches (the four corner patches and the center - patch) as well as their horizontal reflections, - ten crops in all. - """ - self.train_conf = train_conf - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.resize_dim = resize_dim - self.crop_dims = [crop_dim, crop_dim] - self.oversample = oversample - self.is_color = is_color - - self.transformer = image_util.ImageTransformer(is_color=is_color) - self.transformer.set_transpose((2, 0, 1)) - - self.mean_file = mean_file - mean = np.load(self.mean_file)['data_mean'] - mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) - self.transformer.set_mean(mean) # mean pixel - gpu = 1 if use_gpu else 0 - conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu) - conf = parse_config(train_conf, conf_args) - swig_paddle.initPaddle("--use_gpu=%d" % (gpu)) - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(self.network, swig_paddle.GradientMachine) - self.network.loadParameters(self.model_dir) - - data_size = 3 * self.crop_dims[0] * self.crop_dims[1] - slots = [dense_vector(data_size)] - self.converter = DataProviderConverter(slots) - - def get_data(self, img_path): - """ - 1. load image from img_path. - 2. resize or oversampling. - 3. transformer data: transpose, sub mean. - return K x H x W ndarray. - img_path: image path. 
- """ - image = image_util.load_image(img_path, self.is_color) - if self.oversample: - # image_util.resize_image: short side is self.resize_dim - image = image_util.resize_image(image, self.resize_dim) - image = np.array(image) - input = np.zeros( - (1, image.shape[0], image.shape[1], 3), dtype=np.float32) - input[0] = image.astype(np.float32) - input = image_util.oversample(input, self.crop_dims) - else: - image = image.resize(self.crop_dims, Image.ANTIALIAS) - input = np.zeros( - (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32) - input[0] = np.array(image).astype(np.float32) - - data_in = [] - for img in input: - img = self.transformer.transformer(img).flatten() - data_in.append([img.tolist()]) - return data_in - - def forward(self, input_data): - in_arg = self.converter(input_data) - return self.network.forwardTest(in_arg) - - def forward(self, data, output_layer): - """ - input_data: py_paddle input data. - output_layer: specify the name of probability, namely the layer with - softmax activation. - return: the predicting probability of each label. - """ - input = self.converter(data) - self.network.forwardTest(input) - output = self.network.getLayerOutputs(output_layer) - # For oversampling, average predictions across crops. - # If not, the shape of output[name]: (1, class_number), - # the mean is also applicable. - return output[output_layer]['value'].mean(0) - - def predict(self, image=None, output_layer=None): - assert isinstance(image, basestring) - assert isinstance(output_layer, basestring) - data = self.get_data(image) - prob = self.forward(data, output_layer) - lab = np.argsort(-prob) - logging.info("Label of %s is: %d", image, lab[0]) - - -if __name__ == '__main__': - image_size = 32 - crop_size = 32 - multi_crop = True - config = "vgg_16_cifar.py" - output_layer = "__fc_layer_1__" - mean_path = "data/cifar-out/batches/batches.meta" - model_path = sys.argv[1] - image = sys.argv[2] - use_gpu = bool(int(sys.argv[3])) - - obj = ImageClassifier( - train_conf=config, - model_dir=model_path, - resize_dim=image_size, - crop_dim=crop_size, - mean_file=mean_path, - use_gpu=use_gpu, - oversample=multi_crop) - obj.predict(image, output_layer) diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py deleted file mode 100755 index 2947ad239c36f9a02ed67ccf5906380cb70e37ce..0000000000000000000000000000000000000000 --- a/demo/image_classification/preprocess.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.utils.preprocess_img import ImageClassificationDatasetCreater -from optparse import OptionParser - - -def option_parser(): - parser = OptionParser(usage="usage: python preprcoess.py "\ - "-i data_dir [options]") - parser.add_option( - "-i", - "--input", - action="store", - dest="input", - help="Input data directory.") - parser.add_option( - "-s", - "--size", - action="store", - dest="size", - help="Processed image size.") - parser.add_option( - "-c", - "--color", - action="store", - dest="color", - help="whether to use color images.") - return parser.parse_args() - - -if __name__ == '__main__': - options, args = option_parser() - data_dir = options.input - processed_image_size = int(options.size) - color = options.color == "1" - data_creator = ImageClassificationDatasetCreater( - data_dir, processed_image_size, color) - data_creator.train_list_name = "train.txt" - data_creator.test_list_name = "test.txt" - data_creator.num_per_batch = 1000 - data_creator.overwrite = True - data_creator.create_batches() diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh deleted file mode 100755 index c7396c6393599ef3f2c55089eb05f2435b2b4b82..0000000000000000000000000000000000000000 --- a/demo/image_classification/preprocess.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -data_dir=./data/cifar-out - -python preprocess.py -i $data_dir -s 32 -c 1 - -echo "data/cifar-out/batches/train.txt" > train.list -echo "data/cifar-out/batches/test.txt" > test.list diff --git a/demo/image_classification/train.sh b/demo/image_classification/train.sh deleted file mode 100755 index e45bd47ad5925c6674d628a70a7ad7c4d5d5c173..0000000000000000000000000000000000000000 --- a/demo/image_classification/train.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -config=vgg_16_cifar.py -output=./cifar_vgg_model -log=train.log - -paddle train \ ---config=$config \ ---dot_period=10 \ ---log_period=100 \ ---test_all_data_in_one_period=1 \ ---use_gpu=1 \ ---trainer_count=1 \ ---num_passes=300 \ ---save_dir=$output \ -2>&1 | tee $log -paddle usage -l $log -e $? 
-n "image_classification_train" >/dev/null 2>&1 - -python -m paddle.utils.plotcurve -i $log > plot.png diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py deleted file mode 100755 index 8ee4a64c15f885023a6e19812885b4f76bb12af9..0000000000000000000000000000000000000000 --- a/demo/image_classification/vgg_16_cifar.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -is_predict = get_config_arg("is_predict", bool, False) - -####################Data Configuration ################## -if not is_predict: - data_dir = 'data/cifar-out/batches/' - meta_path = data_dir + 'batches.meta' - - args = { - 'meta': meta_path, - 'mean_img_size': 32, - 'img_size': 32, - 'num_classes': 10, - 'use_jpeg': 1, - 'color': "color" - } - - define_py_data_sources2( - train_list="train.list", - test_list="train.list", - module='image_provider', - obj='processData', - args=args) - -######################Algorithm Configuration ############# -settings( - batch_size=128, - learning_rate=0.1 / 128.0, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * 128)) - -#######################Network Configuration ############# -data_size = 3 * 32 * 32 -label_size = 10 -img = data_layer(name='image', size=data_size) -# small_vgg is predefined in trainer_config_helpers.networks -predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size) - -if not is_predict: - lbl = data_layer(name="label", size=label_size) - outputs(classification_cost(input=predict, label=lbl)) -else: - outputs(predict) diff --git a/demo/introduction/.gitignore b/demo/introduction/.gitignore deleted file mode 100644 index c54f3f9480ce4ceefda98f77a812ec2d6cd4a5e3..0000000000000000000000000000000000000000 --- a/demo/introduction/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -dataprovider.pyc -empty.list -train.log -output -train.list diff --git a/demo/introduction/README.md b/demo/introduction/README.md deleted file mode 100644 index 0614a7afe645677ef0b65a17ea05f1dcfa45214f..0000000000000000000000000000000000000000 --- a/demo/introduction/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This folder contains scripts used in PaddlePaddle introduction. -- use `bash train.sh` to train a simple linear regression model -- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3]. 
diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py deleted file mode 100644 index 1ba971b3688ce3dec078998df2c0b183a4e449f8..0000000000000000000000000000000000000000 --- a/demo/introduction/api_train_v2.py +++ /dev/null @@ -1,58 +0,0 @@ -import paddle.v2 as paddle -import paddle.v2.dataset.uci_housing as uci_housing - - -def main(): - # init - paddle.init(use_gpu=False, trainer_count=1) - - # network config - x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) - y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param(name='w'), - size=1, - act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param(name='b')) - y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) - cost = paddle.layer.mse_cost(input=y_predict, label=y) - - # create parameters - parameters = paddle.parameters.create(cost) - - # create optimizer - optimizer = paddle.optimizer.Momentum(momentum=0) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - - # event_handler to print training and testing info - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % ( - event.pass_id, event.batch_id, event.cost) - - if isinstance(event, paddle.event.EndPass): - if (event.pass_id + 1) % 10 == 0: - result = trainer.test( - reader=paddle.batch( - uci_housing.test(), batch_size=2), - feeding={'x': 0, - 'y': 1}) - print "Test %d, %.2f" % (event.pass_id, result.cost) - - # training - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - uci_housing.train(), buf_size=500), - batch_size=2), - feeding={'x': 0, - 'y': 1}, - event_handler=event_handler, - num_passes=30) - - -if __name__ == '__main__': - main() diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py deleted file mode 100644 index 5b48aad0408800676ae7da16eba2dcbb2124f25f..0000000000000000000000000000000000000000 --- a/demo/introduction/dataprovider.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import random - - -# define data types of input: 2 real numbers -@provider( - input_types={'x': dense_vector(1), - 'y': dense_vector(1)}, use_seq=False) -def process(settings, input_file): - for i in xrange(2000): - x = random.random() - yield {'x': [x], 'y': [2 * x + 0.3]} diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py deleted file mode 100755 index eeda43c5c86f3e49f758bf55b16a68387e64238c..0000000000000000000000000000000000000000 --- a/demo/introduction/evaluate_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Print model parameters in last model - -Usage: - python evaluate_model.py -""" -import numpy as np -import os - - -def load(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header for float type. - return np.fromfile(f, dtype=np.float32) - - -def main(): - print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'), - load('output/pass-00029/b')) - - -if __name__ == '__main__': - main() diff --git a/demo/introduction/train.sh b/demo/introduction/train.sh deleted file mode 100755 index 2ce6446d7c943ffc9bea8da43d153539f6f9f15f..0000000000000000000000000000000000000000 --- a/demo/introduction/train.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -paddle train \ - --config=trainer_config.py \ - --save_dir=./output \ - --num_passes=30 \ - 2>&1 |tee 'train.log' -paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1 diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py deleted file mode 100644 index 651dfaa4b7b4873810a0b393655541a62d1a311b..0000000000000000000000000000000000000000 --- a/demo/introduction/trainer_config.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -# 1. read data. Suppose you saved above python code as dataprovider.py -define_py_data_sources2( - train_list=['no_matter.txt'], - test_list=None, - module='dataprovider', - obj='process', - args={}) - -# 2. learning algorithm -settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) - -# 3. 
Network configuration -x = data_layer(name='x', size=1) -y = data_layer(name='y', size=1) -y_predict = fc_layer( - input=x, - param_attr=ParamAttr(name='w'), - size=1, - act=LinearActivation(), - bias_attr=ParamAttr(name='b')) -cost = mse_cost(input=y_predict, label=y) -outputs(cost) diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py deleted file mode 100644 index 6b95a88042a13a280bcb80f753b3887fcef37296..0000000000000000000000000000000000000000 --- a/demo/mnist/api_train_v2.py +++ /dev/null @@ -1,137 +0,0 @@ -import paddle.v2 as paddle -import gzip - - -def softmax_regression(img): - predict = paddle.layer.fc(input=img, - size=10, - act=paddle.activation.Softmax()) - return predict - - -def multilayer_perceptron(img): - # The first fully-connected layer - hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) - # The second fully-connected layer and the according activation function - hidden2 = paddle.layer.fc(input=hidden1, - size=64, - act=paddle.activation.Relu()) - # The thrid fully-connected layer, note that the hidden size should be 10, - # which is the number of unique digits - predict = paddle.layer.fc(input=hidden2, - size=10, - act=paddle.activation.Softmax()) - return predict - - -def convolutional_neural_network(img): - # first conv layer - conv_pool_1 = paddle.networks.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - num_channel=1, - pool_size=2, - pool_stride=2, - act=paddle.activation.Tanh()) - # second conv layer - conv_pool_2 = paddle.networks.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - num_channel=20, - pool_size=2, - pool_stride=2, - act=paddle.activation.Tanh()) - # The first fully-connected layer - fc1 = paddle.layer.fc(input=conv_pool_2, - size=128, - act=paddle.activation.Tanh()) - # The softmax layer, note that the hidden size should be 10, - # which is the number of unique digits - predict = paddle.layer.fc(input=fc1, - size=10, - act=paddle.activation.Softmax()) - return predict - - -def main(): - paddle.init(use_gpu=False, trainer_count=1) - - # define network topology - images = paddle.layer.data( - name='pixel', type=paddle.data_type.dense_vector(784)) - label = paddle.layer.data( - name='label', type=paddle.data_type.integer_value(10)) - - # Here we can build the prediction network in different ways. Please - # choose one by uncomment corresponding line. 
- predict = softmax_regression(images) - #predict = multilayer_perceptron(images) - #predict = convolutional_neural_network(images) - - cost = paddle.layer.classification_cost(input=predict, label=label) - - try: - with gzip.open('params.tar.gz', 'r') as f: - parameters = paddle.parameters.Parameters.from_tar(f) - except IOError: - parameters = paddle.parameters.create(cost) - - optimizer = paddle.optimizer.Momentum( - learning_rate=0.1 / 128.0, - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - - lists = [] - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1000 == 0: - print "Pass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - - with gzip.open('params.tar.gz', 'w') as f: - parameters.to_tar(f) - - elif isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=paddle.batch( - paddle.dataset.mnist.test(), batch_size=128)) - print "Test with Pass %d, Cost %f, %s\n" % ( - event.pass_id, result.cost, result.metrics) - lists.append((event.pass_id, result.cost, - result.metrics['classification_error_evaluator'])) - - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=8192), - batch_size=128), - event_handler=event_handler, - num_passes=100) - - # find the best pass - best = sorted(lists, key=lambda list: float(list[1]))[0] - print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) - print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) - - test_creator = paddle.dataset.mnist.test() - test_data = [] - for item in test_creator(): - test_data.append((item[0], )) - if len(test_data) == 100: - break - - # output is a softmax layer. It returns probabilities. 
- # Shape should be (100, 10) - probs = paddle.infer( - output_layer=predict, parameters=parameters, input=test_data) - print probs.shape - - -if __name__ == '__main__': - main() diff --git a/demo/recommendation/.gitignore b/demo/recommendation/.gitignore deleted file mode 100644 index fd27ef62a87cae51f2392c0eba50a44490d029af..0000000000000000000000000000000000000000 --- a/demo/recommendation/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -log.txt -data/meta.bin -data/ml-1m -data/ratings.dat.train -data/ratings.dat.test -data/train.list -data/test.list -dataprovider_copy_1.py -*.pyc -output diff --git a/demo/recommendation/api_train_v2.py b/demo/recommendation/api_train_v2.py deleted file mode 100644 index f6a061799e3ac50236a68beedaf700dd6c698a05..0000000000000000000000000000000000000000 --- a/demo/recommendation/api_train_v2.py +++ /dev/null @@ -1,125 +0,0 @@ -import paddle.v2 as paddle -import cPickle -import copy - - -def main(): - paddle.init(use_gpu=False) - movie_title_dict = paddle.dataset.movielens.get_movie_title_dict() - uid = paddle.layer.data( - name='user_id', - type=paddle.data_type.integer_value( - paddle.dataset.movielens.max_user_id() + 1)) - usr_emb = paddle.layer.embedding(input=uid, size=32) - - usr_gender_id = paddle.layer.data( - name='gender_id', type=paddle.data_type.integer_value(2)) - usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16) - - usr_age_id = paddle.layer.data( - name='age_id', - type=paddle.data_type.integer_value( - len(paddle.dataset.movielens.age_table))) - usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16) - - usr_job_id = paddle.layer.data( - name='job_id', - type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id( - ) + 1)) - - usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16) - - usr_combined_features = paddle.layer.fc( - input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb], - size=200, - act=paddle.activation.Tanh()) - - mov_id = paddle.layer.data( - name='movie_id', - type=paddle.data_type.integer_value( - paddle.dataset.movielens.max_movie_id() + 1)) - mov_emb = paddle.layer.embedding(input=mov_id, size=32) - - mov_categories = paddle.layer.data( - name='category_id', - type=paddle.data_type.sparse_binary_vector( - len(paddle.dataset.movielens.movie_categories()))) - - mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32) - - mov_title_id = paddle.layer.data( - name='movie_title', - type=paddle.data_type.integer_value_sequence(len(movie_title_dict))) - mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32) - mov_title_conv = paddle.networks.sequence_conv_pool( - input=mov_title_emb, hidden_size=32, context_len=3) - - mov_combined_features = paddle.layer.fc( - input=[mov_emb, mov_categories_hidden, mov_title_conv], - size=200, - act=paddle.activation.Tanh()) - - inference = paddle.layer.cos_sim( - a=usr_combined_features, b=mov_combined_features, size=1, scale=5) - cost = paddle.layer.mse_cost( - input=inference, - label=paddle.layer.data( - name='score', type=paddle.data_type.dense_vector(1))) - - parameters = paddle.parameters.create(cost) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=paddle.optimizer.Adam( - learning_rate=1e-4)) - feeding = { - 'user_id': 0, - 'gender_id': 1, - 'age_id': 2, - 'job_id': 3, - 'movie_id': 4, - 'category_id': 5, - 'movie_title': 6, - 'score': 7 - } - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d Batch 
%d Cost %.2f" % ( - event.pass_id, event.batch_id, event.cost) - - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - paddle.dataset.movielens.train(), buf_size=8192), - batch_size=256), - event_handler=event_handler, - feeding=feeding, - num_passes=1) - - user_id = 234 - movie_id = 345 - - user = paddle.dataset.movielens.user_info()[user_id] - movie = paddle.dataset.movielens.movie_info()[movie_id] - - feature = user.value() + movie.value() - - def reader(): - yield feature - - infer_dict = copy.copy(feeding) - del infer_dict['score'] - - prediction = paddle.infer( - output=inference, - parameters=parameters, - reader=paddle.batch( - reader, batch_size=32), - feeding=infer_dict) - print(prediction + 5) / 2 - - -if __name__ == '__main__': - main() diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py deleted file mode 100755 index c20c65286621d701ad58409b539bbe9c813d453a..0000000000000000000000000000000000000000 --- a/demo/recommendation/common_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.trainer.PyDataProvider2 import * - - -def meta_to_header(meta, name): - metas = meta[name]['__meta__']['raw_meta'] - for each_meta in metas: - slot_name = each_meta.get('name', '%s_id' % name) - if each_meta['type'] == 'id': - yield slot_name, integer_value(each_meta['max']) - elif each_meta['type'] == 'embedding': - is_seq = each_meta['seq'] == 'sequence' - yield slot_name, integer_value( - len(each_meta['dict']), - seq_type=SequenceType.SEQUENCE - if is_seq else SequenceType.NO_SEQUENCE) - elif each_meta['type'] == 'one_hot_dense': - yield slot_name, dense_vector(len(each_meta['dict'])) diff --git a/demo/recommendation/data/config.json b/demo/recommendation/data/config.json deleted file mode 100644 index f26e74ce47bb7843a571e6033f051c046b31f054..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/config.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "user": { - "file": { - "name": "users.dat", - "delimiter": "::" - }, - "fields": ["id", "gender", "age", "occupation"] - }, - "movie": { - "file": { - "name": "movies.dat", - "delimiter": "::" - }, - "fields": ["id", "title", "genres"] - } -} diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py deleted file mode 100644 index 4ca496a252dffc62ed62bb8f2a5ee1661a940580..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/config_generator.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -config_generator.py - -Usage: - ./config_generator.py [--output_format=] - ./config_generator.py -h | --help - -Options: - -h --help Show this screen. - --output_format= Output Config format(json or yaml) [default: json]. -""" - -import json -import docopt -import copy - -DEFAULT_FILE = {"type": "split", "delimiter": ","} - -DEFAULT_FIELD = { - "id": { - "type": "id" - }, - "gender": { - "name": "gender", - "type": "embedding", - "dict": { - "type": "char_based" - } - }, - "age": { - "name": "age", - "type": "embedding", - "dict": { - "type": "whole_content", - "sort": True - } - }, - "occupation": { - "name": "occupation", - "type": "embedding", - "dict": { - "type": "whole_content", - "sort": "true" - } - }, - "title": { - "regex": { - "pattern": r"^(.*)\((\d+)\)$", - "group_id": 1, - "strip": True - }, - "name": "title", - "type": { - "name": "embedding", - "seq_type": "sequence", - }, - "dict": { - "type": "char_based" - } - }, - "genres": { - "type": "one_hot_dense", - "dict": { - "type": "split", - "delimiter": "|" - }, - "name": "genres" - } -} - - -def merge_dict(master_dict, slave_dict): - return dict(((k, master_dict.get(k) or slave_dict.get(k)) - for k in set(slave_dict) | set(master_dict))) - - -def main(filename, fmt): - with open(filename, 'r') as f: - conf = json.load(f) - obj = dict() - for k in conf: - val = conf[k] - file_dict = val['file'] - file_dict = merge_dict(file_dict, DEFAULT_FILE) - - fields = [] - for pos, field_key in enumerate(val['fields']): - assert isinstance(field_key, basestring) - field = copy.deepcopy(DEFAULT_FIELD[field_key]) - field['pos'] = pos - fields.append(field) - obj[k] = {"file": file_dict, "fields": fields} - meta = {"meta": obj} - # print meta - if fmt == 'json': - - def formatter(x): - import json - return json.dumps(x, indent=2) - elif fmt == 'yaml': - - def formatter(x): - import yaml - return yaml.safe_dump(x, default_flow_style=False) - else: - raise NotImplementedError("Dump format %s is not implemented" % fmt) - - print formatter(meta) - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version="0.1.0") - main(args[""], args["--output_format"]) diff --git a/demo/recommendation/data/meta_config.json b/demo/recommendation/data/meta_config.json deleted file mode 100644 index cc6a046e271dd0faaa47eeb5a5bef6d3604113fe..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/meta_config.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "meta": { - "movie": { - "fields": [ - { - "type": "id", - "pos": 0 - }, - { - "regex": { - "pattern": "^(.*)\\((\\d+)\\)$", - "group_id": 1, - "strip": true - }, - "type": { - "seq_type": "sequence", - "name": "embedding" - }, - "dict": { - "type": "char_based" - }, - "name": "title", - "pos": 1 - }, - { - "type": "one_hot_dense", - "dict": { - "delimiter": "|", - "type": "split" - }, - "name": "genres", - "pos": 2 - } - ], - "file": { - "delimiter": "::", - "type": "split", - "name": "movies.dat" - } - }, - "user": { - "fields": [ - { - "type": "id", - "pos": 0 - }, - { - "type": "embedding", - "dict": { - "type": "char_based" - }, - "name": "gender", - "pos": 1 - }, - { - 
"type": "embedding", - "dict": { - "sort": true, - "type": "whole_content" - }, - "name": "age", - "pos": 2 - }, - { - "type": "embedding", - "dict": { - "sort": "true", - "type": "whole_content" - }, - "name": "occupation", - "pos": 3 - } - ], - "file": { - "delimiter": "::", - "type": "split", - "name": "users.dat" - } - } - } -} diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py deleted file mode 100644 index 38e4679d266c331a751114cd13f0e3453016cf26..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/meta_generator.py +++ /dev/null @@ -1,430 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Preprocess Movielens dataset, to get movie/user object. - -Usage: - ./preprocess.py [--config=] - ./preprocess.py -h | --help - -Options: - -h --help Show this screen. - --version Show version. - --config= Get MetaData config file [default: config.json]. -""" -import docopt -import os -import sys -import re -import collections - -try: - import cPickle as pickle -except ImportError: - import pickle - - -class UniqueIDGenerator(object): - def __init__(self): - self.pool = collections.defaultdict(self.__next_id__) - self.next_id = 0 - - def __next_id__(self): - tmp = self.next_id - self.next_id += 1 - return tmp - - def __call__(self, k): - return self.pool[k] - - def to_list(self): - ret_val = [None] * len(self.pool) - for k in self.pool.keys(): - ret_val[self.pool[k]] = k - return ret_val - - -class SortedIDGenerator(object): - def __init__(self): - self.__key_set__ = set() - self.dict = None - - def scan(self, key): - self.__key_set__.add(key) - - def finish_scan(self, compare=None, key=None, reverse=False): - self.__key_set__ = sorted( - list(self.__key_set__), cmp=compare, key=key, reverse=reverse) - self.dict = dict() - for idx, each_key in enumerate(self.__key_set__): - self.dict[each_key] = idx - - def __call__(self, key): - return self.dict[key] - - def to_list(self): - return self.__key_set__ - - -class SplitFileReader(object): - def __init__(self, work_dir, config): - assert isinstance(config, dict) - self.filename = config['name'] - self.delimiter = config.get('delimiter', ',') - self.work_dir = work_dir - - def read(self): - with open(os.path.join(self.work_dir, self.filename), 'r') as f: - for line in f: - line = line.strip() - if isinstance(self.delimiter, unicode): - self.delimiter = str(self.delimiter) - yield line.split(self.delimiter) - - @staticmethod - def create(work_dir, config): - assert isinstance(config, dict) - if config['type'] == 'split': - return SplitFileReader(work_dir, config) - - -class IFileReader(object): - READERS = [SplitFileReader] - - def read(self): - raise NotImplementedError() - - @staticmethod - def create(work_dir, config): - for reader_cls in IFileReader.READERS: - val = reader_cls.create(work_dir, config) - if val is not None: - return val - - -class IDFieldParser(object): - TYPE = 'id' - - def 
__init__(self, config): - self.__max_id__ = -sys.maxint - 1 - self.__min_id__ = sys.maxint - self.__id_count__ = 0 - - def scan(self, line): - idx = int(line) - self.__max_id__ = max(self.__max_id__, idx) - self.__min_id__ = min(self.__min_id__, idx) - self.__id_count__ += 1 - - def parse(self, line): - return int(line) - - def meta_field(self): - return { - "is_key": True, - 'max': self.__max_id__, - 'min': self.__min_id__, - 'count': self.__id_count__, - 'type': 'id' - } - - -class SplitEmbeddingDict(object): - def __init__(self, delimiter): - self.__id__ = UniqueIDGenerator() - self.delimiter = delimiter - - def scan(self, multi): - for val in multi.split(self.delimiter): - self.__id__(val) - - def parse(self, multi): - return map(self.__id__, multi.split(self.delimiter)) - - def meta_field(self): - return self.__id__.to_list() - - -class EmbeddingFieldParser(object): - TYPE = 'embedding' - - NO_SEQUENCE = "no_sequence" - SEQUENCE = "sequence" - - class CharBasedEmbeddingDict(object): - def __init__(self, is_seq=True): - self.__id__ = UniqueIDGenerator() - self.is_seq = is_seq - - def scan(self, s): - for ch in s: - self.__id__(ch) - - def parse(self, s): - return map(self.__id__, s) if self.is_seq else self.__id__(s[0]) - - def meta_field(self): - return self.__id__.to_list() - - class WholeContentDict(object): - def __init__(self, need_sort=True): - assert need_sort - self.__id__ = SortedIDGenerator() - self.__has_finished__ = False - - def scan(self, txt): - self.__id__.scan(txt) - - def meta_field(self): - if not self.__has_finished__: - self.__id__.finish_scan() - self.__has_finished__ = True - return self.__id__.to_list() - - def parse(self, txt): - return self.__id__(txt) - - def __init__(self, config): - try: - self.seq_type = config['type']['seq_type'] - except TypeError: - self.seq_type = EmbeddingFieldParser.NO_SEQUENCE - - if config['dict']['type'] == 'char_based': - self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict( - self.seq_type == EmbeddingFieldParser.SEQUENCE) - elif config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ',')) - elif config['dict']['type'] == 'whole_content': - self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][ - 'sort']) - else: - print config - assert False - - self.name = config['name'] - - def scan(self, s): - self.dict.scan(s) - - def meta_field(self): - return { - 'name': self.name, - 'dict': self.dict.meta_field(), - 'type': 'embedding', - 'seq': self.seq_type - } - - def parse(self, s): - return self.dict.parse(s) - - -class OneHotDenseFieldParser(object): - TYPE = 'one_hot_dense' - - def __init__(self, config): - if config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict(config['dict']['delimiter']) - self.name = config['name'] - - def scan(self, s): - self.dict.scan(s) - - def meta_field(self): - # print self.dict.meta_field() - return { - 'dict': self.dict.meta_field(), - 'name': self.name, - 'type': 'one_hot_dense' - } - - def parse(self, s): - ids = self.dict.parse(s) - retv = [0.0] * len(self.dict.meta_field()) - for idx in ids: - retv[idx] = 1.0 - # print retv - return retv - - -class FieldParserFactory(object): - PARSERS = [IDFieldParser, EmbeddingFieldParser, OneHotDenseFieldParser] - - @staticmethod - def create(config): - if isinstance(config['type'], basestring): - config_type = config['type'] - elif isinstance(config['type'], dict): - config_type = config['type']['name'] - - assert config_type is not None - - for each_parser_cls in 
FieldParserFactory.PARSERS: - if config_type == each_parser_cls.TYPE: - return each_parser_cls(config) - print config - - -class CompositeFieldParser(object): - def __init__(self, parser, extractor): - self.extractor = extractor - self.parser = parser - - def scan(self, *args, **kwargs): - self.parser.scan(self.extractor.extract(*args, **kwargs)) - - def parse(self, *args, **kwargs): - return self.parser.parse(self.extractor.extract(*args, **kwargs)) - - def meta_field(self): - return self.parser.meta_field() - - -class PositionContentExtractor(object): - def __init__(self, pos): - self.pos = pos - - def extract(self, line): - assert isinstance(line, list) - return line[self.pos] - - -class RegexPositionContentExtractor(PositionContentExtractor): - def __init__(self, pos, pattern, group_id, strip=True): - PositionContentExtractor.__init__(self, pos) - pattern = pattern.strip() - self.pattern = re.compile(pattern) - self.group_id = group_id - self.strip = strip - - def extract(self, line): - line = PositionContentExtractor.extract(self, line) - match = self.pattern.match(line) - # print line, self.pattern.pattern, match - assert match is not None - txt = match.group(self.group_id) - if self.strip: - txt.strip() - return txt - - -class ContentExtractorFactory(object): - def extract(self, line): - pass - - @staticmethod - def create(config): - if 'pos' in config: - if 'regex' not in config: - return PositionContentExtractor(config['pos']) - else: - extra_args = config['regex'] - return RegexPositionContentExtractor( - pos=config['pos'], **extra_args) - - -class MetaFile(object): - def __init__(self, work_dir): - self.work_dir = work_dir - self.obj = dict() - - def parse(self, config): - config = config['meta'] - - ret_obj = dict() - for key in config.keys(): - val = config[key] - assert 'file' in val - reader = IFileReader.create(self.work_dir, val['file']) - assert reader is not None - assert 'fields' in val and isinstance(val['fields'], list) - fields_config = val['fields'] - field_parsers = map(MetaFile.__field_config_mapper__, fields_config) - - for each_parser in field_parsers: - assert each_parser is not None - - for each_block in reader.read(): - for each_parser in field_parsers: - each_parser.scan(each_block) - - metas = map(lambda x: x.meta_field(), field_parsers) - # print metas - key_index = filter( - lambda x: x is not None, - map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None, - enumerate(metas)))[0] - - key_map = [] - for i in range(min(key_index, len(metas))): - key_map.append(i) - for i in range(key_index + 1, len(metas)): - key_map.append(i) - - obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}} - - for each_block in reader.read(): - idx = field_parsers[key_index].parse(each_block) - val = [] - for i, each_parser in enumerate(field_parsers): - if i != key_index: - val.append(each_parser.parse(each_block)) - obj[idx] = val - ret_obj[key] = obj - self.obj = ret_obj - return ret_obj - - @staticmethod - def __field_config_mapper__(conf): - assert isinstance(conf, dict) - extrator = ContentExtractorFactory.create(conf) - field_parser = FieldParserFactory.create(conf) - assert extrator is not None - assert field_parser is not None - return CompositeFieldParser(field_parser, extrator) - - def dump(self, fp): - pickle.dump(self.obj, fp, pickle.HIGHEST_PROTOCOL) - - -def preprocess(binary_filename, dataset_dir, config, **kwargs): - assert isinstance(config, str) - with open(config, 'r') as config_file: - file_loader = None - if 
config.lower().endswith('.yaml'): - import yaml - file_loader = yaml - elif config.lower().endswith('.json'): - import json - file_loader = json - config = file_loader.load(config_file) - meta = MetaFile(dataset_dir) - meta.parse(config) - with open(binary_filename, 'wb') as outf: - meta.dump(outf) - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version='0.1.0') - kwargs = dict() - for key in args.keys(): - if key != '--help': - param_name = key - assert isinstance(param_name, str) - param_name = param_name.replace('<', '') - param_name = param_name.replace('>', '') - param_name = param_name.replace('--', '') - kwargs[param_name] = args[key] - preprocess(**kwargs) diff --git a/demo/recommendation/data/ml_data.sh b/demo/recommendation/data/ml_data.sh deleted file mode 100755 index 2268d876389e0bdf5ead405e74d278d276626f82..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/ml_data.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -ex -cd "$(dirname "$0")" -# download the dataset -wget http://files.grouplens.org/datasets/movielens/ml-1m.zip -# unzip the dataset -unzip ml-1m.zip -# remove the unused zip file -rm ml-1m.zip diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py deleted file mode 100644 index be6869c22f04be1db0f8e9c35c73c851e4c490b0..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/split.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Separate movielens 1m dataset to train/test file. - -Usage: - ./separate.py [--test_ratio=] [--delimiter=] - ./separate.py -h | --help - -Options: - -h --help Show this screen. - --version Show version. - --test_ratio= Test ratio for separate [default: 0.1]. - --delimiter= File delimiter [default: ,]. 
-""" -import docopt -import collections -import random - - -def process(test_ratio, input_file, delimiter, **kwargs): - test_ratio = float(test_ratio) - rating_dict = collections.defaultdict(list) - with open(input_file, 'r') as f: - for line in f: - user_id = int(line.split(delimiter)[0]) - rating_dict[user_id].append(line.strip()) - - with open(input_file + ".train", 'w') as train_file: - with open(input_file + ".test", 'w') as test_file: - for k in rating_dict.keys(): - lines = rating_dict[k] - assert isinstance(lines, list) - random.shuffle(lines) - test_len = int(len(lines) * test_ratio) - for line in lines[:test_len]: - print >> test_file, line - - for line in lines[test_len:]: - print >> train_file, line - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version='0.1.0') - kwargs = dict() - for key in args.keys(): - if key != '--help': - param_name = key - assert isinstance(param_name, str) - param_name = param_name.replace('<', '') - param_name = param_name.replace('>', '') - param_name = param_name.replace('--', '') - kwargs[param_name] = args[key] - process(**kwargs) diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py deleted file mode 100755 index c4ff96d80e81926049c9a71d6d9d991c0b568c25..0000000000000000000000000000000000000000 --- a/demo/recommendation/dataprovider.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import common_utils # parse - - -def __list_to_map__(lst): - ret_val = dict() - for each in lst: - k, v = each - ret_val[k] = v - return ret_val - - -def hook(settings, meta, **kwargs): - """ - Init hook is invoked before process data. It will set obj.slots and store - data meta. - - :param obj: global object. It will passed to process routine. - :type obj: object - :param meta: the meta file object, which passed from trainer_config. Meta - file record movie/user features. - :param kwargs: unused other arguments. - """ - del kwargs # unused kwargs - - # Header define slots that used for paddle. - # first part is movie features. - # second part is user features. - # final part is rating score. - # header is a list of [USE_SEQ_OR_NOT?, SlotType] - movie_headers = list(common_utils.meta_to_header(meta, 'movie')) - settings.movie_names = [h[0] for h in movie_headers] - headers = movie_headers - user_headers = list(common_utils.meta_to_header(meta, 'user')) - settings.user_names = [h[0] for h in user_headers] - headers.extend(user_headers) - headers.append(("rating", dense_vector(1))) # Score - - # slot types. - settings.input_types = __list_to_map__(headers) - settings.meta = meta - - -@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, filename): - with open(filename, 'r') as f: - for line in f: - # Get a rating from file. 
- user_id, movie_id, score = map(int, line.split('::')[:-1]) - - # Scale score to [-5, +5] - score = float(score) * 2 - 5.0 - - # Get movie/user features by movie_id, user_id - movie_meta = settings.meta['movie'][movie_id] - user_meta = settings.meta['user'][user_id] - - outputs = [('movie_id', movie_id - 1)] - - # Then add movie features - for i, each_meta in enumerate(movie_meta): - outputs.append((settings.movie_names[i + 1], each_meta)) - - # Then add user id. - outputs.append(('user_id', user_id - 1)) - - # Then add user features. - for i, each_meta in enumerate(user_meta): - outputs.append((settings.user_names[i + 1], each_meta)) - - # Finally, add score - outputs.append(('rating', [score])) - # Return data to paddle - yield __list_to_map__(outputs) diff --git a/demo/recommendation/evaluate.py b/demo/recommendation/evaluate.py deleted file mode 100755 index 3afa7a1e9db5fefb1bbf5aaa174b8168afae4058..0000000000000000000000000000000000000000 --- a/demo/recommendation/evaluate.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re -import math - - -def get_best_pass(log_filename): - with open(log_filename, 'r') as f: - text = f.read() - pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)', - re.S) - results = re.findall(pattern, text) - sorted_results = sorted(results, key=lambda result: float(result[0])) - return sorted_results[0] - - -log_filename = sys.argv[1] -log = get_best_pass(log_filename) -predict_error = math.sqrt(float(log[0])) / 2 -print 'Best pass is %s, error is %s, which means predict get error as %f' % ( - log[1], log[0], predict_error) - -evaluate_pass = "output/pass-%s" % log[1] -print "evaluating from pass %s" % evaluate_pass diff --git a/demo/recommendation/evaluate.sh b/demo/recommendation/evaluate.sh deleted file mode 100755 index 02b2857de028bc9c05d7ddd67012043b671b2764..0000000000000000000000000000000000000000 --- a/demo/recommendation/evaluate.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | sort | head -n 1 -} - -LOG=`get_best_pass log.txt` -LOG=(${LOG}) -echo 'Best pass is '${LOG[1]}, ' error is '${LOG[0]}, 'which means predict get error as '`echo ${LOG[0]} | python -c 'import math; print math.sqrt(float(raw_input()))/2'` - -evaluate_pass="output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py deleted file mode 100755 index 8ad993eab3a9f637cfff752bfedbbc62eaf3c8d5..0000000000000000000000000000000000000000 --- a/demo/recommendation/prediction.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle, DataProviderConverter - -from common_utils import * -from paddle.trainer.config_parser import parse_config - -try: - import cPickle as pickle -except ImportError: - import pickle -import sys - -if __name__ == '__main__': - model_path = sys.argv[1] - swig_paddle.initPaddle('--use_gpu=0') - conf = parse_config("trainer_config.py", "is_predict=1") - network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(network, swig_paddle.GradientMachine) - network.loadParameters(model_path) - with open('./data/meta.bin', 'rb') as f: - meta = pickle.load(f) - headers = [h[1] for h in meta_to_header(meta, 'movie')] - headers.extend([h[1] for h in meta_to_header(meta, 'user')]) - cvt = DataProviderConverter(headers) - while True: - movie_id = int(raw_input("Input movie_id: ")) - user_id = int(raw_input("Input user_id: ")) - movie_meta = meta['movie'][movie_id] # Query Data From Meta. - user_meta = meta['user'][user_id] - data = [movie_id - 1] - data.extend(movie_meta) - data.append(user_id - 1) - data.extend(user_meta) - print "Prediction Score is %.2f" % ( - (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5) - / 2) diff --git a/demo/recommendation/preprocess.sh b/demo/recommendation/preprocess.sh deleted file mode 100755 index eeb81ce3cb47e65c0aeb303e7571024ba82dad65..0000000000000000000000000000000000000000 --- a/demo/recommendation/preprocess.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e - -UNAME_STR=`uname` - -if [[ ${UNAME_STR} == 'Linux' ]]; then - SHUF_PROG='shuf' -else - SHUF_PROG='gshuf' -fi - - -cd "$(dirname "$0")" -delimiter='::' -dir=ml-1m -cd data -echo 'generate meta config file' -python config_generator.py config.json > meta_config.json -echo 'generate meta file' -python meta_generator.py $dir meta.bin --config=meta_config.json -echo 'split train/test file' -python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1 -echo 'shuffle train file' -${SHUF_PROG} $dir/ratings.dat.train > ratings.dat.train -cp $dir/ratings.dat.test . -echo "./data/ratings.dat.train" > train.list -echo "./data/ratings.dat.test" > test.list diff --git a/demo/recommendation/requirements.txt b/demo/recommendation/requirements.txt deleted file mode 100644 index 1ea154584a428b6a389309f1f8def502e0aadfce..0000000000000000000000000000000000000000 --- a/demo/recommendation/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -PyYAML -docopt diff --git a/demo/recommendation/run.sh b/demo/recommendation/run.sh deleted file mode 100755 index 22aef556082ba429e9ca7c6dd3ec72699b9dbcf4..0000000000000000000000000000000000000000 --- a/demo/recommendation/run.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -paddle train \ - --config=trainer_config.py \ - --save_dir=./output \ - --use_gpu=false \ - --trainer_count=4\ - --test_all_data_in_one_period=true \ - --log_period=100 \ - --dot_period=1 \ - --num_passes=50 2>&1 | tee 'log.txt' -paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1 diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py deleted file mode 100755 index 25f529d7d7c430f179107fb189ade34760ab309d..0000000000000000000000000000000000000000 --- a/demo/recommendation/trainer_config.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -try: - import cPickle as pickle -except ImportError: - import pickle - -is_predict = get_config_arg('is_predict', bool, False) - -META_FILE = 'data/meta.bin' - -with open(META_FILE, 'rb') as f: - # load meta file - meta = pickle.load(f) - -settings( - batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer()) - - -def construct_feature(name): - """ - Construct movie/user features. - - This method read from meta data. 
Then convert feature to neural network due - to feature type. The map relation as follow. - - * id: embedding => fc - * embedding: - is_sequence: embedding => context_projection => fc => pool - not sequence: embedding => fc - * one_hot_dense: fc => fc - - Then gather all features vector, and use a fc layer to combined them as - return. - - :param name: 'movie' or 'user' - :type name: basestring - :return: combined feature output - :rtype: LayerOutput - """ - __meta__ = meta[name]['__meta__']['raw_meta'] - fusion = [] - for each_meta in __meta__: - type_name = each_meta['type'] - slot_name = each_meta.get('name', '%s_id' % name) - if type_name == 'id': - slot_dim = each_meta['max'] - embedding = embedding_layer( - input=data_layer( - slot_name, size=slot_dim), size=256) - fusion.append(fc_layer(input=embedding, size=256)) - elif type_name == 'embedding': - is_seq = each_meta['seq'] == 'sequence' - slot_dim = len(each_meta['dict']) - din = data_layer(slot_name, slot_dim) - embedding = embedding_layer(input=din, size=256) - if is_seq: - fusion.append( - text_conv_pool( - input=embedding, context_len=5, hidden_size=256)) - else: - fusion.append(fc_layer(input=embedding, size=256)) - elif type_name == 'one_hot_dense': - slot_dim = len(each_meta['dict']) - hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256) - fusion.append(fc_layer(input=hidden, size=256)) - - return fc_layer(name="%s_fusion" % name, input=fusion, size=256) - - -movie_feature = construct_feature("movie") -user_feature = construct_feature("user") -similarity = cos_sim(a=movie_feature, b=user_feature) -if not is_predict: - outputs(mse_cost(input=similarity, label=data_layer('rating', size=1))) - - define_py_data_sources2( - 'data/train.list', - 'data/test.list', - module='dataprovider', - obj='process', - args={'meta': meta}) -else: - outputs(similarity) diff --git a/demo/semantic_role_labeling/.gitignore b/demo/semantic_role_labeling/.gitignore deleted file mode 100644 index 65c9b674c7d1dad53b7d1c6ee1dcbdb72553888d..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -*.pyc -train.log -data/feature -data/conll05st-release/ -data/src.dict -data/test.wsj.props -data/test.wsj.seq_pair -data/test.wsj.words -data/tgt.dict -output -data/emb -data/targetDict.txt -data/verbDict.txt -data/wordDict.txt diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py deleted file mode 100644 index 3af636aef5879b43641d55bd7c9b0b8a1242ff8b..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/api_train_v2.py +++ /dev/null @@ -1,277 +0,0 @@ -import math -import numpy as np -import gzip -import logging -import paddle.v2.dataset.conll05 as conll05 -import paddle.v2.evaluator as evaluator -import paddle.v2 as paddle - -logger = logging.getLogger('paddle') - -word_dict, verb_dict, label_dict = conll05.get_dict() -word_dict_len = len(word_dict) -label_dict_len = len(label_dict) -pred_len = len(verb_dict) - -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 -default_std = 1 / math.sqrt(hidden_dim) / 3.0 -mix_hidden_lr = 1e-3 - - -def d_type(size): - return paddle.data_type.integer_value_sequence(size) - - -def db_lstm(): - #8 features - word = paddle.layer.data(name='word_data', type=d_type(word_dict_len)) - predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len)) - - ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len)) - ctx_n1 = 
paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len)) - ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len)) - ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len)) - ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len)) - mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len)) - - emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) - std_0 = paddle.attr.Param(initial_std=0.) - std_default = paddle.attr.Param(initial_std=default_std) - - predicate_embedding = paddle.layer.embedding( - size=word_dim, - input=predicate, - param_attr=paddle.attr.Param( - name='vemb', initial_std=default_std)) - mark_embedding = paddle.layer.embedding( - size=mark_dim, input=mark, param_attr=std_0) - - word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] - emb_layers = [ - paddle.layer.embedding( - size=word_dim, input=x, param_attr=emb_para) for x in word_input - ] - emb_layers.append(predicate_embedding) - emb_layers.append(mark_embedding) - - hidden_0 = paddle.layer.mixed( - size=hidden_dim, - bias_attr=std_default, - input=[ - paddle.layer.full_matrix_projection( - input=emb, param_attr=std_default) for emb in emb_layers - ]) - - lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) - hidden_para_attr = paddle.attr.Param( - initial_std=default_std, learning_rate=mix_hidden_lr) - - lstm_0 = paddle.layer.lstmemory( - input=hidden_0, - act=paddle.activation.Relu(), - gate_act=paddle.activation.Sigmoid(), - state_act=paddle.activation.Sigmoid(), - bias_attr=std_0, - param_attr=lstm_para_attr) - - #stack L-LSTM and R-LSTM with direct edges - input_tmp = [hidden_0, lstm_0] - - for i in range(1, depth): - mix_hidden = paddle.layer.mixed( - size=hidden_dim, - bias_attr=std_default, - input=[ - paddle.layer.full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - paddle.layer.full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ]) - - lstm = paddle.layer.lstmemory( - input=mix_hidden, - act=paddle.activation.Relu(), - gate_act=paddle.activation.Sigmoid(), - state_act=paddle.activation.Sigmoid(), - reverse=((i % 2) == 1), - bias_attr=std_0, - param_attr=lstm_para_attr) - - input_tmp = [mix_hidden, lstm] - - feature_out = paddle.layer.mixed( - size=label_dict_len, - bias_attr=std_default, - input=[ - paddle.layer.full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - paddle.layer.full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ], ) - - return feature_out - - -def load_parameter(file_name, h, w): - with open(file_name, 'rb') as f: - f.read(16) # skip header. 
- return np.fromfile(f, dtype=np.float32).reshape(h, w) - - -def train(): - paddle.init(use_gpu=False, trainer_count=1) - - # define network topology - feature_out = db_lstm() - target = paddle.layer.data(name='target', type=d_type(label_dict_len)) - crf_cost = paddle.layer.crf(size=label_dict_len, - input=feature_out, - label=target, - param_attr=paddle.attr.Param( - name='crfw', - initial_std=default_std, - learning_rate=mix_hidden_lr)) - - crf_dec = paddle.layer.crf_decoding( - size=label_dict_len, - input=feature_out, - label=target, - param_attr=paddle.attr.Param(name='crfw')) - evaluator.sum(input=crf_dec) - - # create parameters - parameters = paddle.parameters.create(crf_cost) - parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) - - # create optimizer - optimizer = paddle.optimizer.Momentum( - momentum=0, - learning_rate=2e-2, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage( - average_window=0.5, max_average_window=10000), ) - - trainer = paddle.trainer.SGD(cost=crf_cost, - parameters=parameters, - update_equation=optimizer, - extra_layers=crf_dec) - - reader = paddle.batch( - paddle.reader.shuffle( - conll05.test(), buf_size=8192), batch_size=10) - - feeding = { - 'word_data': 0, - 'ctx_n2_data': 1, - 'ctx_n1_data': 2, - 'ctx_0_data': 3, - 'ctx_p1_data': 4, - 'ctx_p2_data': 5, - 'verb_data': 6, - 'mark_data': 7, - 'target': 8 - } - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - logger.info("Pass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics)) - if event.batch_id and event.batch_id % 1000 == 0: - result = trainer.test(reader=reader, feeding=feeding) - logger.info("\nTest with Pass %d, Batch %d, %s" % - (event.pass_id, event.batch_id, result.metrics)) - - if isinstance(event, paddle.event.EndPass): - # save parameters - with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f: - parameters.to_tar(f) - - result = trainer.test(reader=reader, feeding=feeding) - logger.info("\nTest with Pass %d, %s" % - (event.pass_id, result.metrics)) - - trainer.train( - reader=reader, - event_handler=event_handler, - num_passes=10, - feeding=feeding) - - -def infer_a_batch(inferer, test_data, word_dict, pred_dict, label_dict): - probs = inferer.infer(input=test_data, field='id') - assert len(probs) == sum(len(x[0]) for x in test_data) - - for idx, test_sample in enumerate(test_data): - start_id = 0 - pred_str = "%s\t" % (pred_dict[test_sample[6][0]]) - - for w, tag in zip(test_sample[0], - probs[start_id:start_id + len(test_sample[0])]): - pred_str += "%s[%s] " % (word_dict[w], label_dict[tag]) - print(pred_str.strip()) - start_id += len(test_sample[0]) - - -def infer(): - label_dict_reverse = dict((value, key) - for key, value in label_dict.iteritems()) - word_dict_reverse = dict((value, key) - for key, value in word_dict.iteritems()) - pred_dict_reverse = dict((value, key) - for key, value in verb_dict.iteritems()) - - test_creator = paddle.dataset.conll05.test() - - paddle.init(use_gpu=False, trainer_count=1) - - # define network topology - feature_out = db_lstm() - predict = paddle.layer.crf_decoding( - size=label_dict_len, - input=feature_out, - param_attr=paddle.attr.Param(name='crfw')) - - test_pass = 0 - with gzip.open('params_pass_%d.tar.gz' % (test_pass)) as f: - parameters = paddle.parameters.Parameters.from_tar(f) - inferer = paddle.inference.Inference( - output_layer=predict, 
parameters=parameters) - - # prepare test data - test_data = [] - test_batch_size = 50 - - for idx, item in enumerate(test_creator()): - test_data.append(item[0:8]) - - if idx and (not idx % test_batch_size): - infer_a_batch( - inferer, - test_data, - word_dict_reverse, - pred_dict_reverse, - label_dict_reverse, ) - test_data = [] - infer_a_batch( - inferer, - test_data, - word_dict_reverse, - pred_dict_reverse, - label_dict_reverse, ) - test_data = [] - - -def main(is_inferring=False): - if is_inferring: - infer() - else: - train() - - -if __name__ == '__main__': - main(is_inferring=False) diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py deleted file mode 100644 index da44111976a0dec68345fc139d0aa459ca9211c2..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/extract_dict_feature.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from optparse import OptionParser - - -def extract_dict_features(pair_file, feature_file): - - with open(pair_file) as fin, open(feature_file, 'w') as feature_out: - for line in fin: - sentence, predicate, labels = line.strip().split('\t') - sentence_list = sentence.split() - labels_list = labels.split() - - verb_index = labels_list.index('B-V') - - mark = [0] * len(labels_list) - if verb_index > 0: - mark[verb_index - 1] = 1 - ctx_n1 = sentence_list[verb_index - 1] - else: - ctx_n1 = 'bos' - - if verb_index > 1: - mark[verb_index - 2] = 1 - ctx_n2 = sentence_list[verb_index - 2] - else: - ctx_n2 = 'bos' - - mark[verb_index] = 1 - ctx_0 = sentence_list[verb_index] - - if verb_index < len(labels_list) - 1: - mark[verb_index + 1] = 1 - ctx_p1 = sentence_list[verb_index + 1] - else: - ctx_p1 = 'eos' - - if verb_index < len(labels_list) - 2: - mark[verb_index + 2] = 1 - ctx_p2 = sentence_list[verb_index + 2] - else: - ctx_p2 = 'eos' - - - feature_str = sentence + '\t' \ - + predicate + '\t' \ - + ctx_n2 + '\t' \ - + ctx_n1 + '\t' \ - + ctx_0 + '\t' \ - + ctx_p1 + '\t' \ - + ctx_p2 + '\t' \ - + ' '.join([str(i) for i in mark]) + '\t' \ - + labels - - feature_out.write(feature_str + '\n') - - -if __name__ == '__main__': - - usage = '-p pair_file -f feature_file' - parser = OptionParser(usage) - parser.add_option('-p', dest='pair_file', help='the pair file') - parser.add_option('-f', dest='feature_file', help='the feature file') - - (options, args) = parser.parse_args() - - extract_dict_features(options.pair_file, options.feature_file) diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py deleted file mode 100644 index 94a8488c16734eb1882d54f7ec36f4b9308c09d4..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/extract_pairs.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from optparse import OptionParser - - -def read_labels(props_file): - ''' - a sentence maybe has more than one verb, each verb has its label sequence - label[], is a 3-dimension list. - the first dim is to store all sentence's label seqs, len is the sentence number - the second dim is to store all label sequences for one sentences - the third dim is to store each label for one word - ''' - labels = [] - with open(props_file) as fin: - label_seqs_for_one_sentences = [] - one_seg_in_file = [] - for line in fin: - line = line.strip() - if line == '': - for i in xrange(len(one_seg_in_file[0])): - a_kind_lable = [x[i] for x in one_seg_in_file] - label_seqs_for_one_sentences.append(a_kind_lable) - labels.append(label_seqs_for_one_sentences) - one_seg_in_file = [] - label_seqs_for_one_sentences = [] - else: - part = line.split() - one_seg_in_file.append(part) - return labels - - -def read_sentences(words_file): - sentences = [] - with open(words_file) as fin: - s = '' - for line in fin: - line = line.strip() - if line == '': - sentences.append(s) - s = '' - else: - s += line + ' ' - return sentences - - -def transform_labels(sentences, labels): - sen_lab_pair = [] - for i in xrange(len(sentences)): - if len(labels[i]) == 1: - continue - else: - verb_list = [] - for x in labels[i][0]: - if x != '-': - verb_list.append(x) - - for j in xrange(1, len(labels[i])): - label_list = labels[i][j] - current_tag = 'O' - is_in_bracket = False - label_seq = [] - verb_word = '' - for ll in label_list: - if ll == '*' and is_in_bracket == False: - label_seq.append('O') - elif ll == '*' and is_in_bracket == True: - label_seq.append('I-' + current_tag) - elif ll == '*)': - label_seq.append('I-' + current_tag) - is_in_bracket = False - elif ll.find('(') != -1 and ll.find(')') != -1: - current_tag = ll[1:ll.find('*')] - label_seq.append('B-' + current_tag) - is_in_bracket = False - elif ll.find('(') != -1 and ll.find(')') == -1: - current_tag = ll[1:ll.find('*')] - label_seq.append('B-' + current_tag) - is_in_bracket = True - else: - print 'error:', ll - sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq)) - return sen_lab_pair - - -def write_file(sen_lab_pair, output_file): - with open(output_file, 'w') as fout: - for x in sen_lab_pair: - sentence = x[0] - label_seq = ' '.join(x[2]) - assert len(sentence.split()) == len(x[2]) - fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n') - - -if __name__ == '__main__': - - usage = '-w words_file -p props_file -o output_file' - parser = OptionParser(usage) - parser.add_option('-w', dest='words_file', help='the words file') - parser.add_option('-p', dest='props_file', help='the props file') - parser.add_option('-o', dest='output_file', help='the output_file') - (options, args) = parser.parse_args() - - sentences = read_sentences(options.words_file) - labels = read_labels(options.props_file) - sen_lab_pair = transform_labels(sentences, labels) - - 
write_file(sen_lab_pair, options.output_file) diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh deleted file mode 100755 index a0ef26a13b9a03392cb8b6207d6d21b7761e38e8..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/get_data.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb -tar -xzvf conll05st-tests.tar.gz -rm conll05st-tests.tar.gz -cp ./conll05st-release/test.wsj/words/test.wsj.words.gz . -cp ./conll05st-release/test.wsj/props/test.wsj.props.gz . -gunzip test.wsj.words.gz -gunzip test.wsj.props.gz - -python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair -python extract_dict_feature.py -p test.wsj.seq_pair -f feature diff --git a/demo/semantic_role_labeling/data/test.list b/demo/semantic_role_labeling/data/test.list deleted file mode 100644 index ec370e897a7811b572613150ccb6f665c3adb974..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/test.list +++ /dev/null @@ -1 +0,0 @@ -./data/feature diff --git a/demo/semantic_role_labeling/data/train.list b/demo/semantic_role_labeling/data/train.list deleted file mode 100644 index ec370e897a7811b572613150ccb6f665c3adb974..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/train.list +++ /dev/null @@ -1 +0,0 @@ -./data/feature diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py deleted file mode 100644 index 360c57ea6283ca43986610abf1831742bfc0c3ef..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/dataprovider.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer.PyDataProvider2 import * - -UNK_IDX = 0 - - -def hook(settings, word_dict, label_dict, predicate_dict, **kwargs): - settings.word_dict = word_dict - settings.label_dict = label_dict - settings.predicate_dict = predicate_dict - - #all inputs are integral and sequential type - settings.slots = [ - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(predicate_dict)), integer_value_sequence(2), - integer_value_sequence(len(label_dict)) - ] - - -def get_batch_size(yeild_data): - return len(yeild_data[0]) - - -@provider( - init_hook=hook, - should_shuffle=True, - calc_batch_size=get_batch_size, - can_over_batch_size=True, - cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \ - line.strip().split('\t') - - words = sentence.split() - sen_len = len(words) - word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words] - - predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len - ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - label_list = label.split() - label_slot = [settings.label_dict.get(w) for w in label_list] - yield word_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py deleted file mode 100644 index 04e2a559b19bd4b9aec0242eb43edf6ab1e7624e..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/db_lstm.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import math -import os -import sys -from paddle.trainer_config_helpers import * - -#file paths -word_dict_file = './data/wordDict.txt' -label_dict_file = './data/targetDict.txt' -predicate_file = './data/verbDict.txt' -train_list_file = './data/train.list' -test_list_file = './data/test.list' - -is_test = get_config_arg('is_test', bool, False) -is_predict = get_config_arg('is_predict', bool, False) - -if not is_predict: - #load dictionaries - word_dict = dict() - label_dict = dict() - predicate_dict = dict() - with open(word_dict_file, 'r') as f_word, \ - open(label_dict_file, 'r') as f_label, \ - open(predicate_file, 'r') as f_pre: - for i, line in enumerate(f_word): - w = line.strip() - word_dict[w] = i - - for i, line in enumerate(f_label): - w = line.strip() - label_dict[w] = i - - for i, line in enumerate(f_pre): - w = line.strip() - predicate_dict[w] = i - - if is_test: - train_list_file = None - - #define data provider - define_py_data_sources2( - train_list=train_list_file, - test_list=test_list_file, - module='dataprovider', - obj='process', - args={ - 'word_dict': word_dict, - 'label_dict': label_dict, - 'predicate_dict': predicate_dict - }) - - word_dict_len = len(word_dict) - label_dict_len = len(label_dict) - pred_len = len(predicate_dict) - -else: - word_dict_len = get_config_arg('dict_len', int) - label_dict_len = get_config_arg('label_len', int) - pred_len = get_config_arg('pred_len', int) - -############################## Hyper-parameters ################################## -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 - -########################### Optimizer ####################################### - -settings( - batch_size=150, - learning_method=MomentumOptimizer(momentum=0), - learning_rate=2e-2, - regularization=L2Regularization(8e-4), - is_async=False, - model_average=ModelAverage( - average_window=0.5, max_average_window=10000), ) - -####################################### network ############################## -#8 features and 1 target -word = data_layer(name='word_data', size=word_dict_len) -predicate = data_layer(name='verb_data', size=pred_len) - -ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len) -ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len) -ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len) -ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len) -ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len) -mark = data_layer(name='mark_data', size=mark_dict_len) - -if not is_predict: - target = data_layer(name='target', size=label_dict_len) - -default_std = 1 / math.sqrt(hidden_dim) / 3.0 - -emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.) -std_0 = ParameterAttribute(initial_std=0.) 
-std_default = ParameterAttribute(initial_std=default_std) - -predicate_embedding = embedding_layer( - size=word_dim, - input=predicate, - param_attr=ParameterAttribute( - name='vemb', initial_std=default_std)) -mark_embedding = embedding_layer( - name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0) - -word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] -emb_layers = [ - embedding_layer( - size=word_dim, input=x, param_attr=emb_para) for x in word_input -] -emb_layers.append(predicate_embedding) -emb_layers.append(mark_embedding) - -hidden_0 = mixed_layer( - name='hidden0', - size=hidden_dim, - bias_attr=std_default, - input=[ - full_matrix_projection( - input=emb, param_attr=std_default) for emb in emb_layers - ]) - -mix_hidden_lr = 1e-3 -lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0) -hidden_para_attr = ParameterAttribute( - initial_std=default_std, learning_rate=mix_hidden_lr) - -lstm_0 = lstmemory( - name='lstm0', - input=hidden_0, - act=ReluActivation(), - gate_act=SigmoidActivation(), - state_act=SigmoidActivation(), - bias_attr=std_0, - param_attr=lstm_para_attr) - -#stack L-LSTM and R-LSTM with direct edges -input_tmp = [hidden_0, lstm_0] - -for i in range(1, depth): - - mix_hidden = mixed_layer( - name='hidden' + str(i), - size=hidden_dim, - bias_attr=std_default, - input=[ - full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ]) - - lstm = lstmemory( - name='lstm' + str(i), - input=mix_hidden, - act=ReluActivation(), - gate_act=SigmoidActivation(), - state_act=SigmoidActivation(), - reverse=((i % 2) == 1), - bias_attr=std_0, - param_attr=lstm_para_attr) - - input_tmp = [mix_hidden, lstm] - -feature_out = mixed_layer( - name='output', - size=label_dict_len, - bias_attr=std_default, - input=[ - full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ], ) - -if not is_predict: - crf_l = crf_layer( - name='crf', - size=label_dict_len, - input=feature_out, - label=target, - param_attr=ParameterAttribute( - name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr)) - - crf_dec_l = crf_decoding_layer( - name='crf_dec_l', - size=label_dict_len, - input=feature_out, - label=target, - param_attr=ParameterAttribute(name='crfw')) - - eval = sum_evaluator(input=crf_dec_l) - - outputs(crf_l) - -else: - crf_dec_l = crf_decoding_layer( - name='crf_dec_l', - size=label_dict_len, - input=feature_out, - param_attr=ParameterAttribute(name='crfw')) - - outputs(crf_dec_l) diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py deleted file mode 100644 index 372fd090b6e8f08f5bb34697772c2e4976810595..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/predict.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -from optparse import OptionParser -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import integer_value_sequence -from paddle.trainer.config_parser import parse_config -""" -Usage: run following command to show help message. - python predict.py -h -""" -UNK_IDX = 0 - - -class Prediction(): - def __init__(self, train_conf, dict_file, model_dir, label_file, - predicate_dict_file): - """ - train_conf: trainer configure. - dict_file: word dictionary file name. - model_dir: directory of model. - """ - - self.dict = {} - self.labels = {} - self.predicate_dict = {} - self.labels_reverse = {} - self.load_dict_label(dict_file, label_file, predicate_dict_file) - - len_dict = len(self.dict) - len_label = len(self.labels) - len_pred = len(self.predicate_dict) - - conf = parse_config( - train_conf, 'dict_len=' + str(len_dict) + ',label_len=' + - str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True') - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(model_dir) - - slots = [ - integer_value_sequence(len_dict), integer_value_sequence(len_dict), - integer_value_sequence(len_dict), integer_value_sequence(len_dict), - integer_value_sequence(len_dict), integer_value_sequence(len_dict), - integer_value_sequence(len_pred), integer_value_sequence(2) - ] - self.converter = DataProviderConverter(slots) - - def load_dict_label(self, dict_file, label_file, predicate_dict_file): - """ - Load dictionary from self.dict_file. - """ - for line_count, line in enumerate(open(dict_file, 'r')): - self.dict[line.strip()] = line_count - - for line_count, line in enumerate(open(label_file, 'r')): - self.labels[line.strip()] = line_count - self.labels_reverse[line_count] = line.strip() - - for line_count, line in enumerate(open(predicate_dict_file, 'r')): - self.predicate_dict[line.strip()] = line_count - - def get_data(self, data_file): - """ - Get input data of paddle format. - """ - with open(data_file, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip( - ).split('\t') - words = sentence.split() - sen_len = len(words) - - word_slot = [self.dict.get(w, UNK_IDX) for w in words] - predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX) - ] * sen_len - ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - yield word_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot - - def predict(self, data_file, output_file): - """ - data_file: file name of input data. 
- """ - input = self.converter(self.get_data(data_file)) - output = self.network.forwardTest(input) - lab = output[0]["id"].tolist() - - with open(data_file, 'r') as fin, open(output_file, 'w') as fout: - index = 0 - for line in fin: - sen = line.split('\t')[0] - len_sen = len(sen.split()) - line_labels = lab[index:index + len_sen] - index += len_sen - fout.write(sen + '\t' + ' '.join( - [self.labels_reverse[i] for i in line_labels]) + '\n') - - -def option_parser(): - usage = ( - "python predict.py -c config -w model_dir " - "-d word dictionary -l label_file -i input_file -p pred_dict_file") - parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option( - "-c", - "--tconf", - action="store", - dest="train_conf", - help="network config") - parser.add_option( - "-d", - "--dict", - action="store", - dest="dict_file", - help="dictionary file") - parser.add_option( - "-l", - "--label", - action="store", - dest="label_file", - default=None, - help="label file") - parser.add_option( - "-p", - "--predict_dict_file", - action="store", - dest="predict_dict_file", - default=None, - help="predict_dict_file") - parser.add_option( - "-i", - "--data", - action="store", - dest="data_file", - help="data file to predict") - parser.add_option( - "-w", - "--model", - action="store", - dest="model_path", - default=None, - help="model path") - - parser.add_option( - "-o", - "--output_file", - action="store", - dest="output_file", - default=None, - help="output file") - return parser.parse_args() - - -def main(): - options, args = option_parser() - train_conf = options.train_conf - data_file = options.data_file - dict_file = options.dict_file - model_path = options.model_path - label_file = options.label_file - predict_dict_file = options.predict_dict_file - output_file = options.output_file - - swig_paddle.initPaddle("--use_gpu=0") - predict = Prediction(train_conf, dict_file, model_path, label_file, - predict_dict_file) - predict.predict(data_file, output_file) - - -if __name__ == '__main__': - main() diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh deleted file mode 100755 index 873aad670d16803ce321ab60baabe9fe29ea64bf..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/predict.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -best_model_path="output/pass-${LOG[1]}" - -config_file=db_lstm.py -dict_file=./data/wordDict.txt -label_file=./data/targetDict.txt -predicate_dict_file=./data/verbDict.txt -input_file=./data/feature -output_file=predict.res - -python predict.py \ - -c $config_file \ - -w $best_model_path \ - -l $label_file \ - -p $predicate_dict_file \ - -d $dict_file \ - -i $input_file \ - -o $output_file diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh deleted file mode 100755 index 095bbff2ea42627a13d8ebab436f5a05abc09743..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/test.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -evaluate_pass="output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass -model_list=./model.list -touch $model_list | echo $evaluate_pass > $model_list - -paddle train \ - --config=./db_lstm.py \ - --model_list=$model_list \ - --job=test \ - --use_gpu=false \ - --config_args=is_test=1 \ - --test_all_data_in_one_period=1 \ -2>&1 | tee 'test.log' -paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1 diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh deleted file mode 100755 index eee14010d7b04a1b824f39090fa82fc532085e0d..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/train.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -paddle train \ - --config=./db_lstm.py \ - --use_gpu=0 \ - --log_period=5000 \ - --trainer_count=1 \ - --show_parameter_stats_period=5000 \ - --save_dir=./output \ - --num_passes=10000 \ - --average_test_period=10000000 \ - --init_model_path=./data \ - --load_missing_parameter_strategy=rand \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' -paddle usage -l train.log -e $? 
-n "semantic_role_labeling_train" >/dev/null 2>&1 diff --git a/demo/sentiment/.gitignore b/demo/sentiment/.gitignore deleted file mode 100644 index bf2a9ab1ce3c937bf06179074cd952dc53591dfd..0000000000000000000000000000000000000000 --- a/demo/sentiment/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -data/aclImdb -data/imdb -data/pre-imdb -data/mosesdecoder-master -logs/ -model_output -dataprovider_copy_1.py -model.list -test.log -train.log -*.pyc diff --git a/demo/sentiment/data/get_imdb.sh b/demo/sentiment/data/get_imdb.sh deleted file mode 100755 index 7600af6fbb900ee845702f1297779c1f0ed9bf84..0000000000000000000000000000000000000000 --- a/demo/sentiment/data/get_imdb.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e -set -x - -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd $DIR - -#download the dataset -echo "Downloading aclImdb..." -#http://ai.stanford.edu/%7Eamaas/data/sentiment/ -wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz - -echo "Downloading mosesdecoder..." -#https://github.com/moses-smt/mosesdecoder -wget https://github.com/moses-smt/mosesdecoder/archive/master.zip - -#extract package -echo "Unzipping..." -tar -zxvf aclImdb_v1.tar.gz -unzip master.zip - -#move train and test set to imdb_data directory -#in order to process when traing -mkdir -p imdb/train -mkdir -p imdb/test - -cp -r aclImdb/train/pos/ imdb/train/pos -cp -r aclImdb/train/neg/ imdb/train/neg - -cp -r aclImdb/test/pos/ imdb/test/pos -cp -r aclImdb/test/neg/ imdb/test/neg - -#remove compressed package -rm aclImdb_v1.tar.gz -rm master.zip - -echo "Done." diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py deleted file mode 100755 index 4b7f5d0e504aef3884a04cbed8c16503a4079772..0000000000000000000000000000000000000000 --- a/demo/sentiment/dataprovider.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from paddle.trainer.PyDataProvider2 import * - - -def hook(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [ - integer_value_sequence(len(settings.word_dict)), integer_value(2) - ] - settings.logger.info('dict len : %d' % (len(settings.word_dict))) - - -@provider(init_hook=hook) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line_count, line in enumerate(fdata): - label, comment = line.strip().split('\t\t') - label = int(label) - words = comment.split() - word_slot = [ - settings.word_dict[w] for w in words if w in settings.word_dict - ] - if not word_slot: - continue - yield word_slot, label diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py deleted file mode 100755 index 64c78e0d6b9297e7a321a4f070517593b0bfe332..0000000000000000000000000000000000000000 --- a/demo/sentiment/predict.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -from optparse import OptionParser -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import integer_value_sequence -from paddle.trainer.config_parser import parse_config -""" -Usage: run following command to show help message. - python predict.py -h -""" - - -class SentimentPrediction(): - def __init__(self, train_conf, dict_file, model_dir=None, label_file=None): - """ - train_conf: trainer configure. - dict_file: word dictionary file name. - model_dir: directory of model. - """ - self.train_conf = train_conf - self.dict_file = dict_file - self.word_dict = {} - self.dict_dim = self.load_dict() - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.label = None - if label_file is not None: - self.load_label(label_file) - - conf = parse_config(train_conf, "is_predict=1") - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(self.model_dir) - input_types = [integer_value_sequence(self.dict_dim)] - self.converter = DataProviderConverter(input_types) - - def load_dict(self): - """ - Load dictionary from self.dict_file. - """ - for line_count, line in enumerate(open(self.dict_file, 'r')): - self.word_dict[line.strip().split('\t')[0]] = line_count - return len(self.word_dict) - - def load_label(self, label_file): - """ - Load label. - """ - self.label = {} - for v in open(label_file, 'r'): - self.label[int(v.split('\t')[1])] = v.split('\t')[0] - - def get_index(self, data): - """ - transform word into integer index according to the dictionary. 
- """ - words = data.strip().split() - word_slot = [self.word_dict[w] for w in words if w in self.word_dict] - return word_slot - - def batch_predict(self, data_batch): - input = self.converter(data_batch) - output = self.network.forwardTest(input) - prob = output[0]["value"] - labs = np.argsort(-prob) - for idx, lab in enumerate(labs): - if self.label is None: - print("predicting label is %d" % (lab[0])) - else: - print("predicting label is %s" % (self.label[lab[0]])) - - -def option_parser(): - usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " - parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option( - "-n", - "--tconf", - action="store", - dest="train_conf", - help="network config") - parser.add_option( - "-d", - "--dict", - action="store", - dest="dict_file", - help="dictionary file") - parser.add_option( - "-b", - "--label", - action="store", - dest="label", - default=None, - help="dictionary file") - parser.add_option( - "-c", - "--batch_size", - type="int", - action="store", - dest="batch_size", - default=1, - help="the batch size for prediction") - parser.add_option( - "-w", - "--model", - action="store", - dest="model_path", - default=None, - help="model path") - return parser.parse_args() - - -def main(): - options, args = option_parser() - train_conf = options.train_conf - batch_size = options.batch_size - dict_file = options.dict_file - model_path = options.model_path - label = options.label - swig_paddle.initPaddle("--use_gpu=0") - predict = SentimentPrediction(train_conf, dict_file, model_path, label) - - batch = [] - for line in sys.stdin: - words = predict.get_index(line) - if words: - batch.append([words]) - else: - print('All the words in [%s] are not in the dictionary.' % line) - if len(batch) == batch_size: - predict.batch_predict(batch) - batch = [] - if len(batch) > 0: - predict.batch_predict(batch) - - -if __name__ == '__main__': - main() diff --git a/demo/sentiment/predict.sh b/demo/sentiment/predict.sh deleted file mode 100755 index c72a8e8641516543ef267fcb4b448630246d1e8d..0000000000000000000000000000000000000000 --- a/demo/sentiment/predict.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -#Note the default model is pass-00002, you shold make sure the model path -#exists or change the mode path. -model=model_output/pass-00002/ -config=trainer_config.py -label=data/pre-imdb/labels.list -cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \ - --tconf=$config\ - --model=$model \ - --label=$label \ - --dict=./data/pre-imdb/dict.txt \ - --batch_size=1 diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py deleted file mode 100755 index 29b3682b747c66574590de5ea70574981cc536bb..0000000000000000000000000000000000000000 --- a/demo/sentiment/preprocess.py +++ /dev/null @@ -1,359 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import random -import operator -import numpy as np -from subprocess import Popen, PIPE -from os.path import join as join_path -from optparse import OptionParser - -from paddle.utils.preprocess_util import * -""" -Usage: run following command to show help message. - python preprocess.py -h -""" - - -def save_dict(dict, filename, is_reverse=True): - """ - Save dictionary into file. - dict: input dictionary. - filename: output file name, string. - is_reverse: True, descending order by value. - False, ascending order by value. - """ - f = open(filename, 'w') - for k, v in sorted(dict.items(), key=operator.itemgetter(1),\ - reverse=is_reverse): - f.write('%s\t%s\n' % (k, v)) - f.close() - - -def tokenize(sentences): - """ - Use tokenizer.perl to tokenize input sentences. - tokenizer.perl is tool of Moses. - sentences : a list of input sentences. - return: a list of processed text. - """ - dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl' - tokenizer_cmd = [dir, '-l', 'en', '-q', '-'] - assert isinstance(sentences, list) - text = "\n".join(sentences) - tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) - tok_text, _ = tokenizer.communicate(text) - toks = tok_text.split('\n')[:-1] - return toks - - -def read_lines(path): - """ - path: String, file path. - return a list of sequence. - """ - seqs = [] - with open(path, 'r') as f: - for line in f.readlines(): - line = line.strip() - if len(line): - seqs.append(line) - return seqs - - -class SentimentDataSetCreate(): - """ - A class to process data for sentiment analysis task. - """ - - def __init__(self, - data_path, - output_path, - use_okenizer=True, - multi_lines=False): - """ - data_path: string, traing and testing dataset path - output_path: string, output path, store processed dataset - multi_lines: whether a file has multi lines. - In order to shuffle fully, it needs to read all files into - memory, then shuffle them if one file has multi lines. 
- """ - self.output_path = output_path - self.data_path = data_path - - self.train_dir = 'train' - self.test_dir = 'test' - - self.train_list = "train.list" - self.test_list = "test.list" - - self.label_list = "labels.list" - self.classes_num = 0 - - self.batch_size = 50000 - self.batch_dir = 'batches' - - self.dict_file = "dict.txt" - self.dict_with_test = False - self.dict_size = 0 - self.word_count = {} - - self.tokenizer = use_okenizer - self.overwrite = False - - self.multi_lines = multi_lines - - self.train_dir = join_path(data_path, self.train_dir) - self.test_dir = join_path(data_path, self.test_dir) - self.train_list = join_path(output_path, self.train_list) - self.test_list = join_path(output_path, self.test_list) - self.label_list = join_path(output_path, self.label_list) - self.dict_file = join_path(output_path, self.dict_file) - - def data_list(self, path): - """ - create dataset from path - path: data path - return: data list - """ - label_set = get_label_set_from_dir(path) - data = [] - for lab_name in label_set.keys(): - file_paths = list_files(join_path(path, lab_name)) - for p in file_paths: - data.append({"label" : label_set[lab_name],\ - "seq_path": p}) - return data, label_set - - def create_dict(self, data): - """ - create dict for input data. - data: list, [sequence, sequnce, ...] - """ - for seq in data: - for w in seq.strip().lower().split(): - if w not in self.word_count: - self.word_count[w] = 1 - else: - self.word_count[w] += 1 - - def create_dataset(self): - """ - create file batches and dictionary of train data set. - If the self.overwrite is false and train.list already exists in - self.output_path, this function will not create and save file - batches from the data set path. - return: dictionary size, class number. - """ - out_path = self.output_path - if out_path and not os.path.exists(out_path): - os.makedirs(out_path) - - # If self.overwrite is false or self.train_list has existed, - # it will not process dataset. - if not (self.overwrite or not os.path.exists(self.train_list)): - print "%s already exists." % self.train_list - return - - # Preprocess train data. - train_data, train_lab_set = self.data_list(self.train_dir) - print "processing train set..." - file_lists = self.save_data(train_data, "train", self.batch_size, True, - True) - save_list(file_lists, self.train_list) - - # If have test data path, preprocess test data. - if os.path.exists(self.test_dir): - test_data, test_lab_set = self.data_list(self.test_dir) - assert (train_lab_set == test_lab_set) - print "processing test set..." - file_lists = self.save_data(test_data, "test", self.batch_size, - False, self.dict_with_test) - save_list(file_lists, self.test_list) - - # save labels set. - save_dict(train_lab_set, self.label_list, False) - self.classes_num = len(train_lab_set.keys()) - - # save dictionary. - save_dict(self.word_count, self.dict_file, True) - self.dict_size = len(self.word_count) - - def save_data(self, - data, - prefix="", - batch_size=50000, - is_shuffle=False, - build_dict=False): - """ - Create batches for a Dataset object. - data: the Dataset object to process. - prefix: the prefix of each batch. - batch_size: number of data in each batch. 
-        build_dict: whether to build dictionary for data
-
-        return: list of batch names
-        """
-        if is_shuffle and self.multi_lines:
-            return self.save_data_multi_lines(data, prefix, batch_size,
-                                               build_dict)
-
-        if is_shuffle:
-            random.shuffle(data)
-        num_batches = int(math.ceil(len(data) / float(batch_size)))
-        batch_names = []
-        for i in range(num_batches):
-            batch_name = join_path(self.output_path,
-                                   "%s_part_%03d" % (prefix, i))
-            begin = i * batch_size
-            end = min((i + 1) * batch_size, len(data))
-            # read a batch of data
-            label_list, data_list = self.get_data_list(begin, end, data)
-            if build_dict:
-                self.create_dict(data_list)
-            self.save_file(label_list, data_list, batch_name)
-            batch_names.append(batch_name)
-
-        return batch_names
-
-    def get_data_list(self, begin, end, data):
-        """
-        begin: int, beginning index of data.
-        end: int, ending index of data.
-        data: a list of {"seq_path": sequence path, "label": label index}
-
-        return a list of labels and a list of sequences.
-        """
-        label_list = []
-        data_list = []
-        for j in range(begin, end):
-            seqs = read_lines(data[j]["seq_path"])
-            lab = int(data[j]["label"])
-            # File may have multiple lines.
-            for seq in seqs:
-                data_list.append(seq)
-                label_list.append(lab)
-        if self.tokenizer:
-            data_list = tokenize(data_list)
-        return label_list, data_list
-
-    def save_data_multi_lines(self,
-                              data,
-                              prefix="",
-                              batch_size=50000,
-                              build_dict=False):
-        """
-        In order to shuffle fully, there is no need to load all data into
-        memory if each file only contains one sample; it is enough to shuffle
-        the list of file names. But if one file contains multiple lines, each
-        line is one sample, so all data has to be read into memory to shuffle
-        fully. This interface is mainly for data containing multiple lines per
-        file, which consumes more memory when there is a large amount of data.
-
-        data: the Dataset object to process.
-        prefix: the prefix of each batch.
-        batch_size: number of data in each batch.
-        build_dict: whether to build dictionary for data
-
-        return: list of batch names
-        """
-        assert self.multi_lines
-        label_list = []
-        data_list = []
-
-        # read all data
-        label_list, data_list = self.get_data_list(0, len(data), data)
-        if build_dict:
-            self.create_dict(data_list)
-
-        length = len(label_list)
-        perm_list = np.array([i for i in xrange(length)])
-        random.shuffle(perm_list)
-
-        num_batches = int(math.ceil(length / float(batch_size)))
-        batch_names = []
-        for i in range(num_batches):
-            batch_name = join_path(self.output_path,
-                                   "%s_part_%03d" % (prefix, i))
-            begin = i * batch_size
-            end = min((i + 1) * batch_size, length)
-            sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
-            sub_data = [data_list[perm_list[i]] for i in range(begin, end)]
-            self.save_file(sub_label, sub_data, batch_name)
-            batch_names.append(batch_name)
-
-        return batch_names
-
-    def save_file(self, label_list, data_list, filename):
-        """
-        Save data into file.
-        label_list: a list of int values.
-        data_list: a list of sequences.
-        filename: output file name.
-        """
-        f = open(filename, 'w')
-        print "saving file: %s" % filename
-        for lab, seq in zip(label_list, data_list):
-            f.write('%s\t\t%s\n' % (lab, seq))
-        f.close()
-
-
-def option_parser():
-    parser = OptionParser(usage="usage: python preprocess.py "\
-                          "-i data_dir [options]")
-    parser.add_option(
-        "-i",
-        "--data",
-        action="store",
-        dest="input",
-        help="Input data directory.")
-    parser.add_option(
-        "-o",
-        "--output",
-        action="store",
-        dest="output",
-        default=None,
-        help="Output directory.")
-    parser.add_option(
-        "-t",
-        "--tokenizer",
-        action="store",
-        dest="use_tokenizer",
-        default=True,
-        help="Whether to use tokenizer.")
-    parser.add_option("-m", "--multi_lines", action="store",
-                      dest="multi_lines", default=False,
-                      help="If input text files have multiple lines and they "\
-                           "need to be shuffled, you should set -m True.")
-    return parser.parse_args()
-
-
-def main():
-    options, args = option_parser()
-    data_dir = options.input
-    output_dir = options.output
-    use_tokenizer = options.use_tokenizer
-    multi_lines = options.multi_lines
-    if output_dir is None:
-        outname = os.path.basename(options.input)
-        output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
-    data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
-                                          multi_lines)
-    data_creator.create_dataset()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/demo/sentiment/preprocess.sh b/demo/sentiment/preprocess.sh
deleted file mode 100755
index 19ec34d4f016365d18db01ddec559d26202b19c6..0000000000000000000000000000000000000000
--- a/demo/sentiment/preprocess.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-echo "Start to preprocess..."
-
-data_dir="./data/imdb"
-python preprocess.py -i $data_dir
-
-echo "Done."
diff --git a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py
deleted file mode 100644
index a01577ca5ae025b7bec67c6d54c7dbd931dbee74..0000000000000000000000000000000000000000
--- a/demo/sentiment/sentiment_net.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from os.path import join as join_path
-
-from paddle.trainer_config_helpers import *
-
-
-def sentiment_data(data_dir=None,
-                   is_test=False,
-                   is_predict=False,
-                   train_list="train.list",
-                   test_list="test.list",
-                   dict_file="dict.txt"):
-    """
-    Predefined data provider for sentiment analysis.
-    is_test: whether this config is used for testing.
-    is_predict: whether this config is used for prediction.
-    train_list: text file name, containing a list of the training set.
-    test_list: text file name, containing a list of the testing set.
-    dict_file: text file name, containing the dictionary.
-    """
-    dict_dim = len(open(join_path(data_dir, "dict.txt")).readlines())
-    class_dim = len(open(join_path(data_dir, 'labels.list')).readlines())
-    if is_predict:
-        return dict_dim, class_dim
-
-    if data_dir is not None:
-        train_list = join_path(data_dir, train_list)
-        test_list = join_path(data_dir, test_list)
-        dict_file = join_path(data_dir, dict_file)
-
-    train_list = train_list if not is_test else None
-    word_dict = dict()
-    with open(dict_file, 'r') as f:
-        for i, line in enumerate(open(dict_file, 'r')):
-            word_dict[line.split('\t')[0]] = i
-
-    define_py_data_sources2(
-        train_list,
-        test_list,
-        module="dataprovider",
-        obj="process",
-        args={'dictionary': word_dict})
-
-    return dict_dim, class_dim
-
-
-def bidirectional_lstm_net(input_dim,
-                           class_dim=2,
-                           emb_dim=128,
-                           lstm_dim=128,
-                           is_predict=False):
-    data = data_layer("word", input_dim)
-    emb = embedding_layer(input=data, size=emb_dim)
-    bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
-    dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-    output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
-
-    if not is_predict:
-        lbl = data_layer("label", 1)
-        outputs(classification_cost(input=output, label=lbl))
-    else:
-        outputs(output)
-
-
-def stacked_lstm_net(input_dim,
-                     class_dim=2,
-                     emb_dim=128,
-                     hid_dim=512,
-                     stacked_num=3,
-                     is_predict=False):
-    """
-    A wrapper for the sentiment classification task.
-    This network uses a bi-directional recurrent network consisting
-    of three LSTM layers. This configuration is based on the paper at
-    the following URL, but uses fewer layers.
-        http://www.aclweb.org/anthology/P15-1109
-
-    input_dim: here is word dictionary dimension.
-    class_dim: number of categories.
-    emb_dim: dimension of word embedding.
-    hid_dim: dimension of hidden layer.
-    stacked_num: number of stacked lstm-hidden layers.
-    is_predict: is predicting or not.
-                Some layers are not needed in the network when predicting.
-    """
-    hid_lr = 1e-3
-    assert stacked_num % 2 == 1
-
-    layer_attr = ExtraLayerAttribute(drop_rate=0.5)
-    fc_para_attr = ParameterAttribute(learning_rate=hid_lr)
-    lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=1.)
-    para_attr = [fc_para_attr, lstm_para_attr]
-    bias_attr = ParameterAttribute(initial_std=0., l2_rate=0.)
- relu = ReluActivation() - linear = LinearActivation() - - data = data_layer("word", input_dim) - emb = embedding_layer(input=data, size=emb_dim) - - fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) - lstm1 = lstmemory( - input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = fc_layer( - input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr, - layer_attr=layer_attr) - inputs = [fc, lstm] - - fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling()) - lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling()) - output = fc_layer( - input=[fc_last, lstm_last], - size=class_dim, - act=SoftmaxActivation(), - bias_attr=bias_attr, - param_attr=para_attr) - - if is_predict: - outputs(output) - else: - outputs(classification_cost(input=output, label=data_layer('label', 1))) diff --git a/demo/sentiment/test.sh b/demo/sentiment/test.sh deleted file mode 100755 index 85c4f3ccfc3ede23fcf701769b9701ecbf57c789..0000000000000000000000000000000000000000 --- a/demo/sentiment/test.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -evaluate_pass="model_output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass - -model_list=./model.list -touch $model_list | echo $evaluate_pass > $model_list -net_conf=trainer_config.py -paddle train --config=$net_conf \ - --model_list=$model_list \ - --job=test \ - --use_gpu=false \ - --trainer_count=4 \ - --config_args=is_test=1 \ - 2>&1 | tee 'test.log' -paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1 diff --git a/demo/sentiment/train.sh b/demo/sentiment/train.sh deleted file mode 100755 index 14620f733bf03444e5ba3b3b792dfbed6146ecde..0000000000000000000000000000000000000000 --- a/demo/sentiment/train.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e - -config=trainer_config.py -output=./model_output -paddle train --config=$config \ - --save_dir=$output \ - --job=train \ - --use_gpu=false \ - --trainer_count=4 \ - --num_passes=10 \ - --log_period=10 \ - --dot_period=20 \ - --show_parameter_stats_period=100 \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' -paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1 diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py deleted file mode 100644 index 1c856556bd0cb32f60eba322469b3621c37e1349..0000000000000000000000000000000000000000 --- a/demo/sentiment/train_v2.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle.v2 as paddle - - -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost - - -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses bi-directional recurrent network, - consisting three LSTM layers. This configure is referred to - the paper as following url, but use fewer layrs. - http://www.aclweb.org/anthology/P15-1109 - - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ - assert stacked_num % 2 == 1 - - layer_attr = paddle.attr.Extra(drop_rate=0.5) - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) 
- relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - - fc1 = paddle.layer.fc(input=emb, - size=hid_dim, - act=linear, - bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory( - input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = paddle.layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr, - layer_attr=layer_attr) - inputs = [fc, lstm] - - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost - - -if __name__ == '__main__': - # init - paddle.init(use_gpu=False) - - #data - print 'load dictionary...' - word_dict = paddle.dataset.imdb.word_dict() - dict_dim = len(word_dict) - class_dim = 2 - train_reader = paddle.batch( - paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) - test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) - - feeding = {'word': 0, 'label': 1} - - # network config - # Please choose the way to build the network - # by uncommenting the corresponding line. - cost = convolution_net(dict_dim, class_dim=class_dim) - # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) - - # create parameters - parameters = paddle.parameters.create(cost) - - # create optimizer - adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - - # End batch and end pass event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - # create trainer - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=adam_optimizer) - - trainer.train( - reader=train_reader, - event_handler=event_handler, - feeding=feeding, - num_passes=2) diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py deleted file mode 100644 index f1cadaa728ac58107e15f77b5994d31da088caf7..0000000000000000000000000000000000000000 --- a/demo/sentiment/trainer_config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sentiment_net import * -from paddle.trainer_config_helpers import * - -# whether this config is used for test -is_test = get_config_arg('is_test', bool, False) -# whether this config is used for prediction -is_predict = get_config_arg('is_predict', bool, False) - -data_dir = "./data/pre-imdb" -dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict) - -################## Algorithm Config ##################### - -settings( - batch_size=128, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - model_average=ModelAverage(0.5), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) - -#################### Network Config ###################### -stacked_lstm_net( - dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict) -# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict) diff --git a/demo/seqToseq/.gitignore b/demo/seqToseq/.gitignore deleted file mode 100644 index 21cec2c2c1f3422cbb0ad133281dc1ecdd076a96..0000000000000000000000000000000000000000 --- a/demo/seqToseq/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -data/wmt14 -data/pre-wmt14 -data/wmt14_model -data/paraphrase -data/pre-paraphrase -data/paraphrase_model -translation/gen.log -translation/gen_result -translation/train.log -paraphrase/train.log -dataprovider_copy_1.py -translation/thirdparty.tgz -translation/thirdparty/train.conf -translation/thirdparty/dataprovider.py -translation/thirdparty/seqToseq_net.py -translation/thirdparty/*.dict -*.pyc diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py deleted file mode 100644 index bb535f09260613098681db212ffc91631acf67e2..0000000000000000000000000000000000000000 --- a/demo/seqToseq/api_train_v2.py +++ /dev/null @@ -1,236 +0,0 @@ -import sys - -import paddle.v2 as paddle - - -def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): - ### Network Architecture - word_vector_dim = 512 # dimension of word vector - decoder_size = 512 # dimension of hidden unit in GRU Decoder network - encoder_size = 512 # dimension of hidden unit in GRU Encoder network - - beam_size = 3 - max_length = 250 - - #### Encoder - src_word_id = paddle.layer.data( - name='source_language_word', - type=paddle.data_type.integer_value_sequence(source_dict_dim)) - src_embedding = paddle.layer.embedding( - input=src_word_id, - size=word_vector_dim, - param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) - src_forward = paddle.networks.simple_gru( - name='src_forward_gru', input=src_embedding, size=encoder_size) - src_backward = paddle.networks.simple_gru( - name='src_backward_gru', - input=src_embedding, - size=encoder_size, - reverse=True) - encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) - - #### Decoder - with paddle.layer.mixed(size=decoder_size) as encoded_proj: - encoded_proj += paddle.layer.full_matrix_projection( - input=encoded_vector) - - backward_first = paddle.layer.first_seq(input=src_backward) - - with paddle.layer.mixed( - name="decoder_boot_mixed", - size=decoder_size, - act=paddle.activation.Tanh()) as decoder_boot: - decoder_boot += 
paddle.layer.full_matrix_projection( - input=backward_first) - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - - decoder_mem = paddle.layer.memory( - name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - - context = paddle.networks.simple_attention( - name="simple_attention", - encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem) - - with paddle.layer.mixed( - name="input_recurrent", - size=decoder_size * 3, - # enable error clipping - layer_attr=paddle.attr.ExtraAttr( - error_clipping_threshold=100.0)) as decoder_inputs: - decoder_inputs += paddle.layer.full_matrix_projection(input=context) - decoder_inputs += paddle.layer.full_matrix_projection( - input=current_word) - - gru_step = paddle.layer.gru_step( - name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - # uncomment to enable local threshold for gradient clipping - # param_attr=paddle.attr.ParamAttr(gradient_clipping_threshold=9.9), - size=decoder_size) - - with paddle.layer.mixed( - name="gru_step_output", - size=target_dict_dim, - bias_attr=True, - act=paddle.activation.Softmax()) as out: - out += paddle.layer.full_matrix_projection(input=gru_step) - return out - - decoder_group_name = "decoder_group" - group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) - group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) - group_inputs = [group_input1, group_input2] - - if not is_generating: - trg_embedding = paddle.layer.embedding( - input=paddle.layer.data( - name='target_language_word', - type=paddle.data_type.integer_value_sequence(target_dict_dim)), - size=word_vector_dim, - param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For decoder equipped with attention mechanism, in training, - # target embeding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. - decoder = paddle.layer.recurrent_group( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - lbl = paddle.layer.data( - name='target_language_next_word', - type=paddle.data_type.integer_value_sequence(target_dict_dim)) - cost = paddle.layer.classification_cost(input=decoder, label=lbl) - - return cost - else: - # In generation, the decoder predicts a next target word based on - # the encoded source sequence and the last generated target word. - - # The encoded source sequence (encoder's output) must be specified by - # StaticInput, which is a read-only memory. - # Embedding of the last generated word is automatically gotten by - # GeneratedInputs, which is initialized by a start mark, such as , - # and must be included in generation. - - trg_embedding = paddle.layer.GeneratedInputV2( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(trg_embedding) - - beam_gen = paddle.layer.beam_search( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, - eos_id=1, - beam_size=beam_size, - max_length=max_length) - - return beam_gen - - -def main(): - paddle.init( - use_gpu=False, - trainer_count=1, - # log gradient clipping info - log_clipping=True, - # log error clipping info - log_error_clipping=True) - is_generating = False - - # source and target dict dim. 
- dict_size = 30000 - source_dict_dim = target_dict_dim = dict_size - - # train the network - if not is_generating: - cost = seqToseq_net(source_dict_dim, target_dict_dim) - parameters = paddle.parameters.create(cost) - - # define optimize method and trainer - optimizer = paddle.optimizer.Adam( - learning_rate=5e-5, - # uncomment to enable global threshold for gradient clipping - # gradient_clipping_threshold=10.0, - regularization=paddle.optimizer.L2Regularization(rate=8e-4)) - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - # define data reader - wmt14_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=8192), - batch_size=5) - - # define event_handler callback - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 10 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, - event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - - # start to train - trainer.train( - reader=wmt14_reader, event_handler=event_handler, num_passes=2) - - # generate a english sequence to french - else: - # use the first 3 samples for generation - gen_creator = paddle.dataset.wmt14.gen(dict_size) - gen_data = [] - gen_num = 3 - for item in gen_creator(): - gen_data.append((item[0], )) - if len(gen_data) == gen_num: - break - - beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating) - # get the pretrained model, whose bleu = 26.92 - parameters = paddle.dataset.wmt14.model() - # prob is the prediction probabilities, and id is the prediction word. - beam_result = paddle.infer( - output_layer=beam_gen, - parameters=parameters, - input=gen_data, - field=['prob', 'id']) - - # get the dictionary - src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) - - # the delimited element of generated sequences is -1, - # the first element of each generated sequence is the sequence length - seq_list = [] - seq = [] - for w in beam_result[1]: - if w != -1: - seq.append(w) - else: - seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]])) - seq = [] - - prob = beam_result[0] - beam_size = 3 - for i in xrange(gen_num): - print "\n*******************************************************\n" - print "src:", ' '.join( - [src_dict.get(w) for w in gen_data[i][0]]), "\n" - for j in xrange(beam_size): - print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j] - - -if __name__ == '__main__': - main() diff --git a/demo/seqToseq/data/paraphrase_data.sh b/demo/seqToseq/data/paraphrase_data.sh deleted file mode 100755 index e6497c91286d44b5ef3b66c5f824e36a09728720..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/paraphrase_data.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e -set -x - -# download the in-house paraphrase dataset -wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz - -# untar the dataset -tar -zxvf paraphrase.tar.gz -rm paraphrase.tar.gz diff --git a/demo/seqToseq/data/paraphrase_model.sh b/demo/seqToseq/data/paraphrase_model.sh deleted file mode 100755 index d0e7f214a38c4dad0fdf7c10ba3b76eb0ab40f06..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/paraphrase_model.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -dim=32 -pretrained_dir='../../model_zoo/embedding/' -preModel=$pretrained_dir'model_'$dim'.emb' -preDict=$pretrained_dir'baidu.dict' - -usrDict_dir='pre-paraphrase/' -srcDict=$usrDict_dir'src.dict' -trgDict=$usrDict_dir'trg.dict' - -usrModel_dir='paraphrase_model/' -mkdir $usrModel_dir -srcModel=$usrModel_dir'_source_language_embedding' -trgModel=$usrModel_dir'_target_language_embedding' - -echo 'extract desired parameters based on user dictionary' -script=$pretrained_dir'extract_para.py' -python $script --preModel $preModel --preDict $preDict \ - --usrModel $srcModel --usrDict $srcDict -d $dim -python $script --preModel $preModel --preDict $preDict \ - --usrModel $trgModel --usrDict $trgDict -d $dim diff --git a/demo/seqToseq/data/wmt14_data.sh b/demo/seqToseq/data/wmt14_data.sh deleted file mode 100755 index 43f67168d2a876ba5401e0f8490a88adac9c5551..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/wmt14_data.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x -mkdir wmt14 -cd wmt14 - -# download the dataset -wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz -wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz - -# untar the dataset -tar -zxvf bitexts.tgz -tar -zxvf dev+test.tgz -gunzip bitexts.selected/* -mv bitexts.selected train -rm bitexts.tgz -rm dev+test.tgz - -# separate the dev and test dataset -mkdir test gen -mv dev/ntst1213.* test -mv dev/ntst14.* gen -rm -rf dev - -set +x -# rename the suffix, .fr->.src, .en->.trg -for dir in train test gen -do - filelist=`ls $dir` - cd $dir - for file in $filelist - do - if [ ${file##*.} = "fr" ]; then - mv $file ${file/%fr/src} - elif [ ${file##*.} = 'en' ]; then - mv $file ${file/%en/trg} - fi - done - cd .. 
-done diff --git a/demo/seqToseq/data/wmt14_model.sh b/demo/seqToseq/data/wmt14_model.sh deleted file mode 100755 index c4b55b90a3eb98f94e0eb3be028c6de1ef57326b..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/wmt14_model.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -# download the pretrained model -wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz - -# untar the model -tar -zxvf wmt14_model.tar.gz -rm wmt14_model.tar.gz diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py deleted file mode 100755 index c2b49804be582d7d0bc3ef6332741be03936eb24..0000000000000000000000000000000000000000 --- a/demo/seqToseq/dataprovider.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer.PyDataProvider2 import * - -UNK_IDX = 2 -START = "" -END = "" - - -def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list, - **kwargs): - # job_mode = 1: training mode - # job_mode = 0: generating mode - settings.job_mode = not is_generating - - def fun(dict_path): - out_dict = dict() - with open(dict_path, "r") as fin: - out_dict = { - line.strip(): line_count - for line_count, line in enumerate(fin) - } - return out_dict - - settings.src_dict = fun(src_dict_path) - settings.trg_dict = fun(trg_dict_path) - - settings.logger.info("src dict len : %d" % (len(settings.src_dict))) - - if settings.job_mode: - settings.slots = { - 'source_language_word': - integer_value_sequence(len(settings.src_dict)), - 'target_language_word': - integer_value_sequence(len(settings.trg_dict)), - 'target_language_next_word': - integer_value_sequence(len(settings.trg_dict)) - } - settings.logger.info("trg dict len : %d" % (len(settings.trg_dict))) - else: - settings.slots = { - 'source_language_word': - integer_value_sequence(len(settings.src_dict)), - 'sent_id': - integer_value_sequence(len(open(file_list[0], "r").readlines())) - } - - -def _get_ids(s, dictionary): - words = s.strip().split() - return [dictionary[START]] + \ - [dictionary.get(w, UNK_IDX) for w in words] + \ - [dictionary[END]] - - -@provider(init_hook=hook, pool_size=50000) -def process(settings, file_name): - with open(file_name, 'r') as f: - for line_count, line in enumerate(f): - line_split = line.strip().split('\t') - if settings.job_mode and len(line_split) != 2: - continue - src_seq = line_split[0] # one source sequence - src_ids = _get_ids(src_seq, settings.src_dict) - - if settings.job_mode: - trg_seq = line_split[1] # one target sequence - trg_words = trg_seq.split() - trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words] - - # remove sequence whose length > 80 in training mode - if len(src_ids) > 80 or len(trg_ids) > 80: - continue - trg_ids_next = trg_ids + [settings.trg_dict[END]] - trg_ids = [settings.trg_dict[START]] + trg_ids - yield { - 'source_language_word': src_ids, - 'target_language_word': trg_ids, - 'target_language_next_word': trg_ids_next - } - else: - yield {'source_language_word': src_ids, 'sent_id': [line_count]} diff --git a/demo/seqToseq/paraphrase/train.conf b/demo/seqToseq/paraphrase/train.conf deleted file mode 100644 index be79c5e771c0e864fd1776cedb3ef37c997b6df6..0000000000000000000000000000000000000000 --- a/demo/seqToseq/paraphrase/train.conf +++ /dev/null @@ -1,33 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
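[Editorial note] The deleted `dataprovider.py` above wraps every sentence with start/end markers and falls back to an unknown-word id for out-of-vocabulary tokens. The following standalone sketch mirrors that `_get_ids` logic; the concrete token strings and their ids (`<s>`=0, `<e>`=1, `<unk>`=2) are an assumption for illustration, since the provider itself reads them from the `src.dict`/`trg.dict` files on disk.

```python
# Standalone sketch of the sentence-to-ids mapping used by the deleted dataprovider.
# The dictionary layout (<s>=0, <e>=1, <unk>=2) is assumed for illustration.
START, END, UNK_IDX = '<s>', '<e>', 2


def get_ids(sentence, dictionary):
    """Map a whitespace-tokenized sentence to ids, wrapped with start/end markers."""
    words = sentence.strip().split()
    return [dictionary[START]] + \
           [dictionary.get(w, UNK_IDX) for w in words] + \
           [dictionary[END]]


if __name__ == '__main__':
    toy_dict = {'<s>': 0, '<e>': 1, '<unk>': 2, 'hello': 3, 'world': 4}
    print(get_ids('hello brave world', toy_dict))  # -> [0, 3, 2, 4, 1]
```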
- -import sys -sys.path.append("..") - -from seqToseq_net import * - -is_generating = False -### Data Definiation -train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase", - is_generating = is_generating) - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 50, - learning_rate = 5e-4) - -### Network Architecture -gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32) diff --git a/demo/seqToseq/paraphrase/train.sh b/demo/seqToseq/paraphrase/train.sh deleted file mode 100755 index 9bb6dbdb1d4c5e35bfb31855e0331f0250a69a20..0000000000000000000000000000000000000000 --- a/demo/seqToseq/paraphrase/train.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. - -paddle train \ - --config='paraphrase/train.conf' \ - --save_dir='paraphrase/model' \ - --init_model_path='data/paraphrase_model' \ - --load_missing_parameter_strategy=rand \ - --use_gpu=false \ - --num_passes=16 \ - --show_parameter_stats_period=100 \ - --trainer_count=4 \ - --log_period=10 \ - --dot_period=5 \ - 2>&1 | tee 'paraphrase/train.log' -paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1 diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py deleted file mode 100755 index 03f371331a0755e5939e457f4bdfb1770b8dad88..0000000000000000000000000000000000000000 --- a/demo/seqToseq/preprocess.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Example: - python preprocess.py -i INPUT [-d DICTSIZE] [-m] - -Options: - -h, --help show this help message and exit - -i INPUT input original dataset path - -d DICTSIZE specified word count of dictionary - -m --mergeDict merge source and target dictionary -""" -import os -import sys - -import string -from optparse import OptionParser -from paddle.utils.preprocess_util import save_list, DatasetCreater - - -class SeqToSeqDatasetCreater(DatasetCreater): - """ - A class to process data for sequence to sequence application. 
- """ - - def __init__(self, data_path, output_path): - """ - data_path: the path to store the train data, test data and gen data - output_path: the path to store the processed dataset - """ - DatasetCreater.__init__(self, data_path) - self.gen_dir_name = 'gen' - self.gen_list_name = 'gen.list' - self.output_path = output_path - - def concat_file(self, file_path, file1, file2, output_path, output): - """ - Concat file1 and file2 to be one output file - The i-th line of output = i-th line of file1 + '\t' + i-th line of file2 - file_path: the path to store file1 and file2 - output_path: the path to store output file - """ - file1 = os.path.join(file_path, file1) - file2 = os.path.join(file_path, file2) - output = os.path.join(output_path, output) - if not os.path.exists(output): - os.system('paste ' + file1 + ' ' + file2 + ' > ' + output) - - def cat_file(self, dir_path, suffix, output_path, output): - """ - Cat all the files in dir_path with suffix to be one output file - dir_path: the base directory to store input file - suffix: suffix of file name - output_path: the path to store output file - """ - cmd = 'cat ' - file_list = os.listdir(dir_path) - file_list.sort() - for file in file_list: - if file.endswith(suffix): - cmd += os.path.join(dir_path, file) + ' ' - output = os.path.join(output_path, output) - if not os.path.exists(output): - os.system(cmd + '> ' + output) - - def build_dict(self, file_path, dict_path, dict_size=-1): - """ - Create the dictionary for the file, Note that - 1. Valid characters include all printable characters - 2. There is distinction between uppercase and lowercase letters - 3. There is 3 special token: - : the start of a sequence - : the end of a sequence - : a word not included in dictionary - file_path: the path to store file - dict_path: the path to store dictionary - dict_size: word count of dictionary - if is -1, dictionary will contains all the words in file - """ - if not os.path.exists(dict_path): - dictory = dict() - with open(file_path, "r") as fdata: - for line in fdata: - line = line.split('\t') - for line_split in line: - words = line_split.strip().split() - for word in words: - if word not in dictory: - dictory[word] = 1 - else: - dictory[word] += 1 - output = open(dict_path, "w+") - output.write('\n\n\n') - count = 3 - for key, value in sorted( - dictory.items(), key=lambda d: d[1], reverse=True): - output.write(key + "\n") - count += 1 - if count == dict_size: - break - self.dict_size = count - - def create_dataset(self, - dict_size=-1, - mergeDict=False, - suffixes=['.src', '.trg']): - """ - Create seqToseq dataset - """ - # dataset_list and dir_list has one-to-one relationship - train_dataset = os.path.join(self.data_path, self.train_dir_name) - test_dataset = os.path.join(self.data_path, self.test_dir_name) - gen_dataset = os.path.join(self.data_path, self.gen_dir_name) - dataset_list = [train_dataset, test_dataset, gen_dataset] - - train_dir = os.path.join(self.output_path, self.train_dir_name) - test_dir = os.path.join(self.output_path, self.test_dir_name) - gen_dir = os.path.join(self.output_path, self.gen_dir_name) - dir_list = [train_dir, test_dir, gen_dir] - - # create directory - for dir in dir_list: - if not os.path.exists(dir): - os.mkdir(dir) - - # checkout dataset should be parallel corpora - suffix_len = len(suffixes[0]) - for dataset in dataset_list: - file_list = os.listdir(dataset) - if len(file_list) % 2 == 1: - raise RuntimeError("dataset should be parallel corpora") - file_list.sort() - for i in range(0, len(file_list), 
2): - if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: - raise RuntimeError( - "source and target file name should be equal") - - # cat all the files with the same suffix in dataset - for suffix in suffixes: - for dataset in dataset_list: - outname = os.path.basename(dataset) + suffix - self.cat_file(dataset, suffix, dataset, outname) - - # concat parallel corpora and create file.list - print 'concat parallel corpora for dataset' - id = 0 - list = ['train.list', 'test.list', 'gen.list'] - for dataset in dataset_list: - outname = os.path.basename(dataset) - self.concat_file(dataset, outname + suffixes[0], - outname + suffixes[1], dir_list[id], outname) - save_list([os.path.join(dir_list[id], outname)], - os.path.join(self.output_path, list[id])) - id += 1 - - # build dictionary for train data - dict = ['src.dict', 'trg.dict'] - dict_path = [ - os.path.join(self.output_path, dict[0]), - os.path.join(self.output_path, dict[1]) - ] - if mergeDict: - outname = os.path.join(train_dir, train_dataset.split('/')[-1]) - print 'build src dictionary for train data' - self.build_dict(outname, dict_path[0], dict_size) - print 'build trg dictionary for train data' - os.system('cp ' + dict_path[0] + ' ' + dict_path[1]) - else: - outname = os.path.join(train_dataset, self.train_dir_name) - for id in range(0, 2): - suffix = suffixes[id] - print 'build ' + suffix[1:] + ' dictionary for train data' - self.build_dict(outname + suffix, dict_path[id], dict_size) - print 'dictionary size is', self.dict_size - - -def main(): - usage = "usage: \n" \ - "python %prog -i INPUT [-d DICTSIZE] [-m]" - parser = OptionParser(usage) - parser.add_option( - "-i", action="store", dest="input", help="input original dataset path") - parser.add_option( - "-d", - action="store", - dest="dictsize", - help="specified word count of dictionary") - parser.add_option( - "-m", - "--mergeDict", - action="store_true", - dest="mergeDict", - help="merge source and target dictionary") - (options, args) = parser.parse_args() - if options.input[-1] == os.path.sep: - options.input = options.input[:-1] - outname = os.path.basename(options.input) - output_path = os.path.join(os.path.dirname(options.input), 'pre-' + outname) - dictsize = int(options.dictsize) if options.dictsize else -1 - if not os.path.exists(output_path): - os.mkdir(output_path) - data_creator = SeqToSeqDatasetCreater(options.input, output_path) - data_creator.create_dataset(dictsize, options.mergeDict) - - -if __name__ == "__main__": - main() diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py deleted file mode 100644 index 3d1f86ec3b7eda4fceaf3a1e406e3d0a1a4a2f60..0000000000000000000000000000000000000000 --- a/demo/seqToseq/seqToseq_net.py +++ /dev/null @@ -1,204 +0,0 @@ -# edit-mode: -*- python -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
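[Editorial note] The `build_dict` method removed in `preprocess.py` above builds a frequency-sorted vocabulary with three special tokens reserved at the top. A compact sketch of that idea, under the assumption that the specials are `<s>`/`<e>`/`<unk>` (the original writes them as the first three lines of the dict file before appending words by descending frequency):

```python
# Minimal sketch of the frequency-sorted dictionary construction performed by
# build_dict() in the deleted preprocess.py above. Special tokens are assumed
# to be <s>/<e>/<unk>; the real script caps the vocabulary at dict_size entries.
from collections import Counter


def build_dict(lines, dict_size=-1, specials=('<s>', '<e>', '<unk>')):
    """Count words over tab-separated parallel lines and keep the most frequent ones."""
    counts = Counter()
    for line in lines:
        for part in line.split('\t'):
            counts.update(part.strip().split())

    vocab = list(specials)
    for word, _ in counts.most_common():
        if 0 < dict_size <= len(vocab):
            break
        vocab.append(word)
    return {w: i for i, w in enumerate(vocab)}


if __name__ == '__main__':
    corpus = ['the cat sat\tle chat', 'the dog ran\tle chien']
    print(build_dict(corpus, dict_size=6))
```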
- -import sys -import os -from paddle.trainer_config_helpers import * - - -def seq_to_seq_data(data_dir, - is_generating, - dict_size=30000, - train_list='train.list', - test_list='test.list', - gen_list='gen.list', - gen_result='gen_result'): - """ - Predefined seqToseq train data provider for application - is_generating: whether this config is used for generating - dict_size: word count of dictionary - train_list: a text file containing a list of training data - test_list: a text file containing a list of testing data - gen_list: a text file containing a list of generating data - gen_result: a text file containing generating result - """ - src_lang_dict = os.path.join(data_dir, 'src.dict') - trg_lang_dict = os.path.join(data_dir, 'trg.dict') - - if is_generating: - train_list = None - test_list = os.path.join(data_dir, gen_list) - else: - train_list = os.path.join(data_dir, train_list) - test_list = os.path.join(data_dir, test_list) - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={ - "src_dict_path": src_lang_dict, - "trg_dict_path": trg_lang_dict, - "is_generating": is_generating - }) - - return { - "src_dict_path": src_lang_dict, - "trg_dict_path": trg_lang_dict, - "gen_result": gen_result - } - - -def gru_encoder_decoder(data_conf, - is_generating, - word_vector_dim=512, - encoder_size=512, - decoder_size=512, - beam_size=3, - max_length=250, - error_clipping=50): - """ - A wrapper for an attention version of GRU Encoder-Decoder network - is_generating: whether this config is used for generating - encoder_size: dimension of hidden unit in GRU Encoder network - decoder_size: dimension of hidden unit in GRU Decoder network - word_vector_dim: dimension of word vector - beam_size: expand width in beam search - max_length: a stop condition of sequence generation - """ - for k, v in data_conf.iteritems(): - globals()[k] = v - source_dict_dim = len(open(src_dict_path, "r").readlines()) - target_dict_dim = len(open(trg_dict_path, "r").readlines()) - gen_trans_file = gen_result - - src_word_id = data_layer(name='source_language_word', size=source_dict_dim) - src_embedding = embedding_layer( - input=src_word_id, - size=word_vector_dim, - param_attr=ParamAttr(name='_source_language_embedding')) - src_forward = simple_gru( - input=src_embedding, - size=encoder_size, - naive=True, - gru_layer_attr=ExtraLayerAttribute( - error_clipping_threshold=error_clipping)) - src_backward = simple_gru( - input=src_embedding, - size=encoder_size, - reverse=True, - naive=True, - gru_layer_attr=ExtraLayerAttribute( - error_clipping_threshold=error_clipping)) - encoded_vector = concat_layer(input=[src_forward, src_backward]) - - with mixed_layer(size=decoder_size) as encoded_proj: - encoded_proj += full_matrix_projection(input=encoded_vector) - - backward_first = first_seq(input=src_backward) - with mixed_layer( - size=decoder_size, - act=TanhActivation(), ) as decoder_boot: - decoder_boot += full_matrix_projection(input=backward_first) - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - decoder_mem = memory( - name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - - context = simple_attention( - encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem, ) - - with mixed_layer(size=decoder_size * 3) as decoder_inputs: - decoder_inputs += full_matrix_projection(input=context) - decoder_inputs += full_matrix_projection(input=current_word) - - gru_step = gru_step_naive_layer( - name='gru_decoder', - 
input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size, - layer_attr=ExtraLayerAttribute( - error_clipping_threshold=error_clipping)) - - with mixed_layer( - size=target_dict_dim, bias_attr=True, - act=SoftmaxActivation()) as out: - out += full_matrix_projection(input=gru_step) - return out - - decoder_group_name = "decoder_group" - group_inputs = [ - StaticInput( - input=encoded_vector, is_seq=True), StaticInput( - input=encoded_proj, is_seq=True) - ] - - if not is_generating: - trg_embedding = embedding_layer( - input=data_layer( - name='target_language_word', size=target_dict_dim), - size=word_vector_dim, - param_attr=ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For decoder equipped with attention mechanism, in training, - # target embeding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. - decoder = recurrent_group( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - lbl = data_layer(name='target_language_next_word', size=target_dict_dim) - cost = classification_cost(input=decoder, label=lbl) - outputs(cost) - else: - # In generation, the decoder predicts a next target word based on - # the encoded source sequence and the last generated target word. - - # The encoded source sequence (encoder's output) must be specified by - # StaticInput, which is a read-only memory. - # Embedding of the last generated word is automatically gotten by - # GeneratedInputs, which is initialized by a start mark, such as , - # and must be included in generation. - - trg_embedding = GeneratedInput( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(trg_embedding) - - beam_gen = beam_search( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, - eos_id=1, - beam_size=beam_size, - max_length=max_length) - - seqtext_printer_evaluator( - input=beam_gen, - id_input=data_layer( - name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) - outputs(beam_gen) diff --git a/demo/seqToseq/translation/eval_bleu.sh b/demo/seqToseq/translation/eval_bleu.sh deleted file mode 100755 index 54c2ed237e93adb3456dbe62f75626d36c2d90bc..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/eval_bleu.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e -gen_file=$1 -beam_size=$2 - -# find top1 generating result -top1=$(printf '%s_top1.txt' `basename $gen_file .txt`) -if [ $beam_size -eq 1 ]; then - awk -F "\t" '{sub(" ","",$2);sub(" ","",$2);print $2}' $gen_file >$top1 -else - awk 'BEGIN{ - FS="\t"; - OFS="\t"; - read_pos = 2} { - if (NR == read_pos){ - sub(" ","",$3); - sub(" ","",$3); - print $3; - read_pos += (2 + res_num); - }}' res_num=$beam_size $gen_file >$top1 -fi - -# evalute bleu value -bleu_script=multi-bleu.perl -standard_res=../data/wmt14/gen/ntst14.trg -bleu_res=`perl $bleu_script $standard_res <$top1` - -echo $bleu_res -rm $top1 diff --git a/demo/seqToseq/translation/gen.conf b/demo/seqToseq/translation/gen.conf deleted file mode 100644 index e9bea4e4559ff31ad83c4474e91de7e7acc77e9f..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/gen.conf +++ /dev/null @@ -1,36 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -# whether this config is used for generating -is_generating = True - -### Data Definiation -gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14", - is_generating = is_generating, - gen_result = "./translation/gen_result") - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 1, - learning_rate = 0) - -### Network Architecture -gru_encoder_decoder(gen_conf, is_generating) diff --git a/demo/seqToseq/translation/gen.sh b/demo/seqToseq/translation/gen.sh deleted file mode 100755 index 64b78f5e9654e7b206740f92e224e0164108c9f1..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/gen.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. - -paddle train \ - --job=test \ - --config='translation/gen.conf' \ - --save_dir='data/wmt14_model' \ - --use_gpu=false \ - --num_passes=13 \ - --test_pass=12 \ - --trainer_count=1 \ - 2>&1 | tee 'translation/gen.log' -paddle usage -l 'translation/gen.log' -e $? 
-n "seqToseq_translation_gen" >/dev/null 2>&1 diff --git a/demo/seqToseq/translation/moses_bleu.sh b/demo/seqToseq/translation/moses_bleu.sh deleted file mode 100755 index 2f230d7f4c736da003966fbdb277f6b8b1ec952c..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/moses_bleu.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x -echo "Downloading multi-bleu.perl" -wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl --no-check-certificate diff --git a/demo/seqToseq/translation/train.conf b/demo/seqToseq/translation/train.conf deleted file mode 100644 index 72b7ccdbb95dbda8f06674079db9a3257bb31622..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/train.conf +++ /dev/null @@ -1,36 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -# whether this config is used for generating -is_generating = False - -### Data Definiation -data_dir = "./data/pre-wmt14" -train_conf = seq_to_seq_data(data_dir = data_dir, - is_generating = is_generating) - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 50, - learning_rate = 5e-4) - -### Network Architecture -gru_encoder_decoder(train_conf, is_generating) diff --git a/demo/seqToseq/translation/train.sh b/demo/seqToseq/translation/train.sh deleted file mode 100755 index b0ec9854b118cbb9ed39d6bed0cdd845403926a4..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/train.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. 
- -paddle train \ ---config='translation/train.conf' \ ---save_dir='translation/model' \ ---use_gpu=false \ ---num_passes=16 \ ---show_parameter_stats_period=100 \ ---trainer_count=4 \ ---log_period=10 \ ---dot_period=5 \ -2>&1 | tee 'translation/train.log' -paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1 diff --git a/demo/word2vec/api_train_v2.py b/demo/word2vec/api_train_v2.py deleted file mode 100644 index c0940f0e56eafa22f8aeb7052c0ddc79d8862917..0000000000000000000000000000000000000000 --- a/demo/word2vec/api_train_v2.py +++ /dev/null @@ -1,100 +0,0 @@ -import gzip -import math - -import paddle.v2 as paddle - -embsize = 32 -hiddensize = 256 -N = 5 - - -def wordemb(inlayer): - wordemb = paddle.layer.embedding( - input=inlayer, - size=embsize, - param_attr=paddle.attr.Param( - name="_proj", - initial_std=0.001, - learning_rate=1, - l2_rate=0, - sparse_update=True)) - return wordemb - - -def main(): - # for local training - cluster_train = False - - if not cluster_train: - paddle.init(use_gpu=False, trainer_count=1) - else: - paddle.init( - use_gpu=False, - trainer_count=2, - port=7164, - ports_num=1, - ports_num_for_sparse=1, - num_gradient_servers=1) - word_dict = paddle.dataset.imikolov.build_dict() - dict_size = len(word_dict) - firstword = paddle.layer.data( - name="firstw", type=paddle.data_type.integer_value(dict_size)) - secondword = paddle.layer.data( - name="secondw", type=paddle.data_type.integer_value(dict_size)) - thirdword = paddle.layer.data( - name="thirdw", type=paddle.data_type.integer_value(dict_size)) - fourthword = paddle.layer.data( - name="fourthw", type=paddle.data_type.integer_value(dict_size)) - nextword = paddle.layer.data( - name="fifthw", type=paddle.data_type.integer_value(dict_size)) - - Efirst = wordemb(firstword) - Esecond = wordemb(secondword) - Ethird = wordemb(thirdword) - Efourth = wordemb(fourthword) - - contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth]) - hidden1 = paddle.layer.fc(input=contextemb, - size=hiddensize, - act=paddle.activation.Sigmoid(), - layer_attr=paddle.attr.Extra(drop_rate=0.5), - bias_attr=paddle.attr.Param(learning_rate=2), - param_attr=paddle.attr.Param( - initial_std=1. 
/ math.sqrt(embsize * 8), - learning_rate=1)) - predictword = paddle.layer.fc(input=hidden1, - size=dict_size, - bias_attr=paddle.attr.Param(learning_rate=2), - act=paddle.activation.Softmax()) - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - with gzip.open("batch-" + str(event.batch_id) + ".tar.gz", - 'w') as f: - trainer.save_parameter_to_tar(f) - result = trainer.test( - paddle.batch( - paddle.dataset.imikolov.test(word_dict, N), 32)) - print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics, - result.metrics) - - cost = paddle.layer.classification_cost(input=predictword, label=nextword) - - parameters = paddle.parameters.create(cost) - adagrad = paddle.optimizer.AdaGrad( - learning_rate=3e-3, - regularization=paddle.optimizer.L2Regularization(8e-4)) - trainer = paddle.trainer.SGD(cost, - parameters, - adagrad, - is_local=not cluster_train) - trainer.train( - paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32), - num_passes=30, - event_handler=event_handler) - - -if __name__ == '__main__': - main() diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index da2d4234658b6ea4730346e721437cc1633c4362..87c286a1af75e08313813f1373ea03b85d4af523 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -12,13 +12,13 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打 像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 -`dockerhub.com `_ 提供最新 +`dockerhub.com `_ 提供最新 的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国 内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您 在国内,请把文档里命令中的paddlepaddle/paddle替换成 docker.paddlepaddle.org/paddle。 -1. 开发镜像::code:`paddlepaddle/paddle:-dev` +1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` 这个镜像包含了Paddle相关的开发工具以及编译和运行环境。用户可以使用开发镜像代替配置本地环境,完成开发,编译,发布, 文档编写等工作。由于不同的Paddle的版本可能需要不同的依赖和工具,所以如果需要自行配置开发环境需要考虑版本的因素。 @@ -37,13 +37,13 @@ docker.paddlepaddle.org/paddle。 .. code-block:: bash - docker run -it --rm paddlepaddle/paddle:-dev /bin/bash + docker run -it --rm paddlepaddle/paddle:0.10.0-dev /bin/bash 或者,可以以后台进程方式运行容器: .. code-block:: bash - docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:-dev + docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0-dev 然后用密码 :code:`root` SSH进入容器: @@ -73,7 +73,7 @@ docker.paddlepaddle.org/paddle。 .. code-block:: bash - nvidia-docker run -it --rm paddledev/paddle:0.10.0rc1-gpu /bin/bash + nvidia-docker run -it --rm paddledev/paddle:0.10.0-gpu /bin/bash 注意: 如果使用nvidia-docker存在问题,你也许可以尝试更老的方法,具体如下,但是我们并不推荐这种方法。: @@ -81,7 +81,7 @@ docker.paddlepaddle.org/paddle。 export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:-gpu + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0-gpu 3. 
运行以及发布您的AI程序 @@ -98,7 +98,7 @@ docker.paddlepaddle.org/paddle。 nvidia-docker run -it -v $PWD:/work paddle /work/a.py - 这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:` + 这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:0.10.0` 创建和发布自己的AI程序镜像。 运行PaddlePaddle Book @@ -177,7 +177,7 @@ Paddle的Docker开发镜像带有一个通过 `woboq code browser .. code-block:: bash - docker run -d --name paddle-cpu-doc paddle:-dev + docker run -d --name paddle-cpu-doc paddle:0.10.0-dev docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx 接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 03df497506099d2fb758bd0ab437d2c082f2b537..b6fd3329b273aabe80edd5f1ff064a311648b3c2 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -23,7 +23,7 @@ Docker is simple as long as we understand a few basic concepts: .. code-block:: bash - docker pull paddlepaddle/paddle:0.10.0rc2 + docker pull paddlepaddle/paddle:0.10.0 to download a Docker image, paddlepaddle/paddle in this example, from Dockerhub.com. @@ -35,7 +35,7 @@ Docker is simple as long as we understand a few basic concepts: .. code-block:: bash - docker run paddlepaddle/paddle:0.10.0rc2 + docker run paddlepaddle/paddle:0.10.0 to start a container to run a Docker image, paddlepaddle/paddle in this example. @@ -62,7 +62,7 @@ of PaddlePaddle, we release both of them. Production image includes CPU-only version and a CUDA GPU version and their no-AVX versions. We put the docker images on `dockerhub.com -`_. You can find the +`_. You can find the latest versions under "tags" tab at dockerhub.com. If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in @@ -89,7 +89,7 @@ the commands to docker.paddlepaddle.org/paddle. .. code-block:: bash - docker run -it --rm paddlepaddle/paddle:0.10.0rc2 /bin/bash + docker run -it --rm paddlepaddle/paddle:0.10.0 /bin/bash Above method work with the GPU image too -- the recommended way is using `nvidia-docker `_. @@ -101,7 +101,7 @@ the commands to docker.paddlepaddle.org/paddle. .. code-block:: bash - nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash + nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash 2. development image :code:`paddlepaddle/paddle:-dev` @@ -149,13 +149,13 @@ Run the program using docker: .. code-block:: bash - docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 python /workspace/example.py + docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 python /workspace/example.py Or if you are using GPU for training: .. code-block:: bash - nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu python /workspace/example.py + nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu python /workspace/example.py Above commands will start a docker container by running :code:`python /workspace/example.py`. It will stop once :code:`python @@ -166,7 +166,7 @@ run PaddlePaddle program interactively: .. 
code-block:: bash - docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 /bin/bash + docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 /bin/bash # now we are inside docker container cd /workspace python example.py @@ -175,7 +175,7 @@ Running with GPU is identical: .. code-block:: bash - nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash + nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu /bin/bash # now we are inside docker container cd /workspace python example.py diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go index cc6e45049a3a730426b451b309aa876717196619..d1f3d7d76c438670faf6677b01e790c5ebe1f2cb 100644 --- a/go/cmd/master/master.go +++ b/go/cmd/master/master.go @@ -14,7 +14,7 @@ import ( "github.com/namsral/flag" "github.com/PaddlePaddle/Paddle/go/master" - "github.com/PaddlePaddle/Paddle/go/recordio" + "github.com/PaddlePaddle/recordio" ) func main() { diff --git a/go/master/service.go b/go/master/service.go index 50e646b01f08c8e16fc46dd0be33402751c26c35..ab17a62f3854c1e32d731037fcc9857260d03781 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -6,7 +6,7 @@ import ( "sync" "time" - "github.com/PaddlePaddle/Paddle/go/recordio" + "github.com/PaddlePaddle/recordio" ) const ( diff --git a/go/recordio/README.md b/go/recordio/README.md deleted file mode 100644 index 50e7e954764ec6f26397c6a24296b1bf65403d69..0000000000000000000000000000000000000000 --- a/go/recordio/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# RecordIO - -## Write - -```go -f, e := os.Create("a_file.recordio") -w := recordio.NewWriter(f) -w.Write([]byte("Hello")) -w.Write([]byte("World!")) -w.Close() -f.Close() -``` - -## Read - -1. Load chunk index: - - ```go - f, e := os.Open("a_file.recordio") - idx, e := recordio.LoadIndex(f) - fmt.Println("Total records: ", idx.Len()) - f.Close() - ``` - -2. Create one or more scanner to read a range of records. 
The - following example reads the range - [1, 3), i.e., the second and the third records: - - ```go - f, e := os.Open("a_file.recordio") - s := recrodio.NewScanner(f, idx, 1, 3) - for s.Scan() { - fmt.Println(string(s.Record())) - } - if s.Err() != nil { - log.Fatalf("Something wrong with scanning: %v", e) - } - f.Close() - ``` diff --git a/go/recordio/c/CMakeLists.txt b/go/recordio/c/CMakeLists.txt deleted file mode 100644 index c300c091f8710a0c0c978f051370224c6de1a5c6..0000000000000000000000000000000000000000 --- a/go/recordio/c/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -cmake_minimum_required(VERSION 3.0) - -get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) -get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake") - -project(cxx_go C Go) - -include(golang) -include(flags) - -go_library(recordio STATIC) -add_subdirectory(test) diff --git a/go/recordio/c/crecordio.go b/go/recordio/c/crecordio.go deleted file mode 100644 index e5cc30299285adfa4eeec814e7b52577caf1631f..0000000000000000000000000000000000000000 --- a/go/recordio/c/crecordio.go +++ /dev/null @@ -1,116 +0,0 @@ -package main - -/* -#include - -typedef int reader; -typedef int writer; -*/ -import "C" - -import ( - "log" - "os" - "strings" - "unsafe" - - "github.com/PaddlePaddle/Paddle/go/recordio" -) - -var nullPtr = unsafe.Pointer(uintptr(0)) - -type writer struct { - w *recordio.Writer - f *os.File -} - -type reader struct { - scanner *recordio.Scanner -} - -func cArrayToSlice(p unsafe.Pointer, len int) []byte { - if p == nullPtr { - return nil - } - - // create a Go clice backed by a C array, reference: - // https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices - // - // Go garbage collector will not interact with this data, need - // to be freed properly. - return (*[1 << 30]byte)(p)[:len:len] -} - -//export create_recordio_writer -func create_recordio_writer(path *C.char) C.writer { - p := C.GoString(path) - f, err := os.Create(p) - if err != nil { - log.Println(err) - return -1 - } - - w := recordio.NewWriter(f, -1, -1) - writer := &writer{f: f, w: w} - return addWriter(writer) -} - -//export recordio_write -func recordio_write(writer C.writer, buf *C.uchar, size C.int) C.int { - w := getWriter(writer) - b := cArrayToSlice(unsafe.Pointer(buf), int(size)) - c, err := w.w.Write(b) - if err != nil { - log.Println(err) - return -1 - } - - return C.int(c) -} - -//export release_recordio_writer -func release_recordio_writer(writer C.writer) { - w := removeWriter(writer) - w.w.Close() - w.f.Close() -} - -//export create_recordio_reader -func create_recordio_reader(path *C.char) C.reader { - p := C.GoString(path) - s, err := recordio.NewScanner(strings.Split(p, ",")...) 
- if err != nil { - log.Println(err) - return -1 - } - - r := &reader{scanner: s} - return addReader(r) -} - -//export recordio_read -func recordio_read(reader C.reader, record **C.uchar) C.int { - r := getReader(reader) - if r.scanner.Scan() { - buf := r.scanner.Record() - if len(buf) == 0 { - *record = (*C.uchar)(nullPtr) - return 0 - } - - size := C.int(len(buf)) - *record = (*C.uchar)(C.malloc(C.size_t(len(buf)))) - C.memcpy(unsafe.Pointer(*record), unsafe.Pointer(&buf[0]), C.size_t(len(buf))) - return size - } - - return -1 -} - -//export release_recordio_reader -func release_recordio_reader(reader C.reader) { - r := removeReader(reader) - r.scanner.Close() -} - -func main() {} // Required but ignored diff --git a/go/recordio/c/register.go b/go/recordio/c/register.go deleted file mode 100644 index 61dfdbd4ab64a05a25cc24219456853a010c4ce4..0000000000000000000000000000000000000000 --- a/go/recordio/c/register.go +++ /dev/null @@ -1,61 +0,0 @@ -package main - -/* -typedef int reader; -typedef int writer; -*/ -import "C" - -import "sync" - -var mu sync.Mutex -var handleMap = make(map[C.reader]*reader) -var curHandle C.reader -var writerMap = make(map[C.writer]*writer) -var curWriterHandle C.writer - -func addReader(r *reader) C.reader { - mu.Lock() - defer mu.Unlock() - reader := curHandle - curHandle++ - handleMap[reader] = r - return reader -} - -func getReader(reader C.reader) *reader { - mu.Lock() - defer mu.Unlock() - return handleMap[reader] -} - -func removeReader(reader C.reader) *reader { - mu.Lock() - defer mu.Unlock() - r := handleMap[reader] - delete(handleMap, reader) - return r -} - -func addWriter(w *writer) C.writer { - mu.Lock() - defer mu.Unlock() - writer := curWriterHandle - curWriterHandle++ - writerMap[writer] = w - return writer -} - -func getWriter(writer C.writer) *writer { - mu.Lock() - defer mu.Unlock() - return writerMap[writer] -} - -func removeWriter(writer C.writer) *writer { - mu.Lock() - defer mu.Unlock() - w := writerMap[writer] - delete(writerMap, writer) - return w -} diff --git a/go/recordio/c/test/CMakeLists.txt b/go/recordio/c/test/CMakeLists.txt deleted file mode 100644 index bac1006ae12e07574afaa4b00160b559d173c332..0000000000000000000000000000000000000000 --- a/go/recordio/c/test/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -cmake_minimum_required(VERSION 3.0) - -include_directories(${CMAKE_BINARY_DIR}) - -add_executable(recordio_test test.c) -add_dependencies(recordio_test recordio) -set (CMAKE_EXE_LINKER_FLAGS "-pthread") -target_link_libraries(recordio_test ${CMAKE_BINARY_DIR}/librecordio.a) diff --git a/go/recordio/c/test/test.c b/go/recordio/c/test/test.c deleted file mode 100644 index b25536a9d76a8654cf1b15075c76887495e1d9bd..0000000000000000000000000000000000000000 --- a/go/recordio/c/test/test.c +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include - -#include "librecordio.h" - -void fail() { - // TODO(helin): fix: gtest using cmake is not working, using this - // hacky way for now. 
- printf("test failed.\n"); - exit(-1); -} - -int main() { - writer w = create_recordio_writer("/tmp/test_recordio_0"); - recordio_write(w, "hello", 6); - recordio_write(w, "hi", 3); - release_recordio_writer(w); - - w = create_recordio_writer("/tmp/test_recordio_1"); - recordio_write(w, "dog", 4); - recordio_write(w, "cat", 4); - release_recordio_writer(w); - - reader r = create_recordio_reader("/tmp/test_recordio_*"); - unsigned char* item = NULL; - int size = recordio_read(r, &item); - if (strcmp(item, "hello") || size != 6) { - fail(); - } - free(item); - - size = recordio_read(r, &item); - if (strcmp(item, "hi") || size != 3) { - fail(); - } - free(item); - - size = recordio_read(r, &item); - if (strcmp(item, "dog") || size != 4) { - fail(); - } - free(item); - - size = recordio_read(r, &item); - if (strcmp(item, "cat") || size != 4) { - fail(); - } - free(item); - - size = recordio_read(r, &item); - if (size != -1) { - fail(); - } - - release_recordio_reader(r); -} diff --git a/go/recordio/chunk.go b/go/recordio/chunk.go deleted file mode 100644 index 4e983ab72bddbfec23929e1874142bc7673b317b..0000000000000000000000000000000000000000 --- a/go/recordio/chunk.go +++ /dev/null @@ -1,181 +0,0 @@ -package recordio - -import ( - "bytes" - "compress/gzip" - "encoding/binary" - "fmt" - "hash/crc32" - "io" - - "github.com/golang/snappy" -) - -// A Chunk contains the Header and optionally compressed records. To -// create a chunk, just use ch := &Chunk{}. -type Chunk struct { - records [][]byte - numBytes int // sum of record lengths. -} - -func (ch *Chunk) add(record []byte) { - ch.records = append(ch.records, record) - ch.numBytes += len(record) -} - -// dump the chunk into w, and clears the chunk and makes it ready for -// the next add invocation. -func (ch *Chunk) dump(w io.Writer, compressorIndex int) error { - // NOTE: don't check ch.numBytes instead, because empty - // records are allowed. - if len(ch.records) == 0 { - return nil - } - - // Write raw records and their lengths into data buffer. - var data bytes.Buffer - - for _, r := range ch.records { - var rs [4]byte - binary.LittleEndian.PutUint32(rs[:], uint32(len(r))) - - if _, e := data.Write(rs[:]); e != nil { - return fmt.Errorf("Failed to write record length: %v", e) - } - - if _, e := data.Write(r); e != nil { - return fmt.Errorf("Failed to write record: %v", e) - } - } - - compressed, e := compressData(&data, compressorIndex) - if e != nil { - return e - } - - // Write chunk header and compressed data. - hdr := &Header{ - checkSum: crc32.ChecksumIEEE(compressed.Bytes()), - compressor: uint32(compressorIndex), - compressedSize: uint32(compressed.Len()), - numRecords: uint32(len(ch.records)), - } - - if _, e := hdr.write(w); e != nil { - return fmt.Errorf("Failed to write chunk header: %v", e) - } - - if _, e := w.Write(compressed.Bytes()); e != nil { - return fmt.Errorf("Failed to write chunk data: %v", e) - } - - // Clear the current chunk. 
- ch.records = nil - ch.numBytes = 0 - - return nil -} - -type noopCompressor struct { - *bytes.Buffer -} - -func (c *noopCompressor) Close() error { - return nil -} - -func compressData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) { - compressed := new(bytes.Buffer) - var compressor io.WriteCloser - - switch compressorIndex { - case NoCompression: - compressor = &noopCompressor{compressed} - case Snappy: - compressor = snappy.NewBufferedWriter(compressed) - case Gzip: - compressor = gzip.NewWriter(compressed) - default: - return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex) - } - - if _, e := io.Copy(compressor, src); e != nil { - return nil, fmt.Errorf("Failed to compress chunk data: %v", e) - } - compressor.Close() - - return compressed, nil -} - -// parse the specified chunk from r. -func parseChunk(r io.ReadSeeker, chunkOffset int64) (*Chunk, error) { - var e error - var hdr *Header - - if _, e = r.Seek(chunkOffset, io.SeekStart); e != nil { - return nil, fmt.Errorf("Failed to seek chunk: %v", e) - } - - hdr, e = parseHeader(r) - if e != nil { - return nil, fmt.Errorf("Failed to parse chunk header: %v", e) - } - - var buf bytes.Buffer - if _, e = io.CopyN(&buf, r, int64(hdr.compressedSize)); e != nil { - return nil, fmt.Errorf("Failed to read chunk data: %v", e) - } - - if hdr.checkSum != crc32.ChecksumIEEE(buf.Bytes()) { - return nil, fmt.Errorf("Checksum checking failed.") - } - - deflated, e := deflateData(&buf, int(hdr.compressor)) - if e != nil { - return nil, e - } - - ch := &Chunk{} - for i := 0; i < int(hdr.numRecords); i++ { - var rs [4]byte - if _, e = deflated.Read(rs[:]); e != nil { - return nil, fmt.Errorf("Failed to read record length: %v", e) - } - - r := make([]byte, binary.LittleEndian.Uint32(rs[:])) - if _, e = deflated.Read(r); e != nil { - return nil, fmt.Errorf("Failed to read a record: %v", e) - } - - ch.records = append(ch.records, r) - ch.numBytes += len(r) - } - - return ch, nil -} - -func deflateData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) { - var e error - var deflator io.Reader - - switch compressorIndex { - case NoCompression: - deflator = src - case Snappy: - deflator = snappy.NewReader(src) - case Gzip: - deflator, e = gzip.NewReader(src) - if e != nil { - return nil, fmt.Errorf("Failed to create gzip reader: %v", e) - } - default: - return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex) - } - - deflated := new(bytes.Buffer) - if _, e = io.Copy(deflated, deflator); e != nil { - return nil, fmt.Errorf("Failed to deflate chunk data: %v", e) - } - - return deflated, nil -} diff --git a/go/recordio/header.go b/go/recordio/header.go deleted file mode 100644 index d3aefae3646eb002bd2c31789c7eb182faf02b1f..0000000000000000000000000000000000000000 --- a/go/recordio/header.go +++ /dev/null @@ -1,59 +0,0 @@ -package recordio - -import ( - "encoding/binary" - "fmt" - "io" -) - -const ( - // NoCompression means writing raw chunk data into files. - // With other choices, chunks are compressed before written. - NoCompression = iota - // Snappy had been the default compressing algorithm widely - // used in Google. It compromises between speech and - // compression ratio. - Snappy - // Gzip is a well-known compression algorithm. It is - // recommmended only you are looking for compression ratio. - Gzip - - magicNumber uint32 = 0x01020304 - defaultCompressor = Snappy -) - -// Header is the metadata of Chunk. 
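[Editorial note] As a cross-language illustration of the chunk header that the `Header` struct just below reads and writes, a rough Python reader for the same 20-byte layout (five little-endian uint32s: magic number, CRC32 checksum, compressor id, compressed size, record count) might look like the following. This is only a sketch for understanding the on-disk format being removed here, not part of the package; it does not decompress or validate the chunk payload.

```python
# Rough sketch of parsing a RecordIO chunk header, mirroring Header.write() in
# header.go: five little-endian uint32s, starting with the magic number 0x01020304.
import struct

MAGIC_NUMBER = 0x01020304
HEADER_FMT = '<5I'                          # five little-endian unsigned 32-bit ints
HEADER_SIZE = struct.calcsize(HEADER_FMT)   # 20 bytes


def parse_header(buf):
    if len(buf) < HEADER_SIZE:
        raise ValueError('truncated header')
    magic, checksum, compressor, compressed_size, num_records = \
        struct.unpack(HEADER_FMT, buf[:HEADER_SIZE])
    if magic != MAGIC_NUMBER:
        raise ValueError('failed to parse magic number')
    return {
        'checksum': checksum,
        'compressor': compressor,
        'compressed_size': compressed_size,
        'num_records': num_records,
    }


if __name__ == '__main__':
    sample = struct.pack(HEADER_FMT, MAGIC_NUMBER, 123, 0, 456, 7)
    print(parse_header(sample))
```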
-type Header struct { - checkSum uint32 - compressor uint32 - compressedSize uint32 - numRecords uint32 -} - -func (c *Header) write(w io.Writer) (int, error) { - var buf [20]byte - binary.LittleEndian.PutUint32(buf[0:4], magicNumber) - binary.LittleEndian.PutUint32(buf[4:8], c.checkSum) - binary.LittleEndian.PutUint32(buf[8:12], c.compressor) - binary.LittleEndian.PutUint32(buf[12:16], c.compressedSize) - binary.LittleEndian.PutUint32(buf[16:20], c.numRecords) - return w.Write(buf[:]) -} - -func parseHeader(r io.Reader) (*Header, error) { - var buf [20]byte - if _, e := r.Read(buf[:]); e != nil { - return nil, e - } - - if v := binary.LittleEndian.Uint32(buf[0:4]); v != magicNumber { - return nil, fmt.Errorf("Failed to parse magic number") - } - - return &Header{ - checkSum: binary.LittleEndian.Uint32(buf[4:8]), - compressor: binary.LittleEndian.Uint32(buf[8:12]), - compressedSize: binary.LittleEndian.Uint32(buf[12:16]), - numRecords: binary.LittleEndian.Uint32(buf[16:20]), - }, nil -} diff --git a/go/recordio/range_scanner.go b/go/recordio/range_scanner.go deleted file mode 100644 index 46e2eee68c7b7fc6bb1b69f60a75fd85cfe85576..0000000000000000000000000000000000000000 --- a/go/recordio/range_scanner.go +++ /dev/null @@ -1,140 +0,0 @@ -package recordio - -import "io" - -// Index consists offsets and sizes of the consequetive chunks in a RecordIO file. -type Index struct { - chunkOffsets []int64 - chunkLens []uint32 - numRecords int // the number of all records in a file. - chunkRecords []int // the number of records in chunks. -} - -// LoadIndex scans the file and parse chunkOffsets, chunkLens, and len. -func LoadIndex(r io.ReadSeeker) (*Index, error) { - f := &Index{} - offset := int64(0) - var e error - var hdr *Header - - for { - hdr, e = parseHeader(r) - if e != nil { - break - } - - f.chunkOffsets = append(f.chunkOffsets, offset) - f.chunkLens = append(f.chunkLens, hdr.numRecords) - f.chunkRecords = append(f.chunkRecords, int(hdr.numRecords)) - f.numRecords += int(hdr.numRecords) - - offset, e = r.Seek(int64(hdr.compressedSize), io.SeekCurrent) - if e != nil { - break - } - } - - if e == io.EOF { - return f, nil - } - return nil, e -} - -// NumRecords returns the total number of records in a RecordIO file. -func (r *Index) NumRecords() int { - return r.numRecords -} - -// NumChunks returns the total number of chunks in a RecordIO file. -func (r *Index) NumChunks() int { - return len(r.chunkLens) -} - -// ChunkIndex return the Index of i-th Chunk. -func (r *Index) ChunkIndex(i int) *Index { - idx := &Index{} - idx.chunkOffsets = []int64{r.chunkOffsets[i]} - idx.chunkLens = []uint32{r.chunkLens[i]} - idx.chunkRecords = []int{r.chunkRecords[i]} - idx.numRecords = idx.chunkRecords[0] - return idx -} - -// Locate returns the index of chunk that contains the given record, -// and the record index within the chunk. It returns (-1, -1) if the -// record is out of range. -func (r *Index) Locate(recordIndex int) (int, int) { - sum := 0 - for i, l := range r.chunkLens { - sum += int(l) - if recordIndex < sum { - return i, recordIndex - sum + int(l) - } - } - return -1, -1 -} - -// RangeScanner scans records in a specified range within [0, numRecords). -type RangeScanner struct { - reader io.ReadSeeker - index *Index - start, end, cur int - chunkIndex int - chunk *Chunk - err error -} - -// NewRangeScanner creates a scanner that sequencially reads records in the -// range [start, start+len). If start < 0, it scans from the -// beginning. If len < 0, it scans till the end of file. 
-func NewRangeScanner(r io.ReadSeeker, index *Index, start, len int) *RangeScanner { - if start < 0 { - start = 0 - } - if len < 0 || start+len >= index.NumRecords() { - len = index.NumRecords() - start - } - - return &RangeScanner{ - reader: r, - index: index, - start: start, - end: start + len, - cur: start - 1, // The intial status required by Scan. - chunkIndex: -1, - chunk: &Chunk{}, - } -} - -// Scan moves the cursor forward for one record and loads the chunk -// containing the record if not yet. -func (s *RangeScanner) Scan() bool { - s.cur++ - - if s.cur >= s.end { - s.err = io.EOF - } else { - if ci, _ := s.index.Locate(s.cur); s.chunkIndex != ci { - s.chunkIndex = ci - s.chunk, s.err = parseChunk(s.reader, s.index.chunkOffsets[ci]) - } - } - - return s.err == nil -} - -// Record returns the record under the current cursor. -func (s *RangeScanner) Record() []byte { - _, ri := s.index.Locate(s.cur) - return s.chunk.records[ri] -} - -// Err returns the first non-EOF error that was encountered by the -// Scanner. -func (s *RangeScanner) Err() error { - if s.err == io.EOF { - return nil - } - - return s.err -} diff --git a/go/recordio/recordio_internal_test.go b/go/recordio/recordio_internal_test.go deleted file mode 100644 index 30e317925d8c95e64a42bd8ac5a1dd43b95ee81d..0000000000000000000000000000000000000000 --- a/go/recordio/recordio_internal_test.go +++ /dev/null @@ -1,90 +0,0 @@ -package recordio - -import ( - "bytes" - "testing" - "unsafe" - - "github.com/stretchr/testify/assert" -) - -func TestChunkHead(t *testing.T) { - assert := assert.New(t) - - c := &Header{ - checkSum: 123, - compressor: 456, - compressedSize: 789, - } - - var buf bytes.Buffer - _, e := c.write(&buf) - assert.Nil(e) - - cc, e := parseHeader(&buf) - assert.Nil(e) - assert.Equal(c, cc) -} - -func TestWriteAndRead(t *testing.T) { - assert := assert.New(t) - - data := []string{ - "12345", - "1234", - "12"} - - var buf bytes.Buffer - w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize. - - n, e := w.Write([]byte(data[0])) // not exceed chunk size. - assert.Nil(e) - assert.Equal(5, n) - - n, e = w.Write([]byte(data[1])) // not exceed chunk size. - assert.Nil(e) - assert.Equal(4, n) - - n, e = w.Write([]byte(data[2])) // exeeds chunk size, dump and create a new chunk. - assert.Nil(e) - assert.Equal(n, 2) - - assert.Nil(w.Close()) // flush the second chunk. - assert.Nil(w.Writer) - - n, e = w.Write([]byte("anything")) // not effective after close. - assert.NotNil(e) - assert.Equal(n, 0) - - idx, e := LoadIndex(bytes.NewReader(buf.Bytes())) - assert.Nil(e) - assert.Equal([]uint32{2, 1}, idx.chunkLens) - assert.Equal( - []int64{0, - int64(4 + // magic number - unsafe.Sizeof(Header{}) + - 5 + // first record - 4 + // second record - 2*4)}, // two record legnths - idx.chunkOffsets) - - s := NewRangeScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1) - i := 0 - for s.Scan() { - assert.Equal(data[i], string(s.Record())) - i++ - } -} - -func TestWriteEmptyFile(t *testing.T) { - assert := assert.New(t) - - var buf bytes.Buffer - w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize. 
- assert.Nil(w.Close()) - assert.Equal(0, buf.Len()) - - idx, e := LoadIndex(bytes.NewReader(buf.Bytes())) - assert.Nil(e) - assert.Equal(0, idx.NumRecords()) -} diff --git a/go/recordio/recordio_test.go b/go/recordio/recordio_test.go deleted file mode 100644 index e4ef835afa6b4cc0e2bf24e9cbc544565ff8ca50..0000000000000000000000000000000000000000 --- a/go/recordio/recordio_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package recordio_test - -import ( - "bytes" - "reflect" - "testing" - - "github.com/PaddlePaddle/Paddle/go/recordio" -) - -func TestWriteRead(t *testing.T) { - const total = 1000 - var buf bytes.Buffer - w := recordio.NewWriter(&buf, 0, -1) - for i := 0; i < total; i++ { - _, err := w.Write(make([]byte, i)) - if err != nil { - t.Fatal(err) - } - } - w.Close() - - idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes())) - if err != nil { - t.Fatal(err) - } - - if idx.NumRecords() != total { - t.Fatal("num record does not match:", idx.NumRecords(), total) - } - - s := recordio.NewRangeScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1) - i := 0 - for s.Scan() { - if !reflect.DeepEqual(s.Record(), make([]byte, i)) { - t.Fatal("not equal:", len(s.Record()), len(make([]byte, i))) - } - i++ - } - - if i != total { - t.Fatal("total count not match:", i, total) - } -} - -func TestChunkIndex(t *testing.T) { - const total = 1000 - var buf bytes.Buffer - w := recordio.NewWriter(&buf, 0, -1) - for i := 0; i < total; i++ { - _, err := w.Write(make([]byte, i)) - if err != nil { - t.Fatal(err) - } - } - w.Close() - - idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes())) - if err != nil { - t.Fatal(err) - } - - if idx.NumChunks() != total { - t.Fatal("unexpected chunk num:", idx.NumChunks(), total) - } - - for i := 0; i < total; i++ { - newIdx := idx.ChunkIndex(i) - s := recordio.NewRangeScanner(bytes.NewReader(buf.Bytes()), newIdx, -1, -1) - j := 0 - for s.Scan() { - if !reflect.DeepEqual(s.Record(), make([]byte, i)) { - t.Fatal("not equal:", len(s.Record()), len(make([]byte, i))) - } - j++ - } - if j != 1 { - t.Fatal("unexpected record per chunk:", j) - } - } -} diff --git a/go/recordio/scanner.go b/go/recordio/scanner.go deleted file mode 100644 index 865228ff651c6eee2cf1fa05ec38a4964394b6dc..0000000000000000000000000000000000000000 --- a/go/recordio/scanner.go +++ /dev/null @@ -1,140 +0,0 @@ -package recordio - -import ( - "fmt" - "os" - "path/filepath" -) - -// Scanner is a scanner for multiple recordio files. -type Scanner struct { - paths []string - curFile *os.File - curScanner *RangeScanner - pathIdx int - end bool - err error -} - -// NewScanner creates a new Scanner. -func NewScanner(paths ...string) (*Scanner, error) { - var ps []string - for _, s := range paths { - match, err := filepath.Glob(s) - if err != nil { - return nil, err - } - - ps = append(ps, match...) - } - - if len(ps) == 0 { - return nil, fmt.Errorf("no valid path provided: %v", paths) - } - - return &Scanner{paths: ps}, nil -} - -// Scan moves the cursor forward for one record and loads the chunk -// containing the record if not yet. 
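The `Index.Locate` method shown earlier is the piece both scanners lean on: it walks the per-chunk record counts until the running sum passes the requested record, yielding the chunk index and the record's offset inside that chunk. A standalone C++ sketch of the same mapping, with `chunkLens` standing in for the field of that name:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Maps a global record index to (chunk index, index within that chunk).
// Returns {-1, -1} when recordIndex is past the last record, mirroring the
// Go version.
std::pair<int, int> locate(const std::vector<uint32_t>& chunkLens,
                           int recordIndex) {
  int sum = 0;
  for (size_t i = 0; i < chunkLens.size(); ++i) {
    sum += static_cast<int>(chunkLens[i]);
    if (recordIndex < sum) {
      // Offset within chunk i = recordIndex - (records before chunk i).
      return {static_cast<int>(i),
              recordIndex - (sum - static_cast<int>(chunkLens[i]))};
    }
  }
  return {-1, -1};
}
```

`RangeScanner.Scan` calls this on every step but only re-parses a chunk when the chunk index changes, which is what keeps sequential scans cheap.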
-func (s *Scanner) Scan() bool { - if s.err != nil { - return false - } - - if s.end { - return false - } - - if s.curScanner == nil { - more, err := s.nextFile() - if err != nil { - s.err = err - return false - } - - if !more { - s.end = true - return false - } - } - - curMore := s.curScanner.Scan() - s.err = s.curScanner.Err() - - if s.err != nil { - return curMore - } - - if !curMore { - err := s.curFile.Close() - if err != nil { - s.err = err - return false - } - s.curFile = nil - - more, err := s.nextFile() - if err != nil { - s.err = err - return false - } - - if !more { - s.end = true - return false - } - - return s.Scan() - } - return true -} - -// Err returns the first non-EOF error that was encountered by the -// Scanner. -func (s *Scanner) Err() error { - return s.err -} - -// Record returns the record under the current cursor. -func (s *Scanner) Record() []byte { - if s.curScanner == nil { - return nil - } - - return s.curScanner.Record() -} - -// Close release the resources. -func (s *Scanner) Close() error { - s.curScanner = nil - if s.curFile != nil { - err := s.curFile.Close() - s.curFile = nil - return err - } - return nil -} - -func (s *Scanner) nextFile() (bool, error) { - if s.pathIdx >= len(s.paths) { - return false, nil - } - - path := s.paths[s.pathIdx] - s.pathIdx++ - f, err := os.Open(path) - if err != nil { - return false, err - } - - idx, err := LoadIndex(f) - if err != nil { - f.Close() - return false, err - } - - s.curFile = f - s.curScanner = NewRangeScanner(f, idx, 0, -1) - return true, nil -} diff --git a/go/recordio/writer.go b/go/recordio/writer.go deleted file mode 100644 index 39112e518fb45c66f4e16733924c14a261134d9d..0000000000000000000000000000000000000000 --- a/go/recordio/writer.go +++ /dev/null @@ -1,60 +0,0 @@ -package recordio - -import ( - "fmt" - "io" -) - -const ( - defaultMaxChunkSize = 32 * 1024 * 1024 -) - -// Writer creates a RecordIO file. -type Writer struct { - io.Writer // Set to nil to mark a closed writer. - chunk *Chunk - maxChunkSize int // total records size, excluding metadata, before compression. - compressor int -} - -// NewWriter creates a RecordIO file writer. Each chunk is compressed -// using the deflate algorithm given compression level. Note that -// level 0 means no compression and -1 means default compression. -func NewWriter(w io.Writer, maxChunkSize, compressor int) *Writer { - if maxChunkSize < 0 { - maxChunkSize = defaultMaxChunkSize - } - - if compressor < 0 { - compressor = defaultCompressor - } - - return &Writer{ - Writer: w, - chunk: &Chunk{}, - maxChunkSize: maxChunkSize, - compressor: compressor} -} - -// Writes a record. It returns an error if Close has been called. -func (w *Writer) Write(record []byte) (int, error) { - if w.Writer == nil { - return 0, fmt.Errorf("Cannot write since writer had been closed") - } - - if w.chunk.numBytes+len(record) > w.maxChunkSize { - if e := w.chunk.dump(w.Writer, w.compressor); e != nil { - return 0, e - } - } - - w.chunk.add(record) - return len(record), nil -} - -// Close flushes the current chunk and makes the writer invalid. 
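The `Write` path above applies a simple size threshold: records accumulate in an in-memory chunk, and when the next record would push the chunk past `maxChunkSize`, the chunk is dumped (compressed and written with its header) before the record is added. A rough C++ sketch of just that accounting, with the `dump` callback standing in for `Chunk.dump` and compression left out:

```cpp
#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#include <vector>

class ChunkingWriter {
 public:
  // dump stands in for Chunk.dump: compress the records and write one chunk.
  ChunkingWriter(size_t maxChunkSize,
                 std::function<void(const std::vector<std::string>&)> dump)
      : maxChunkSize_(maxChunkSize), dump_(std::move(dump)) {}

  // Returns false after close(), mirroring the Go writer's error.
  bool write(std::string record) {
    if (closed_) return false;
    if (numBytes_ + record.size() > maxChunkSize_) {
      flush();  // dump the current chunk before starting a new one
    }
    numBytes_ += record.size();
    records_.push_back(std::move(record));
    return true;
  }

  void close() {
    if (closed_) return;
    flush();  // an empty chunk writes nothing, as TestWriteEmptyFile expects
    closed_ = true;
  }

 private:
  void flush() {
    if (!records_.empty()) dump_(records_);
    records_.clear();
    numBytes_ = 0;
  }

  size_t maxChunkSize_;
  std::function<void(const std::vector<std::string>&)> dump_;
  std::vector<std::string> records_;
  size_t numBytes_ = 0;
  bool closed_ = false;
};
```

One behavioral detail worth noting from the Go code: `Write` dumps the previous chunk but still appends the oversized record to the next one, so a single record larger than `maxChunkSize` simply ends up as a chunk of its own.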
-func (w *Writer) Close() error { - e := w.chunk.dump(w.Writer, w.compressor) - w.Writer = nil - return e -} diff --git a/paddle/cuda/include/hl_cpu_matrix_kernel.cuh b/paddle/cuda/include/hl_cpu_matrix_kernel.cuh index 9c49a4bd2083794e98b099b25944bedec3d5a2ff..aaa24325514812eda33309660ba85c3ceece770e 100644 --- a/paddle/cuda/include/hl_cpu_matrix_kernel.cuh +++ b/paddle/cuda/include/hl_cpu_matrix_kernel.cuh @@ -17,10 +17,9 @@ limitations under the License. */ #include #include "hl_base.h" -#if defined(__ARM_NEON__) || defined(__ARM_NEON) -#include "hl_neon_matrix_kernel.cuh" -#else -#include "hl_sse_matrix_kernel.cuh" + +#ifndef __CUDA_ARCH__ +#include "hl_cpu_matrix_kernel_detail.cuh" #endif /** @@ -114,35 +113,6 @@ void hl_cpu_apply_quaternary_op(Op op, } } -template -void hl_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda) { - for (int i = 0; i < dimM; i++) { - real tmp = agg.init(); - for (int j = 0; j < dimN; j++) { - tmp = agg(tmp, op(A[i * lda + j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } -} - -template -void hl_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { - for (int i = 0; i < dimM; i++) { - real tmp = agg.init(); - for (int j = 0; j < dimN; j++) { - tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } -} - template void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, int dimM, int dimN, diff --git a/paddle/cuda/include/hl_sse_matrix_kernel.cuh b/paddle/cuda/include/hl_cpu_matrix_kernel_detail.cuh similarity index 89% rename from paddle/cuda/include/hl_sse_matrix_kernel.cuh rename to paddle/cuda/include/hl_cpu_matrix_kernel_detail.cuh index 9e50580669d2d4523dda239e90b4ed18a9214e2f..85ca836fdc46682195ac29a1ebf2237c28fc3311 100644 --- a/paddle/cuda/include/hl_sse_matrix_kernel.cuh +++ b/paddle/cuda/include/hl_cpu_matrix_kernel_detail.cuh @@ -13,26 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifndef HL_SSE_MATRIX_KERNEL_CUH_ -#define HL_SSE_MATRIX_KERNEL_CUH_ +#ifndef HL_MATRIX_KERNEL_DETAIL_CUH_ +#define HL_MATRIX_KERNEL_DETAIL_CUH_ #include "hl_matrix_type.cuh" -#define VECTOR_SIZE 16 - -#ifndef PADDLE_TYPE_DOUBLE -/* number of float in vector */ -#define VECTOR_LEN 4 -#define VECTOR_SET _mm_set_ps1 -#else -#if defined(__APPLE__) || defined(__OSX__) -#define _mm_set_pd1 _mm_set1_pd -#endif -/* number of double in vector */ -#define VECTOR_LEN 2 -#define VECTOR_SET _mm_set_pd1 -#endif - inline bool hl_check_align(size_t size) { return !(size & (VECTOR_SIZE - 1)); } @@ -41,27 +26,63 @@ inline bool hl_check_align(void *ptr) { return hl_check_align(reinterpret_cast(ptr)); } -#ifndef PADDLE_TYPE_DOUBLE -template -inline real hl_agg_op(Agg agg, vecType mm) { - __m128 lo = _mm_unpacklo_ps(mm, mm); - __m128 hi = _mm_unpackhi_ps(mm, mm); - __m128 tmp1 = agg.vecOp(lo, hi); - __m128 tmp2 = _mm_movehl_ps(tmp1, tmp1); - __m128 ret = agg.vecOp(tmp1, tmp2); +template +void hl_matrix_row_op(Agg agg, Op op, Saver sv, + int dimM, int dimN, + real *dst, int ld, + real *A, int lda) { + for (int i = 0; i < dimM; i++) { + real tmp = agg.init(); + for (int j = 0; j < dimN; j++) { + tmp = agg(tmp, op(A[i * lda + j])); + } + dst[i*ld] = sv(dst[i*ld], tmp); + } +} - return _mm_cvtss_f32(ret); +template +void hl_matrix_row_op(Agg agg, Op op, Saver sv, + int dimM, int dimN, + real *dst, int ld, + real *A, int lda, + real *B, int ldb) { + for (int i = 0; i < dimM; i++) { + real tmp = agg.init(); + for (int j = 0; j < dimN; j++) { + tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); + } + dst[i*ld] = sv(dst[i*ld], tmp); + } } -#else -template -inline real hl_agg_op(Agg agg, vecType mm) { - __m128d lo = _mm_unpacklo_pd(mm, mm); - __m128d hi = _mm_unpackhi_pd(mm, mm); - __m128d ret = agg.vecOp(lo, hi); - - return _mm_cvtsd_f64(ret); + +template +void hl_matrix_column_op(Agg agg, Op op, Saver sv, + int dimM, int dimN, + real *dst, + real *A, int lda) { + for (int j = 0; j < dimN; j++) { + real tmp = agg.init(); + for (int i = 0; i < dimM; i++) { + tmp = agg(tmp, op(A[i * lda + j])); + } + dst[j] = sv(dst[j], tmp); + } +} + +template +void hl_matrix_column_op(Agg agg, Op op, Saver sv, + int dimM, int dimN, + real *dst, + real *A, int lda, + real *B, int ldb) { + for (int j = 0; j < dimN; j++) { + real tmp = agg.init(); + for (int i = 0; i < dimM; i++) { + tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); + } + dst[j] = sv(dst[j], tmp); + } } -#endif template void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, @@ -118,35 +139,6 @@ void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, } } -template -void hl_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - for (int j = 0; j < dimN; j++) { - real tmp = agg.init(); - for (int i = 0; i < dimM; i++) { - tmp = agg(tmp, op(A[i * lda + j])); - } - dst[j] = sv(dst[j], tmp); - } -} - -template -void hl_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - for (int j = 0; j < dimN; j++) { - real tmp = agg.init(); - for (int i = 0; i < dimM; i++) { - tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); - } - dst[j] = sv(dst[j], tmp); - } -} - /* * MaxRow greater than or equal dimN * dimN is multiples of VECTOR_LEN @@ -315,4 +307,4 @@ void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, } } -#endif /* HL_SSE_MATRIX_KERNEL_CUH_ */ +#endif /* HL_MATRIX_KERNEL_DETAIL_CUH_ */ diff --git a/paddle/cuda/include/hl_cpu_scalar.cuh 
b/paddle/cuda/include/hl_cpu_scalar.cuh new file mode 100644 index 0000000000000000000000000000000000000000..93043cd4bc030ef525d5bcf8d83196f2ce92eec6 --- /dev/null +++ b/paddle/cuda/include/hl_cpu_scalar.cuh @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef HL_CPU_SCALAR_CUH_ +#define HL_CPU_SCALAR_CUH_ + +#define VECTOR_SIMD false +#define VECTOR_SET hl_vec_set + +#ifndef PADDLE_TYPE_DOUBLE +/* size of float */ +#define VECTOR_SIZE 4 +#else +/* size of double */ +#define VECTOR_SIZE 8 +#endif + +typedef real vecType; + +/* Consider a real as a vector */ +#define VECTOR_LEN 1 + +template +inline real hl_agg_op(Agg agg, vecType mm) { + return mm; +} + +INLINE real hl_vec_set(const real r) { + return r; +} + +INLINE real hl_vec_classification_error(const real a, + const real b, + const real p, + const real r) { + return ((a > p) == (b > p)) ? 0.0f : 1.0f; +} + +#endif // HL_CPU_SCALAR_CUH_ diff --git a/paddle/cuda/include/hl_cpu_simd_neon.cuh b/paddle/cuda/include/hl_cpu_simd_neon.cuh new file mode 100644 index 0000000000000000000000000000000000000000..0b1cf4abdc4d5ef2a640c75587308f7f082b854b --- /dev/null +++ b/paddle/cuda/include/hl_cpu_simd_neon.cuh @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifndef HL_CPU_SIMD_NEON_CUH_ +#define HL_CPU_SIMD_NEON_CUH_ + +#include + +#define VECTOR_SIMD true +#define VECTOR_SIZE 16 +#define VECTOR_SET hl_vec_set + +#ifndef PADDLE_TYPE_DOUBLE + +typedef float32x4_t vecType; + +/* number of float in vector */ +#define VECTOR_LEN 4 + +template +inline real hl_agg_op(Agg agg, vecType mm) { + float32x4_t rev = vrev64q_f32(mm); + float32x4_t tmp1 = agg.vecOp(rev, rev); + float32x2_t lo = vget_high_f32(rev); + float32x2_t hi = vget_low_f32(rev); + float32x4_t tmp2 = vcombine_f32(hi, lo); + float32x4_t ret = agg.vecOp(tmp1, tmp2); + + return vgetq_lane_f32(ret, 0); +} + +inline float32x4_t hl_vec_set(const real f) { + return vdupq_n_f32(f); +} + +inline float32x4_t hl_vec_classification_error(const float32x4_t a, + const float32x4_t b, + const float32x4_t p, + const float32x4_t r) { + uint32x4_t tmp1 = vcgtq_f32(a, p); + uint32x4_t tmp2 = vcgtq_f32(b, p); + uint32x4_t tmp3 = veorq_u32(tmp1, tmp2); + return vcvtq_f32_u32(vandq_u32(tmp3, vcvtq_u32_f32(r))); +} + +#else + +#ifdef __aarch64__ +typedef float64x2_t vecType; + +/* number of float in vector */ +#define VECTOR_LEN 2 +#define VECTOR_SET vdupq_n_f64 + +#error To be implemented +#else +#error NEON instructions does not support double precision +#endif // __aarch64__ + +#endif + +#endif // HL_CPU_SIMD_NEON_CUH_ diff --git a/paddle/cuda/include/hl_cpu_simd_sse.cuh b/paddle/cuda/include/hl_cpu_simd_sse.cuh new file mode 100644 index 0000000000000000000000000000000000000000..a104b626220f473324fc3c99e7cd305c3e86f3db --- /dev/null +++ b/paddle/cuda/include/hl_cpu_simd_sse.cuh @@ -0,0 +1,94 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifndef HL_CPU_SIMD_SSE_CUH_ +#define HL_CPU_SIMD_SSE_CUH_ + +#include +#include +#include + +#define VECTOR_SIMD true +#define VECTOR_SIZE 16 +#define VECTOR_SET hl_vec_set + +#ifndef PADDLE_TYPE_DOUBLE + +typedef __m128 vecType; + +/* number of float in vector */ +#define VECTOR_LEN 4 + +template +inline real hl_agg_op(Agg agg, vecType mm) { + __m128 lo = _mm_unpacklo_ps(mm, mm); + __m128 hi = _mm_unpackhi_ps(mm, mm); + __m128 tmp1 = agg.vecOp(lo, hi); + __m128 tmp2 = _mm_movehl_ps(tmp1, tmp1); + __m128 ret = agg.vecOp(tmp1, tmp2); + + return _mm_cvtss_f32(ret); +} + +inline __m128 hl_vec_set(const real f) { + return _mm_set_ps1(f); +} + +inline __m128 hl_vec_classification_error(const __m128 a, + const __m128 b, + const __m128 p, + const __m128 r) { + __m128 tmp1 = _mm_cmpgt_ps(a, p); + __m128 tmp2 = _mm_cmpgt_ps(b, p); + __m128 tmp3 = _mm_xor_ps(tmp1, tmp2); + return _mm_and_ps(tmp3, r); +} + +#else + +typedef __m128d vecType; + +/* number of double in vector */ +#define VECTOR_LEN 2 + +template +inline real hl_agg_op(Agg agg, vecType mm) { + __m128d lo = _mm_unpacklo_pd(mm, mm); + __m128d hi = _mm_unpackhi_pd(mm, mm); + __m128d ret = agg.vecOp(lo, hi); + + return _mm_cvtsd_f64(ret); +} + +inline __m128d hl_vec_set(const real d) { +#if defined(__APPLE__) || defined(__OSX__) + return _mm_set1_pd(d); +#else + return _mm_set_pd1(d); +#endif +} + +inline __m128d hl_vec_classification_error(const __m128d a, + const __m128d b, + const __m128d p, + const __m128d r) { + __m128d tmp1 = _mm_cmpgt_pd(a, p); + __m128d tmp2 = _mm_cmpgt_pd(b, p); + __m128d tmp3 = _mm_xor_pd(tmp1, tmp2); + return _mm_and_pd(tmp3, r); +} + +#endif + +#endif // HL_CPU_SIMD_SSE_CUH_ diff --git a/paddle/cuda/include/hl_matrix_base.cuh b/paddle/cuda/include/hl_matrix_base.cuh index 8b755c1095c2c4fdb7e74d8cddc948e6a6af380b..53fdb47ec9c05f5cf85d0956176ad9abf6d656f9 100644 --- a/paddle/cuda/include/hl_matrix_base.cuh +++ b/paddle/cuda/include/hl_matrix_base.cuh @@ -18,26 +18,6 @@ limitations under the License. */ #include "hl_matrix_type.cuh" -#ifdef __CUDA_ARCH__ -/** - * CUDA kernel inline function - */ -#define INLINE __device__ inline -#else -/** - * CPP inline function - */ -#define INLINE inline -#endif - -#ifndef PADDLE_TYPE_DOUBLE -#define DEVICE_FMAX fmaxf -#define DEVICE_FMIN fminf -#else -#define DEVICE_FMAX fmax -#define DEVICE_FMIN fmin -#endif - class BaseOp { public: static const bool sse = false; @@ -66,10 +46,8 @@ typedef BaseOp SSESquaredDiff; typedef BaseOp SSEFirst; typedef BaseOp SSESecond; typedef BaseOp SSEClassificationError; -#elif defined(__ARM__NEON__) || defined(__ARM_NEON) -#include "hl_matrix_base_neon.cuh" #else -#include "hl_matrix_base_sse.cuh" +#include "hl_matrix_base_detail.cuh" #endif namespace aggregate { @@ -124,7 +102,7 @@ public: add2(const real s1, const real s2) : SSEAdd2(s1, s2), p1(s1), p2(s2) {} INLINE real operator()(const real a, const real b) const { - return p1 * a + p2 * b; + return p1 * a + p2 * b; } }; diff --git a/paddle/cuda/include/hl_matrix_base_detail.cuh b/paddle/cuda/include/hl_matrix_base_detail.cuh new file mode 100644 index 0000000000000000000000000000000000000000..de1fd17d524a486cc15af721731d2e815f17263e --- /dev/null +++ b/paddle/cuda/include/hl_matrix_base_detail.cuh @@ -0,0 +1,153 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef HL_MATRIX_BASE_DETAIL_CUH_ +#define HL_MATRIX_BASE_DETAIL_CUH_ + +#include "hl_matrix_type.cuh" +#include "hl_tensor_ops.h" + +namespace aggregate { +class SSESum { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::add()(a, b); + } +}; + +class SSEMax { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::max()(a, b); + } +}; + +class SSEMin { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::min()(a, b); + } +}; +} // namespace aggregate + +namespace base { +namespace unary { +class SSEIdentity { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a) const { + return a; + } +}; +} // namespace unary + +namespace binary { +class SSEAdd { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::add()(a, b); + } +}; + +class SSEAdd2 { +public: + static const bool sse = VECTOR_SIMD; + const real p1; + const real p2; + vecType mp1; + vecType mp2; + +public: + SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) { + mp1 = hl_vec_set(p1); + mp2 = hl_vec_set(p2); + } + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::add_scale(mp1, mp2)(a, b); + } +}; + +class SSESub { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::sub()(a, b); + } +}; + +class SSEMul { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::mul()(a, b); + } +}; + +class SSEDiv { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hppl::binary::div()(a, b); + } +}; + +class SSESquaredDiff { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + vecType tmp = hppl::binary::sub()(a, b); + return hppl::binary::mul()(tmp, tmp); + } +}; + +class SSEFirst { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return a; + } +}; + +class SSESecond { +public: + static const bool sse = VECTOR_SIMD; + INLINE vecType vecOp(const vecType a, const vecType b) const { + return b; + } +}; + +class SSEClassificationError { +public: + static const bool sse = VECTOR_SIMD; + const real p; + vecType mp; + vecType result; + +public: + explicit SSEClassificationError(const real s) : p(s) { + mp = hl_vec_set(p); + result = hl_vec_set(1.0f); + } + INLINE vecType vecOp(const vecType a, const vecType b) const { + return hl_vec_classification_error(a, b, mp, result); + } +}; +} // namespace binary +} // namespace base + +#endif /* HL_MATRIX_BASE_DETAIL_CUH_ */ diff --git a/paddle/cuda/include/hl_matrix_base_neon.cuh b/paddle/cuda/include/hl_matrix_base_neon.cuh deleted file mode 100644 index 
e13019f5ee24ad600005c99678426ee3808b0e54..0000000000000000000000000000000000000000 --- a/paddle/cuda/include/hl_matrix_base_neon.cuh +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_MATRIX_BASE_NEON_CUH_ -#define HL_MATRIX_BASE_NEON_CUH_ - -namespace aggregate { -class SSESum { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return vaddq_f32(a, b); - } -}; - -class SSEMax { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return vmaxq_f32(a, b); - } -}; - -class SSEMin { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return vminq_f32(a, b); - } -}; -} // namespace aggregate - -namespace base { -namespace unary { -class SSEIdentity { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a) const { - return a; - } -}; -} // namespace unary - -namespace binary { -class SSEAdd { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return vaddq_f32(a, b); - } -}; - -class SSEAdd2 { -public: - static const bool sse = true; - const real p1; - const real p2; - float32x4_t mp1; - float32x4_t mp2; - -public: - SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) { - mp1 = vdupq_n_f32(p1); - mp2 = vdupq_n_f32(p2); - } - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - float32x4_t tmp1, tmp2; - tmp1 = vmulq_f32(mp1, a); - tmp2 = vmulq_f32(mp2, b); - return vaddq_f32(tmp1, tmp2); - } -}; - -class SSESub { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return vsubq_f32(a, b); - } -}; - -class SSEMul { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return vmulq_f32(a, b); - } -}; - -class SSEDiv { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - float32x4_t tmp; - tmp = vrecpeq_f32(b); - return vmulq_f32(a, tmp); - } -}; - -class SSESquaredDiff { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - float32x4_t tmp; - tmp = vsubq_f32(a, b); - return vmulq_f32(tmp, tmp); - } -}; - -class SSEFirst { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return a; - } -}; - -class SSESecond { -public: - static const bool sse = true; - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - return b; - } -}; - -class SSEClassificationError { -public: - static const bool sse = true; - const real p; - float32x4_t mp; - uint32x4_t result; - -public: - explicit SSEClassificationError(const real s) : p(s) { - mp = 
vdupq_n_f32(p); - result = vdupq_n_u32(1); - } - // TODO: to be check - INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const { - uint32x4_t tmp1 = vcgtq_f32(a, mp); - uint32x4_t tmp2 = vcgtq_f32(b, mp); - uint32x4_t tmp3 = veorq_u32(tmp1, tmp2); - return vcvtq_f32_u32(vandq_u32(tmp3, result)); - } -}; -} // namespace binary -} // namespace base - -#endif /* HL_MATRIX_BASE_NEON_CUH_ */ diff --git a/paddle/cuda/include/hl_matrix_base_sse.cuh b/paddle/cuda/include/hl_matrix_base_sse.cuh deleted file mode 100644 index db6c9cca03a8974a15cd2e7fbaf73033e3a57f4b..0000000000000000000000000000000000000000 --- a/paddle/cuda/include/hl_matrix_base_sse.cuh +++ /dev/null @@ -1,211 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_MATRIX_BASE_SSE_CUH_ -#define HL_MATRIX_BASE_SSE_CUH_ - -namespace aggregate { -class SSESum { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_add_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_add_pd(a, b); - } -}; - -class SSEMax { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_max_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_max_pd(a, b); - } -}; - -class SSEMin { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_min_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_min_pd(a, b); - } -}; -} // namespace aggregate - -namespace base { -namespace unary { -class SSEIdentity { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a) const { - return a; - } - INLINE __m128d vecOp(const __m128d a) const { - return a; - } -}; -} // namespace unary - -namespace binary { -class SSEAdd { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_add_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_add_pd(a, b); - } -}; - -class SSEAdd2 { -public: - static const bool sse = true; - const real p1; - const real p2; - union {__m128 f; __m128d d;} mp1; - union {__m128 f; __m128d d;} mp2; - -public: - SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) { - if (sizeof(real) == sizeof(float)) { - mp1.f = _mm_set1_ps(p1); - mp2.f = _mm_set1_ps(p2); - } else { - mp1.d = _mm_set1_pd(p1); - mp2.d = _mm_set1_pd(p2); - } - } - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - __m128 tmp1, tmp2; - tmp1 = _mm_mul_ps(mp1.f, a); - tmp2 = _mm_mul_ps(mp2.f, b); - return _mm_add_ps(tmp1, tmp2); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - __m128d tmp1, tmp2; - tmp1 = _mm_mul_pd(mp1.d, a); - tmp2 = _mm_mul_pd(mp2.d, b); - return _mm_add_pd(tmp1, tmp2); - } -}; - -class SSESub { -public: - static const bool sse = true; - INLINE __m128 
vecOp(const __m128 a, const __m128 b) const { - return _mm_sub_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_sub_pd(a, b); - } -}; - -class SSEMul { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_mul_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_mul_pd(a, b); - } -}; - -class SSEDiv { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_div_ps(a, b); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_div_pd(a, b); - } -}; - -class SSESquaredDiff { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return _mm_mul_ps(_mm_sub_ps(a, b), _mm_sub_ps(a, b)); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return _mm_mul_pd(_mm_sub_pd(a, b), _mm_sub_pd(a, b)); - } -}; - -class SSEFirst { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return a; - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return a; - } -}; - -class SSESecond { -public: - static const bool sse = true; - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - return b; - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - return b; - } -}; - -class SSEClassificationError { -public: - static const bool sse = true; - const real p; - union {__m128 f; __m128d d;} mp; - union {__m128 f; __m128d d;} result; - -public: - explicit SSEClassificationError(const real s) : p(s) { - if (sizeof(real) == sizeof(float)) { - mp.f = _mm_set1_ps(p); - result.f = _mm_set1_ps(1.0f); - } else { - mp.d = _mm_set1_pd(p); - result.d = _mm_set1_pd(1.0); - } - } - INLINE __m128 vecOp(const __m128 a, const __m128 b) const { - __m128 tmp1 = _mm_cmpgt_ps(a, mp.f); - __m128 tmp2 = _mm_cmpgt_ps(b, mp.f); - __m128 tmp3 = _mm_xor_ps(tmp1, tmp2); - return _mm_and_ps(tmp3, result.f); - } - INLINE __m128d vecOp(const __m128d a, const __m128d b) const { - __m128d tmp1 = _mm_cmpgt_pd(a, mp.d); - __m128d tmp2 = _mm_cmpgt_pd(b, mp.d); - __m128d tmp3 = _mm_xor_pd(tmp1, tmp2); - return _mm_and_pd(tmp3, result.d); - } -}; -} // namespace binary -} // namespace base - -#endif /* HL_MATRIX_BASE_SSE_CUH_ */ diff --git a/paddle/cuda/include/hl_matrix_type.cuh b/paddle/cuda/include/hl_matrix_type.cuh index f965ba966793f6f6eea0ad3606f60553fe904dda..e18235219bd9f78dd87a92d448cb290d9a4904a1 100644 --- a/paddle/cuda/include/hl_matrix_type.cuh +++ b/paddle/cuda/include/hl_matrix_type.cuh @@ -17,35 +17,35 @@ limitations under the License. */ #include "hl_base.h" -#if defined(__CUDA_ARCH__) +#ifdef __CUDA_ARCH__ +/** + * CUDA kernel inline function + */ +#define INLINE __device__ inline +#else +/** + * CPP inline function + */ +#define INLINE inline +#endif + +#ifdef __CUDA_ARCH__ #include #ifndef PADDLE_TYPE_DOUBLE typedef float4 vecType; #else typedef double2 vecType; #endif -#elif (defined __ARM_NEON) || (defined __ARM_NEON__) -#include -#ifndef PADDLE_TYPE_DOUBLE -typedef float32x4_t vecType; -#else -#error NEON instructions does not support double precision -#endif +#elif defined(__SSE3__) +#include "hl_cpu_simd_sse.cuh" +#define PADDLE_USE_SSE3 +#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__) +// Currently nvcc does not support neon intrinsic. +// TODO: Extract simd intrinsic implementation from .cu files. 
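With this dispatch in place, every CPU backend (hl_cpu_simd_sse.cuh, hl_cpu_simd_neon.cuh, or the scalar hl_cpu_scalar.cuh) exposes the same small surface: a `vecType`, `VECTOR_LEN`, `VECTOR_SET`/`hl_vec_set`, and `hl_agg_op` for the final horizontal reduction, with element-wise work delegated to the `hppl::binary` functors in hl_tensor_ops.h. A hedged sketch of writing a CPU-side reduction against that surface alone; `SumOp` and `sum_row` are illustrative stand-ins rather than part of the patch, include order is assumed to define `INLINE` before hl_tensor_ops.h is seen, and the pointer cast assumes `VECTOR_SIZE`-aligned rows as the existing kernels do:

```cpp
#include "hl_matrix_type.cuh"  // selects the SSE, NEON or scalar backend
#include "hl_tensor_ops.h"     // hppl::binary::add and its SIMD specializations

// Illustrative aggregate: scalar op for the tail, vecOp for whole lanes.
struct SumOp {
  static real init() { return 0.0f; }
  INLINE real operator()(const real a, const real b) const { return a + b; }
  INLINE vecType vecOp(const vecType a, const vecType b) const {
    return hppl::binary::add<vecType>()(a, b);
  }
};

// Sums one row of length dimN. The same code compiles against any CPU
// backend because it only touches vecType, VECTOR_LEN, VECTOR_SET and
// hl_agg_op; on the scalar backend VECTOR_LEN is 1 and hl_agg_op is a no-op.
inline real sum_row(const real* row, int dimN) {
  SumOp agg;
  vecType acc = VECTOR_SET(SumOp::init());
  const vecType* v = reinterpret_cast<const vecType*>(row);  // assumes alignment
  const int vecCount = dimN / VECTOR_LEN;
  for (int i = 0; i < vecCount; ++i) {
    acc = agg.vecOp(acc, v[i]);
  }
  real result = hl_agg_op(agg, acc);  // horizontal reduction of the lanes
  for (int i = vecCount * VECTOR_LEN; i < dimN; ++i) {
    result = agg(result, row[i]);  // scalar tail
  }
  return result;
}
```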
+#include "hl_cpu_simd_neon.cuh" +#define PADDLE_USE_NEON #else -#include -#include -#include -#ifndef PADDLE_TYPE_DOUBLE -typedef __m128 vecType; -#else -typedef __m128d vecType; -#endif -#endif - -#ifdef __CUDA_ARCH__ -#define INLINE __device__ inline -#else -#define INLINE inline +#include "hl_cpu_scalar.cuh" #endif #endif // HL_MATRIX_TYPE_CUH_ diff --git a/paddle/cuda/include/hl_neon_matrix_kernel.cuh b/paddle/cuda/include/hl_neon_matrix_kernel.cuh deleted file mode 100644 index 7b4e5b00079b66d0a46a1344a43f41962cf50f10..0000000000000000000000000000000000000000 --- a/paddle/cuda/include/hl_neon_matrix_kernel.cuh +++ /dev/null @@ -1,299 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_NEON_MATRIX_KERNEL_CUH_ -#define HL_NEON_MATRIX_KERNEL_CUH_ - -#include "hl_matrix_type.cuh" - -#define VECTOR_SIZE 16 - -/* number of float in vector */ -#define VECTOR_LEN 4 -#define VECTOR_SET vdupq_n_f32 - -inline bool hl_check_align(size_t size) { - return !(size & (VECTOR_SIZE - 1)); -} - -inline bool hl_check_align(void *ptr) { - return hl_check_align(reinterpret_cast(ptr)); -} - -template -inline real hl_agg_op(Agg agg, vecType mm) { - float32x4_t rev = vrev64q_f32(mm); - float32x4_t tmp1 = agg.vecOp(rev, rev); - float32x2_t lo = vget_high_f32(rev); - float32x2_t hi = vget_low_f32(rev); - float32x4_t tmp2 = vcombine_f32(hi, lo); - float32x4_t ret = agg.vecOp(tmp1, tmp2); - - return vgetq_lane_f32(ret, 0); -} - -template -void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda) { - for (int i = 0; i < dimM; i++, A += lda) { - vecType mm = VECTOR_SET(agg.init()); - vecType *a = (vecType*)(A); - for (int j = 0; j < dimN / VECTOR_LEN; j++, a++) { - mm = agg.vecOp(mm, op.vecOp(*a)); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - real tmp = hl_agg_op(agg, mm); - real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN; - for (int j = 0; j < rem; j++) { - tmp = agg(tmp, op(a[j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } else { - dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm)); - } - } -} - -template -void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { - for (int i = 0; i < dimM; i++, A += lda, B += ldb) { - vecType mm = VECTOR_SET(agg.init()); - vecType *a = (vecType*)(A); - vecType *b = (vecType*)(B); - for (int j = 0; j < dimN / VECTOR_LEN; j++, a++, b++) { - mm = agg.vecOp(mm, op.vecOp(*a, *b)); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - real tmp = hl_agg_op(agg, mm); - real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN; - real *b = B + (dimN / VECTOR_LEN) * VECTOR_LEN; - for (int j = 0; j < rem; j++) { - tmp = agg(tmp, op(a[j], b[j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } else { - dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm)); - } - } -} - -template -void hl_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - for (int j = 0; j < dimN; j++) { 
- real tmp = agg.init(); - for (int i = 0; i < dimM; i++) { - tmp = agg(tmp, op(A[i * lda + j])); - } - dst[j] = sv(dst[j], tmp); - } -} - -template -void hl_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - for (int j = 0; j < dimN; j++) { - real tmp = agg.init(); - for (int i = 0; i < dimM; i++) { - tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); - } - dst[j] = sv(dst[j], tmp); - } -} - -/* - * MaxRow greater than or equal dimN - * dimN is multiples of VECTOR_LEN - * so rem <= MaxRow / VECTOR_LEN - */ -template -void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - vecType mm[MaxRow / VECTOR_LEN]; - for (int n = 0; n < MaxRow / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - for (int n = 0; n < dimN / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < dimN / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - A += (dimN / VECTOR_LEN) * VECTOR_LEN; - dst += (dimN / VECTOR_LEN) * VECTOR_LEN; - hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda); - } -} - -/* - * dimN is multiples of VECTOR_LEN - * dimN greater than Step - */ -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step) { - vecType mm[Step / VECTOR_LEN]; - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - } - - int remRow = dimN % Step; - if (remRow) { - hl_sse_column_op_with_rem(agg, op, sv, dimM, remRow, dst, A, lda); - } -} - -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - if (dimN <= 16) { - hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda); - } else if (dimN <= 32) { - hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda); - } else if (dimN <= 1024 || dimM <= 512) { - hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda); - } else { - hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda); - } -} - -template -void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - vecType mm[MaxRow / VECTOR_LEN]; - for (int n = 0; n < MaxRow / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - vecType *b = (vecType*)(B + i * ldb); - for (int n = 0; n < dimN / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < dimN / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - A += (dimN / VECTOR_LEN) * VECTOR_LEN; - B += (dimN / VECTOR_LEN) * VECTOR_LEN; - dst += (dimN / VECTOR_LEN) * VECTOR_LEN; - hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda, B, ldb); - } -} - -template 
-void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step, B += Step) { - vecType mm[Step / VECTOR_LEN]; - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - vecType *b = (vecType*)(B + i * ldb); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - } - - int remRow = dimN % Step; - if (remRow) { - hl_sse_column_op_with_rem( - agg, op, sv, dimM, remRow, dst, A, lda, B, ldb); - } -} - -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - if (dimN <= 16) { - hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else if (dimN <= 32) { - hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else if (dimN <= 1024 || dimM <= 512) { - hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else { - hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } -} - -#endif /* HL_NEON_MATRIX_KERNEL_CUH_ */ diff --git a/paddle/cuda/include/hl_tensor_ops.h b/paddle/cuda/include/hl_tensor_ops.h index 7945b98201b1812790fb0d53123e9ee007640485..93d38b7d2299d994cde0934213668a525bffa80c 100644 --- a/paddle/cuda/include/hl_tensor_ops.h +++ b/paddle/cuda/include/hl_tensor_ops.h @@ -328,6 +328,208 @@ public: INLINE T operator()(const T a, const T b) const { return a < b ? 
b : a; } }; +#ifdef PADDLE_USE_SSE3 +#ifndef PADDLE_TYPE_DOUBLE +template <> +class add<__m128> { +public: + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_add_ps(a, b); + } +}; + +template <> +class add_scale<__m128> { +private: + const __m128 p1; + const __m128 p2; + +public: + INLINE add_scale(const __m128 s1, const __m128 s2) : p1(s1), p2(s2) {} + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_add_ps(_mm_mul_ps(p1, a), _mm_mul_ps(p2, b)); + } +}; + +template <> +class sub<__m128> { +public: + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_sub_ps(a, b); + } +}; + +template <> +class mul<__m128> { +public: + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_mul_ps(a, b); + } +}; + +template <> +class div<__m128> { +public: + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_div_ps(a, b); + } +}; + +template <> +class min<__m128> { +public: + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_min_ps(a, b); + } +}; + +template <> +class max<__m128> { +public: + INLINE __m128 operator()(const __m128 a, const __m128 b) const { + return _mm_max_ps(a, b); + } +}; +#else +template <> +class add<__m128d> { +public: + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_add_pd(a, b); + } +}; + +template <> +class add_scale<__m128d> { +private: + const __m128d p1; + const __m128d p2; + +public: + INLINE add_scale(const __m128d s1, const __m128d s2) : p1(s1), p2(s2) {} + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_add_pd(_mm_mul_pd(p1, a), _mm_mul_pd(p2, b)); + } +}; + +template <> +class sub<__m128d> { +public: + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_sub_pd(a, b); + } +}; + +template <> +class mul<__m128d> { +public: + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_mul_pd(a, b); + } +}; + +template <> +class div<__m128d> { +public: + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_div_pd(a, b); + } +}; + +template <> +class min<__m128d> { +public: + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_min_pd(a, b); + } +}; + +template <> +class max<__m128d> { +public: + INLINE __m128d operator()(const __m128d a, const __m128d b) const { + return _mm_max_pd(a, b); + } +}; +#endif // PADDLE_TYPE_DOUBLE +#endif // PADDLE_USE_SSE3 + +#ifdef PADDLE_USE_NEON +#ifndef PADDLE_TYPE_DOUBLE +template <> +class add { +public: + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + return vmulq_f32(a, b); + } +}; + +template <> +class add_scale { +private: + const float32x4_t p1; + const float32x4_t p2; + +public: + INLINE add_scale(const float32x4_t s1, const float32x4_t s2) + : p1(s1), p2(s2) {} + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + return vaddq_f32(vmulq_f32(p1, a), vmulq_f32(p2, b)); + } +}; + +template <> +class sub { +public: + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + return vsubq_f32(a, b); + } +}; + +template <> +class mul { +public: + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + return vmulq_f32(a, b); + } +}; + +template <> +class div { +public: + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + float32x4_t tmp = vrecpeq_f32(b); + return 
vmulq_f32(a, tmp); + } +}; + +template <> +class min { +public: + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + return vminq_f32(a, b); + } +}; + +template <> +class max { +public: + INLINE float32x4_t operator()(const float32x4_t a, + const float32x4_t b) const { + return vmaxq_f32(a, b); + } +}; +#else +#error To be implemented +#endif // PADDLE_TYPE_DOUBLE +#endif // PADDLE_USE_NEON + } // namespace binary } // namespace hppl diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 81cce31feccf0f1b26888738c1a7e1e3504c7616..c347aaf8d3c51eadbd897bddd87f8afb67c9a0fa 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2916,11 +2916,11 @@ def memory(name, to specify the layer needs to be remembered as the following: .. code-block:: python + mem = memory(size=256) state = fc_layer(input=mem, size=256) mem.set_input(mem) - :param name: the name of the layer which this memory remembers. If name is None, user should call set_input() to specify the name of the layer which this memory remembers. @@ -3407,7 +3407,7 @@ def recurrent_group(step, else, for training or testing, one of the input type must be LayerOutput. - : type is_generating: bool + :type is_generating: bool :return: LayerOutput object. :rtype: LayerOutput @@ -3814,7 +3814,7 @@ def mse_cost(input, label, weight=None, name=None, coeff=1.0, layer_attr=None): .. math:: - \frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2 + \\frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2 :param name: layer name. :type name: basestring @@ -4769,21 +4769,36 @@ def warp_ctc_layer(input, layer_attr=None): """ A layer intergrating the open-source `warp-ctc - ` library, which is used in + `_ library, which is used in `Deep Speech 2: End-toEnd Speech Recognition in English and Mandarin - `, to compute Connectionist Temporal - Classification (CTC) loss. + `_, to compute Connectionist Temporal + Classification (CTC) loss. Besides, another `warp-ctc + `_ repository, which is forked from + the official one, is maintained to enable more compiling options. During the + building process, PaddlePaddle will clone the source codes, build and + install it to :code:`third_party/install/warpctc` directory. + + To use warp_ctc layer, you need to specify the path of :code:`libwarpctc.so`, + using following methods: + + 1. Set it in :code:`paddle.init` (python api) or :code:`paddle_init` (c api), + such as :code:`paddle.init(use_gpu=True, + warpctc_dir=your_paddle_source_dir/third_party/install/warpctc/lib)`. + + 2. Set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH + on Mac OS. For instance, :code:`export + LD_LIBRARY_PATH=your_paddle_source_dir/third_party/install/warpctc/lib:$LD_LIBRARY_PATH`. More details of CTC can be found by referring to `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks `_ + icml2006_GravesFGS06.pdf>`_. Note: - Let num_classes represent the category number. Considering the 'blank' - label needed by CTC, you need to use (num_classes + 1) as the input - size. Thus, the size of both warp_ctc_layer and 'input' layer should - be set to num_classes + 1. + label needed by CTC, you need to use (num_classes + 1) as the input size. + Thus, the size of both warp_ctc layer and 'input' layer should be set to + num_classes + 1. - You can set 'blank' to any value ranged in [0, num_classes], which should be consistent as that used in your labels. 
- As a native 'softmax' activation is integrated into the warp-ctc library,
diff --git a/v1_api_demo/README.md b/v1_api_demo/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9442f76941287a710220f07cf7dbb29ebcadabdc
--- /dev/null
+++ b/v1_api_demo/README.md
@@ -0,0 +1,5 @@
+The examples in v1_api_demo still use the v1 APIs; they will be upgraded to the v2 APIs later.
+Thus, v1_api_demo is a temporary directory. We have decided not to maintain it and will delete it in the future.
+
+Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and
+[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
diff --git a/demo/gan/.gitignore b/v1_api_demo/gan/.gitignore
similarity index 100%
rename from demo/gan/.gitignore
rename to v1_api_demo/gan/.gitignore
diff --git a/demo/gan/README.md b/v1_api_demo/gan/README.md
similarity index 100%
rename from demo/gan/README.md
rename to v1_api_demo/gan/README.md
diff --git a/demo/gan/data/download_cifar.sh b/v1_api_demo/gan/data/download_cifar.sh
similarity index 100%
rename from demo/gan/data/download_cifar.sh
rename to v1_api_demo/gan/data/download_cifar.sh
diff --git a/demo/gan/data/get_mnist_data.sh b/v1_api_demo/gan/data/get_mnist_data.sh
similarity index 100%
rename from demo/gan/data/get_mnist_data.sh
rename to v1_api_demo/gan/data/get_mnist_data.sh
diff --git a/demo/gan/gan_conf.py b/v1_api_demo/gan/gan_conf.py
similarity index 100%
rename from demo/gan/gan_conf.py
rename to v1_api_demo/gan/gan_conf.py
diff --git a/demo/gan/gan_conf_image.py b/v1_api_demo/gan/gan_conf_image.py
similarity index 100%
rename from demo/gan/gan_conf_image.py
rename to v1_api_demo/gan/gan_conf_image.py
diff --git a/demo/gan/gan_trainer.py b/v1_api_demo/gan/gan_trainer.py
similarity index 100%
rename from demo/gan/gan_trainer.py
rename to v1_api_demo/gan/gan_trainer.py
diff --git a/demo/mnist/.gitignore b/v1_api_demo/mnist/.gitignore
similarity index 100%
rename from demo/mnist/.gitignore
rename to v1_api_demo/mnist/.gitignore
diff --git a/demo/mnist/api_train.py b/v1_api_demo/mnist/api_train.py
similarity index 100%
rename from demo/mnist/api_train.py
rename to v1_api_demo/mnist/api_train.py
diff --git a/demo/mnist/data/generate_list.py b/v1_api_demo/mnist/data/generate_list.py
similarity index 100%
rename from demo/mnist/data/generate_list.py
rename to v1_api_demo/mnist/data/generate_list.py
diff --git a/demo/mnist/data/get_mnist_data.sh b/v1_api_demo/mnist/data/get_mnist_data.sh
similarity index 100%
rename from demo/mnist/data/get_mnist_data.sh
rename to v1_api_demo/mnist/data/get_mnist_data.sh
diff --git a/demo/mnist/light_mnist.py b/v1_api_demo/mnist/light_mnist.py
similarity index 100%
rename from demo/mnist/light_mnist.py
rename to v1_api_demo/mnist/light_mnist.py
diff --git a/demo/mnist/mnist_provider.py b/v1_api_demo/mnist/mnist_provider.py
similarity index 100%
rename from demo/mnist/mnist_provider.py
rename to v1_api_demo/mnist/mnist_provider.py
diff --git a/demo/mnist/mnist_util.py b/v1_api_demo/mnist/mnist_util.py
similarity index 100%
rename from demo/mnist/mnist_util.py
rename to v1_api_demo/mnist/mnist_util.py
diff --git a/demo/mnist/train.sh b/v1_api_demo/mnist/train.sh
similarity index 100%
rename from demo/mnist/train.sh
rename to v1_api_demo/mnist/train.sh
diff --git a/demo/mnist/vgg_16_mnist.py b/v1_api_demo/mnist/vgg_16_mnist.py
similarity index 100%
rename from demo/mnist/vgg_16_mnist.py
rename to v1_api_demo/mnist/vgg_16_mnist.py
diff --git a/demo/model_zoo/embedding/.gitignore
b/v1_api_demo/model_zoo/embedding/.gitignore similarity index 100% rename from demo/model_zoo/embedding/.gitignore rename to v1_api_demo/model_zoo/embedding/.gitignore diff --git a/demo/model_zoo/embedding/extract_para.py b/v1_api_demo/model_zoo/embedding/extract_para.py similarity index 100% rename from demo/model_zoo/embedding/extract_para.py rename to v1_api_demo/model_zoo/embedding/extract_para.py diff --git a/demo/model_zoo/embedding/paraconvert.py b/v1_api_demo/model_zoo/embedding/paraconvert.py similarity index 100% rename from demo/model_zoo/embedding/paraconvert.py rename to v1_api_demo/model_zoo/embedding/paraconvert.py diff --git a/demo/model_zoo/embedding/pre_DictAndModel.sh b/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh similarity index 100% rename from demo/model_zoo/embedding/pre_DictAndModel.sh rename to v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh diff --git a/demo/model_zoo/resnet/.gitignore b/v1_api_demo/model_zoo/resnet/.gitignore similarity index 100% rename from demo/model_zoo/resnet/.gitignore rename to v1_api_demo/model_zoo/resnet/.gitignore diff --git a/demo/model_zoo/resnet/classify.py b/v1_api_demo/model_zoo/resnet/classify.py similarity index 100% rename from demo/model_zoo/resnet/classify.py rename to v1_api_demo/model_zoo/resnet/classify.py diff --git a/demo/model_zoo/resnet/example/.gitignore b/v1_api_demo/model_zoo/resnet/example/.gitignore similarity index 100% rename from demo/model_zoo/resnet/example/.gitignore rename to v1_api_demo/model_zoo/resnet/example/.gitignore diff --git a/demo/model_zoo/resnet/example/__init__.py b/v1_api_demo/model_zoo/resnet/example/__init__.py similarity index 100% rename from demo/model_zoo/resnet/example/__init__.py rename to v1_api_demo/model_zoo/resnet/example/__init__.py diff --git a/demo/model_zoo/resnet/example/cat.jpg b/v1_api_demo/model_zoo/resnet/example/cat.jpg similarity index 100% rename from demo/model_zoo/resnet/example/cat.jpg rename to v1_api_demo/model_zoo/resnet/example/cat.jpg diff --git a/demo/model_zoo/resnet/example/dog.jpg b/v1_api_demo/model_zoo/resnet/example/dog.jpg similarity index 100% rename from demo/model_zoo/resnet/example/dog.jpg rename to v1_api_demo/model_zoo/resnet/example/dog.jpg diff --git a/demo/model_zoo/resnet/example/image_list_provider.py b/v1_api_demo/model_zoo/resnet/example/image_list_provider.py similarity index 100% rename from demo/model_zoo/resnet/example/image_list_provider.py rename to v1_api_demo/model_zoo/resnet/example/image_list_provider.py diff --git a/demo/model_zoo/resnet/example/test.list b/v1_api_demo/model_zoo/resnet/example/test.list similarity index 100% rename from demo/model_zoo/resnet/example/test.list rename to v1_api_demo/model_zoo/resnet/example/test.list diff --git a/demo/model_zoo/resnet/extract_fea_c++.sh b/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh similarity index 100% rename from demo/model_zoo/resnet/extract_fea_c++.sh rename to v1_api_demo/model_zoo/resnet/extract_fea_c++.sh diff --git a/demo/model_zoo/resnet/extract_fea_py.sh b/v1_api_demo/model_zoo/resnet/extract_fea_py.sh similarity index 100% rename from demo/model_zoo/resnet/extract_fea_py.sh rename to v1_api_demo/model_zoo/resnet/extract_fea_py.sh diff --git a/demo/model_zoo/resnet/get_model.sh b/v1_api_demo/model_zoo/resnet/get_model.sh similarity index 100% rename from demo/model_zoo/resnet/get_model.sh rename to v1_api_demo/model_zoo/resnet/get_model.sh diff --git a/demo/model_zoo/resnet/load_feature.py b/v1_api_demo/model_zoo/resnet/load_feature.py similarity index 100% 
rename from demo/model_zoo/resnet/load_feature.py rename to v1_api_demo/model_zoo/resnet/load_feature.py diff --git a/demo/model_zoo/resnet/net_diagram.sh b/v1_api_demo/model_zoo/resnet/net_diagram.sh similarity index 100% rename from demo/model_zoo/resnet/net_diagram.sh rename to v1_api_demo/model_zoo/resnet/net_diagram.sh diff --git a/demo/model_zoo/resnet/predict.sh b/v1_api_demo/model_zoo/resnet/predict.sh similarity index 100% rename from demo/model_zoo/resnet/predict.sh rename to v1_api_demo/model_zoo/resnet/predict.sh diff --git a/demo/model_zoo/resnet/resnet.py b/v1_api_demo/model_zoo/resnet/resnet.py similarity index 100% rename from demo/model_zoo/resnet/resnet.py rename to v1_api_demo/model_zoo/resnet/resnet.py diff --git a/demo/quick_start/.gitignore b/v1_api_demo/quick_start/.gitignore similarity index 100% rename from demo/quick_start/.gitignore rename to v1_api_demo/quick_start/.gitignore diff --git a/demo/quick_start/api_predict.py b/v1_api_demo/quick_start/api_predict.py similarity index 100% rename from demo/quick_start/api_predict.py rename to v1_api_demo/quick_start/api_predict.py diff --git a/demo/quick_start/api_predict.sh b/v1_api_demo/quick_start/api_predict.sh similarity index 100% rename from demo/quick_start/api_predict.sh rename to v1_api_demo/quick_start/api_predict.sh diff --git a/demo/quick_start/api_train.py b/v1_api_demo/quick_start/api_train.py similarity index 100% rename from demo/quick_start/api_train.py rename to v1_api_demo/quick_start/api_train.py diff --git a/demo/quick_start/api_train.sh b/v1_api_demo/quick_start/api_train.sh similarity index 100% rename from demo/quick_start/api_train.sh rename to v1_api_demo/quick_start/api_train.sh diff --git a/demo/quick_start/cluster/cluster_train.sh b/v1_api_demo/quick_start/cluster/cluster_train.sh similarity index 100% rename from demo/quick_start/cluster/cluster_train.sh rename to v1_api_demo/quick_start/cluster/cluster_train.sh diff --git a/demo/quick_start/cluster/env.sh b/v1_api_demo/quick_start/cluster/env.sh similarity index 100% rename from demo/quick_start/cluster/env.sh rename to v1_api_demo/quick_start/cluster/env.sh diff --git a/demo/quick_start/cluster/pserver.sh b/v1_api_demo/quick_start/cluster/pserver.sh similarity index 100% rename from demo/quick_start/cluster/pserver.sh rename to v1_api_demo/quick_start/cluster/pserver.sh diff --git a/demo/quick_start/data/README.md b/v1_api_demo/quick_start/data/README.md similarity index 100% rename from demo/quick_start/data/README.md rename to v1_api_demo/quick_start/data/README.md diff --git a/demo/quick_start/data/get_data.sh b/v1_api_demo/quick_start/data/get_data.sh similarity index 100% rename from demo/quick_start/data/get_data.sh rename to v1_api_demo/quick_start/data/get_data.sh diff --git a/demo/quick_start/data/proc_from_raw_data/get_data.sh b/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh similarity index 100% rename from demo/quick_start/data/proc_from_raw_data/get_data.sh rename to v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh diff --git a/demo/quick_start/data/proc_from_raw_data/preprocess.py b/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py similarity index 100% rename from demo/quick_start/data/proc_from_raw_data/preprocess.py rename to v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py diff --git a/demo/quick_start/dataprovider_bow.py b/v1_api_demo/quick_start/dataprovider_bow.py similarity index 100% rename from demo/quick_start/dataprovider_bow.py rename to 
v1_api_demo/quick_start/dataprovider_bow.py diff --git a/demo/quick_start/dataprovider_emb.py b/v1_api_demo/quick_start/dataprovider_emb.py similarity index 100% rename from demo/quick_start/dataprovider_emb.py rename to v1_api_demo/quick_start/dataprovider_emb.py diff --git a/demo/quick_start/predict.sh b/v1_api_demo/quick_start/predict.sh similarity index 100% rename from demo/quick_start/predict.sh rename to v1_api_demo/quick_start/predict.sh diff --git a/demo/quick_start/train.sh b/v1_api_demo/quick_start/train.sh similarity index 100% rename from demo/quick_start/train.sh rename to v1_api_demo/quick_start/train.sh diff --git a/demo/quick_start/trainer_config.bidi-lstm.py b/v1_api_demo/quick_start/trainer_config.bidi-lstm.py similarity index 100% rename from demo/quick_start/trainer_config.bidi-lstm.py rename to v1_api_demo/quick_start/trainer_config.bidi-lstm.py diff --git a/demo/quick_start/trainer_config.cnn.py b/v1_api_demo/quick_start/trainer_config.cnn.py similarity index 100% rename from demo/quick_start/trainer_config.cnn.py rename to v1_api_demo/quick_start/trainer_config.cnn.py diff --git a/demo/quick_start/trainer_config.db-lstm.py b/v1_api_demo/quick_start/trainer_config.db-lstm.py similarity index 100% rename from demo/quick_start/trainer_config.db-lstm.py rename to v1_api_demo/quick_start/trainer_config.db-lstm.py diff --git a/demo/quick_start/trainer_config.emb.py b/v1_api_demo/quick_start/trainer_config.emb.py similarity index 100% rename from demo/quick_start/trainer_config.emb.py rename to v1_api_demo/quick_start/trainer_config.emb.py diff --git a/demo/quick_start/trainer_config.lr.py b/v1_api_demo/quick_start/trainer_config.lr.py similarity index 100% rename from demo/quick_start/trainer_config.lr.py rename to v1_api_demo/quick_start/trainer_config.lr.py diff --git a/demo/quick_start/trainer_config.lstm.py b/v1_api_demo/quick_start/trainer_config.lstm.py similarity index 100% rename from demo/quick_start/trainer_config.lstm.py rename to v1_api_demo/quick_start/trainer_config.lstm.py diff --git a/demo/quick_start/trainer_config.resnet-lstm.py b/v1_api_demo/quick_start/trainer_config.resnet-lstm.py similarity index 100% rename from demo/quick_start/trainer_config.resnet-lstm.py rename to v1_api_demo/quick_start/trainer_config.resnet-lstm.py diff --git a/demo/sequence_tagging/data/get_data.sh b/v1_api_demo/sequence_tagging/data/get_data.sh similarity index 100% rename from demo/sequence_tagging/data/get_data.sh rename to v1_api_demo/sequence_tagging/data/get_data.sh diff --git a/demo/sequence_tagging/data/test.list b/v1_api_demo/sequence_tagging/data/test.list similarity index 100% rename from demo/sequence_tagging/data/test.list rename to v1_api_demo/sequence_tagging/data/test.list diff --git a/demo/sequence_tagging/data/train.list b/v1_api_demo/sequence_tagging/data/train.list similarity index 100% rename from demo/sequence_tagging/data/train.list rename to v1_api_demo/sequence_tagging/data/train.list diff --git a/demo/sequence_tagging/dataprovider.py b/v1_api_demo/sequence_tagging/dataprovider.py similarity index 100% rename from demo/sequence_tagging/dataprovider.py rename to v1_api_demo/sequence_tagging/dataprovider.py diff --git a/demo/sequence_tagging/linear_crf.py b/v1_api_demo/sequence_tagging/linear_crf.py similarity index 100% rename from demo/sequence_tagging/linear_crf.py rename to v1_api_demo/sequence_tagging/linear_crf.py diff --git a/demo/sequence_tagging/readme.md b/v1_api_demo/sequence_tagging/readme.md similarity index 100% rename from 
demo/sequence_tagging/readme.md rename to v1_api_demo/sequence_tagging/readme.md diff --git a/demo/sequence_tagging/rnn_crf.py b/v1_api_demo/sequence_tagging/rnn_crf.py similarity index 100% rename from demo/sequence_tagging/rnn_crf.py rename to v1_api_demo/sequence_tagging/rnn_crf.py diff --git a/demo/sequence_tagging/train.sh b/v1_api_demo/sequence_tagging/train.sh similarity index 100% rename from demo/sequence_tagging/train.sh rename to v1_api_demo/sequence_tagging/train.sh diff --git a/demo/sequence_tagging/train_linear.sh b/v1_api_demo/sequence_tagging/train_linear.sh similarity index 100% rename from demo/sequence_tagging/train_linear.sh rename to v1_api_demo/sequence_tagging/train_linear.sh diff --git a/demo/traffic_prediction/README b/v1_api_demo/traffic_prediction/README similarity index 100% rename from demo/traffic_prediction/README rename to v1_api_demo/traffic_prediction/README diff --git a/demo/traffic_prediction/data/get_data.sh b/v1_api_demo/traffic_prediction/data/get_data.sh similarity index 100% rename from demo/traffic_prediction/data/get_data.sh rename to v1_api_demo/traffic_prediction/data/get_data.sh diff --git a/demo/traffic_prediction/dataprovider.py b/v1_api_demo/traffic_prediction/dataprovider.py similarity index 100% rename from demo/traffic_prediction/dataprovider.py rename to v1_api_demo/traffic_prediction/dataprovider.py diff --git a/demo/traffic_prediction/gen_result.py b/v1_api_demo/traffic_prediction/gen_result.py similarity index 100% rename from demo/traffic_prediction/gen_result.py rename to v1_api_demo/traffic_prediction/gen_result.py diff --git a/demo/traffic_prediction/predict.sh b/v1_api_demo/traffic_prediction/predict.sh similarity index 100% rename from demo/traffic_prediction/predict.sh rename to v1_api_demo/traffic_prediction/predict.sh diff --git a/demo/traffic_prediction/train.sh b/v1_api_demo/traffic_prediction/train.sh similarity index 100% rename from demo/traffic_prediction/train.sh rename to v1_api_demo/traffic_prediction/train.sh diff --git a/demo/traffic_prediction/trainer_config.py b/v1_api_demo/traffic_prediction/trainer_config.py similarity index 100% rename from demo/traffic_prediction/trainer_config.py rename to v1_api_demo/traffic_prediction/trainer_config.py diff --git a/v1_api_demo/vae/README.md b/v1_api_demo/vae/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e55d483b023773900729622a6cac44116fc79c76 --- /dev/null +++ b/v1_api_demo/vae/README.md @@ -0,0 +1,13 @@ +# Variational Autoencoder (VAE) + +This demo implements VAE training as described in the original paper (https://arxiv.org/abs/1312.6114). + + +In order to run the model, first download the MNIST dataset by running the shell script in ./data. + +Then you can run the command below. The flag --use_gpu specifies whether to use the GPU for training (0 means CPU, 1 means GPU). + +$ python vae_train.py [--use_gpu 1] + +The generated images will be stored in the ./samples/ directory. +The corresponding models will be stored in the ./params/ directory. diff --git a/v1_api_demo/vae/data/get_mnist_data.sh b/v1_api_demo/vae/data/get_mnist_data.sh new file mode 100755 index 0000000000000000000000000000000000000000..a77c81bf5af9ddb6634ff89460797ca543c5e517 --- /dev/null +++ b/v1_api_demo/vae/data/get_mnist_data.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env sh +# This script downloads the MNIST data and unzips it. +set -e +DIR="$( cd "$(dirname "$0")" ; pwd -P )" +rm -rf "$DIR/mnist_data" +mkdir "$DIR/mnist_data" +cd "$DIR/mnist_data" + +echo "Downloading..."
+ +for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte +do + if [ ! -e $fname ]; then + wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz + gunzip ${fname}.gz + fi +done diff --git a/v1_api_demo/vae/dataloader.py b/v1_api_demo/vae/dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..e9ff95d44f825cd941b5687f754618e66d491e7f --- /dev/null +++ b/v1_api_demo/vae/dataloader.py @@ -0,0 +1,60 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + + +class MNISTloader(): + def __init__(self, + data_path="./data/mnist_data/", + batch_size=60, + process='train'): + self.batch_size = batch_size + self.data_path = data_path + self._pointer = 0 + self.image_batches = np.array([]) + self.process = process + + def _extract_images(self, filename, n): + f = open(filename, 'rb') + f.read(16) + data = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)) + #Mapping data into [-1, 1] + data = data / 255. * 2. - 1 + data_batches = np.split(data, 60000 / self.batch_size, 0) + + f.close() + + return data_batches + + @property + def pointer(self): + return self._pointer + + def load_data(self): + TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path + TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path + + if self.process == 'train': + self.image_batches = self._extract_images(TRAIN_IMAGES, 60000) + else: + self.image_batches = self._extract_images(TEST_IMAGES, 10000) + + def next_batch(self): + batch = self.image_batches[self._pointer] + self._pointer = (self._pointer + 1) % (60000 / self.batch_size) + return np.array(batch) + + def reset_pointer(self): + self._pointer = 0 diff --git a/v1_api_demo/vae/vae_conf.py b/v1_api_demo/vae/vae_conf.py new file mode 100644 index 0000000000000000000000000000000000000000..301dd23793d19ec5946cc7bb07e32c53c04a972b --- /dev/null +++ b/v1_api_demo/vae/vae_conf.py @@ -0,0 +1,116 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle.trainer_config_helpers import * +import numpy as np + +is_generating = get_config_arg("is_generating", bool, False) + +settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer()) + +X_dim = 28 * 28 +h_dim = 128 +z_dim = 100 + + +def reparameterization(mu, logvar): + eps = ParamAttr(initial_mean=0., initial_std=1) + with mixed_layer() as sigma: + sigma += dotmul_projection(layer_math.exp(logvar) * 0.5, param_attr=eps) + return mu + sigma + + +def q_func(X): + """ + xavier initialization + """ + param_attr = ParamAttr( + name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.)) + mu_param = ParamAttr( + name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.)) + logvar_param = ParamAttr( + name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.)) + + bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.) + mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.) + logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.) + + share_layer = fc_layer( + X, + size=h_dim, + param_attr=param_attr, + bias_attr=bias_attr, + act=ReluActivation()) + + return (fc_layer( + share_layer, + size=z_dim, + param_attr=mu_param, + bias_attr=mu_bias, + act=LinearActivation()), fc_layer( + share_layer, + size=z_dim, + param_attr=logvar_param, + bias_attr=logvar_bias, + act=LinearActivation())) + + +def generator(z): + + hidden_param = ParamAttr( + name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.)) + hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.) + prob_param = ParamAttr( + name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.)) + prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.) + + hidden_layer = fc_layer( + z, + size=h_dim, + act=ReluActivation(), + param_attr=hidden_param, + bias_attr=hidden_bias) + prob = fc_layer( + hidden_layer, + size=X_dim, + act=SigmoidActivation(), + param_attr=prob_param, + bias_attr=prob_bias) + + return prob + + +def reconstruct_error(prob, X): + cost = multi_binary_label_cross_entropy(input=prob, label=X) + return cost + + +def KL_loss(mu, logvar): + with mixed_layer() as mu_square: + mu_square += dotmul_operator(mu, mu, scale=1.) + + cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar) + + return cost + + +if not is_generating: + x_batch = data_layer(name='x_batch', size=X_dim) + mu, logvar = q_func(x_batch) + z_samples = reparameterization(mu, logvar) + prob = generator(z_samples) + outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar)) +else: + z_samples = data_layer(name='noise', size=z_dim) + outputs(generator(z_samples)) diff --git a/v1_api_demo/vae/vae_train.py b/v1_api_demo/vae/vae_train.py new file mode 100644 index 0000000000000000000000000000000000000000..1babb011c77b92861cc680a2e1aaa8c9ae5d97b5 --- /dev/null +++ b/v1_api_demo/vae/vae_train.py @@ -0,0 +1,175 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import random +import numpy as np +import sys, os +from PIL import Image + +from paddle.trainer.config_parser import parse_config +from paddle.trainer.config_parser import logger +import py_paddle.swig_paddle as api +import dataloader +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec  # used by plot_samples below + + +def plot_samples(samples): + fig = plt.figure(figsize=(4, 4)) + gs = gridspec.GridSpec(4, 4) + gs.update(wspace=0.05, hspace=0.05) + for i, sample in enumerate(samples): + plt.subplot(gs[i]) + plt.axis('off') + plt.imshow(sample.reshape(28, 28), cmap='Greys_r') + + return fig + + +def CHECK_EQ(a, b): + assert a == b, "a=%s, b=%s" % (a, b) + + +def get_fake_samples(generator_machine, batch_size, noise): + gen_inputs = api.Arguments.createArguments(1) + gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise)) + gen_outputs = api.Arguments.createArguments(0) + generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST) + fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat() + return fake_samples + + +def copy_shared_parameters(src, dst): + ''' + copy the parameters from src to dst + :param src: the source of the parameters + :type src: GradientMachine + :param dst: the destination of the parameters + :type dst: GradientMachine + ''' + src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())] + src_params = dict([(p.getName(), p) for p in src_params]) + + for i in xrange(dst.getParameterSize()): + dst_param = dst.getParameter(i) + src_param = src_params.get(dst_param.getName(), None) + if src_param is None: + continue + src_value = src_param.getBuf(api.PARAMETER_VALUE) + dst_value = dst_param.getBuf(api.PARAMETER_VALUE) + CHECK_EQ(len(src_value), len(dst_value)) + dst_value.copyFrom(src_value) + dst_param.setValueUpdated() + + +def find(iterable, cond): + for item in iterable: + if cond(item): + return item + return None + + +def get_layer_size(model_conf, layer_name): + layer_conf = find(model_conf.layers, lambda x: x.name == layer_name) + assert layer_conf is not None, "Cannot find '%s' layer" % layer_name + return layer_conf.size + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--use_gpu", default="1", help="1 means use gpu for training") + parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter") + args = parser.parse_args() + use_gpu = args.use_gpu + assert use_gpu in ["0", "1"] + + if not os.path.exists("./samples/"): + os.makedirs("./samples/") + + if not os.path.exists("./params/"): + os.makedirs("./params/") + + api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', + '--log_period=1000', '--gpu_id=' + args.gpu_id, + '--save_dir=' + "./params/") + + conf = "vae_conf.py" + + trainer_conf = parse_config(conf, "is_generating=False") + gener_conf = parse_config(conf, "is_generating=True") + + batch_size = trainer_conf.opt_config.batch_size + + noise_dim = get_layer_size(gener_conf.model_config, "noise") + + mnist = dataloader.MNISTloader(batch_size=batch_size) + mnist.load_data() + + training_machine = api.GradientMachine.createFromConfigProto( + trainer_conf.model_config) + + generator_machine = api.GradientMachine.createFromConfigProto( + gener_conf.model_config) + + trainer = api.Trainer.create(trainer_conf, training_machine) + + trainer.startTrain() + + for train_pass in xrange(100): + trainer.startTrainPass() + mnist.reset_pointer() + i = 0 + it = 0 + while mnist.pointer != 0 or i == 0: + X
= mnist.next_batch().astype('float32') + + inputs = api.Arguments.createArguments(1) + inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X)) + + trainer.trainOneDataBatch(batch_size, inputs) + + if it % 1000 == 0: + + outputs = api.Arguments.createArguments(0) + training_machine.forward(inputs, outputs, api.PASS_TEST) + loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat()) + print "\niter: {}".format(str(it).zfill(3)) + print "VAE loss: {}".format(str(loss).zfill(3)) + + # Sync the trained parameters into the generator machine before sampling + copy_shared_parameters(training_machine, generator_machine) + + z_samples = np.random.randn(batch_size, + noise_dim).astype('float32') + samples = get_fake_samples(generator_machine, batch_size, + z_samples) + + # Plot the first 16 generated images in a single figure. + figure = plot_samples(samples[:16]) + plt.savefig( + "./samples/{}_{}.png".format( + str(train_pass).zfill(3), str(i).zfill(3)), + bbox_inches='tight') + plt.close(figure) + i += 1 + it += 1 + + trainer.finishTrainPass() + trainer.finishTrain() + + +if __name__ == '__main__': + main()
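
For reviewers who want to sanity-check the objective that the new vae_conf.py wires together, the following is a minimal NumPy sketch (illustrative only, not part of the patch; all function names are hypothetical) of the textbook reparameterization trick from the paper and the analytic KL term 0.5 * sum(exp(logvar) + mu^2 - 1 - logvar) computed by KL_loss(). Note that the config's reparameterization() appears to use a learned dotmul projection in place of freshly sampled noise, so this sketch shows the standard formulation rather than the exact layer graph.

import numpy as np


def reparameterize(mu, logvar, rng=np.random):
    # z = mu + sigma * eps, with sigma = exp(0.5 * logvar) and eps ~ N(0, I)
    eps = rng.standard_normal(mu.shape)
    return mu + np.exp(0.5 * logvar) * eps


def kl_divergence(mu, logvar):
    # Analytic KL(N(mu, sigma^2) || N(0, 1)), summed over latent dimensions;
    # matches the expression used in KL_loss() of vae_conf.py.
    return 0.5 * np.sum(np.exp(logvar) + mu ** 2 - 1.0 - logvar, axis=1)


def reconstruction_error(prob, x):
    # Multi-label binary cross-entropy between decoder output and input,
    # a NumPy analogue of multi_binary_label_cross_entropy in the config.
    eps = 1e-8
    return -np.sum(x * np.log(prob + eps) + (1.0 - x) * np.log(1.0 - prob + eps),
                   axis=1)


# Toy check: with mu = 0 and logvar = 0 the KL term is exactly zero.
mu = np.zeros((4, 100))
logvar = np.zeros((4, 100))
print(kl_divergence(mu, logvar))          # [0. 0. 0. 0.]
print(reparameterize(mu, logvar).shape)   # (4, 100)

Under the settings in vae_conf.py (z_dim = 100, X_dim = 28 * 28), mu and logvar would have shape (batch_size, 100) and the decoder output shape (batch_size, 784).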