diff --git a/.copyright.hook b/.copyright.hook
new file mode 100644
index 0000000000000000000000000000000000000000..dc1b096a0ad28db732b794fa856efed71917c5e8
--- /dev/null
+++ b/.copyright.hook
@@ -0,0 +1,121 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import io, re
+import sys, os
+import subprocess
+import platform
+
+COPYRIGHT = '''
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+LANG_COMMENT_MARK = None
+
+NEW_LINE_MARK = None
+
+COPYRIGHT_HEADER = None
+
+if platform.system() == "Windows":
+    NEW_LINE_MARK = "\r\n"
+else:
+    NEW_LINE_MARK = '\n'
+    COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1]
+    p = re.search('(\d{4})', COPYRIGHT_HEADER).group(0)
+    process = subprocess.Popen(["date", "+%Y"], stdout=subprocess.PIPE)
+    date, err = process.communicate()
+    date = date.decode("utf-8").rstrip("\n")
+    COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date)
+
+
+def generate_copyright(template, lang='C'):
+    if lang == 'Python':
+        LANG_COMMENT_MARK = '#'
+    else:
+        LANG_COMMENT_MARK = "//"
+
+    lines = template.split(NEW_LINE_MARK)
+    BLANK = " "
+    ans = LANG_COMMENT_MARK + BLANK + COPYRIGHT_HEADER + NEW_LINE_MARK
+    for lino, line in enumerate(lines):
+        if lino == 0 or lino == 1 or lino == len(lines) - 1: continue
+        if len(line)  == 0:
+            BLANK = ""
+        else:
+            BLANK = " "
+        ans += LANG_COMMENT_MARK + BLANK + line + NEW_LINE_MARK
+
+    return ans + "\n"
+
+
+def lang_type(filename):
+    if filename.endswith(".py"):
+        return "Python"
+    elif filename.endswith(".h"):
+        return "C"
+    elif filename.endswith(".c"):
+        return "C"
+    elif filename.endswith(".hpp"):
+        return "C"
+    elif filename.endswith(".cc"):
+        return "C"
+    elif filename.endswith(".cpp"):
+        return "C"
+    elif filename.endswith(".cu"):
+        return "C"
+    elif filename.endswith(".cuh"):
+        return "C"
+    elif filename.endswith(".go"):
+        return "C"
+    elif filename.endswith(".proto"):
+        return "C"
+    else:
+        print("Unsupported filetype %s", filename)
+        exit(0)
+
+
+PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser(
+        description='Checker for copyright declaration.')
+    parser.add_argument('filenames', nargs='*', help='Filenames to check')
+    args = parser.parse_args(argv)
+
+    retv = 0
+    for filename in args.filenames:
+        fd = io.open(filename, encoding="utf-8")
+        first_line = fd.readline()
+        second_line = fd.readline()
+        if "COPYRIGHT (C)" in first_line.upper(): continue
+        if first_line.startswith("#!") or PYTHON_ENCODE.match(
+                second_line) != None or PYTHON_ENCODE.match(first_line) != None:
+            continue
+        original_contents = io.open(filename, encoding="utf-8").read()
+        new_contents = generate_copyright(
+            COPYRIGHT, lang_type(filename)) + original_contents
+        print('Auto Insert Copyright Header {}'.format(filename))
+        retv = 1
+        with io.open(filename, 'w') as output_file:
+            output_file.write(new_contents)
+
+    return retv
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 59661c9c1da53a2ddac0127ed1827fedde811a1d..89c620bb2f7ef634fa80b64eec7037e8cb9a190c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -31,3 +31,11 @@
     -   id: go-fmt
         types:
         - go
+-   repo: local
+    hooks:
+    -   id: copyright_checker
+        name: copyright_checker
+        entry: python ./.copyright.hook
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000000000000000000000000000000000..54131b48eca463aef817a4b96ba1b64de4b60aab
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,46 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at paddle-dev@baidu.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/4/
diff --git a/CODE_OF_CONDUCT_cn.md b/CODE_OF_CONDUCT_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..2be794f1f324cf9b6bc304d4e5812076b56f4551
--- /dev/null
+++ b/CODE_OF_CONDUCT_cn.md
@@ -0,0 +1,50 @@
+# 参与者公约
+
+## 我们的保证
+
+为了促进一个开放透明且友好的环境，我们作为贡献者和维护者保证：无论年龄、种族、民族、性别认同和表达（方式）、体型、身体健全与否、经验水平、国籍、个人表现、宗教或性别取向，参与者在我们项目和社区中都免于骚扰。
+
+## 我们的标准
+
+有助于创造正面环境的行为包括但不限于：
+* 使用友好和包容性语言
+* 尊重不同的观点和经历
+* 耐心地接受建设性批评
+* 关注对社区最有利的事情
+* 友善对待其他社区成员
+
+身为参与者不能接受的行为包括但不限于：
+* 使用与性有关的言语或是图像，以及不受欢迎的性骚扰
+* 捣乱/煽动/造谣的行为或进行侮辱/贬损的评论，人身攻击及政治攻击
+* 公开或私下的骚扰
+* 未经许可地发布他人的个人资料，例如住址或是电子地址
+* 其他可以被合理地认定为不恰当或者违反职业操守的行为
+
+## 我们的责任
+
+项目维护者有责任为「可接受的行为」标准做出诠释，以及对已发生的不被接受的行为采取恰当且公平的纠正措施。
+
+项目维护者有权利及责任去删除、编辑、拒绝与本行为标准有所违背的评论(comments)、提交(commits)、代码、wiki 编辑、问题(issues)和其他贡献，以及项目维护者可暂时或永久性的禁止任何他们认为有不适当、威胁、冒犯、有害行为的贡献者。
+
+## 使用范围
+
+当一个人代表该项目或是其社区时，本行为标准适用于其项目平台和公共平台。
+
+代表项目或是社区的情况，举例来说包括使用官方项目的电子邮件地址、通过官方的社区媒体账号发布或线上或线下事件中担任指定代表。
+
+该项目的呈现方式可由其项目维护者进行进一步的定义及解释。
+
+## 强制执行
+
+可以通过paddle-dev@baidu.com，来联系项目团队来举报滥用、骚扰或其他不被接受的行为。
+
+任何维护团队认为有必要且适合的所有投诉都将进行审查及调查，并做出相对应的回应。项目小组有对事件回报者有保密的义务。具体执行的方针近一步细节可能会单独公布。
+
+没有切实地遵守或是执行本行为标准的项目维护人员，可能会因项目领导人或是其他成员的决定，暂时或是永久地取消其参与资格。
+
+## 来源
+
+本行为标准改编自[贡献者公约][主页]，版本 1.4
+可在此观看https://www.contributor-covenant.org/zh-cn/version/1/4/code-of-conduct.html
+
+[主页]: https://www.contributor-covenant.org
diff --git a/README.md b/README.md
index 577528e7aaf45ce002467590ec66b19afb145920..d06375a444dd65675bdd75baccf8445c1638a87c 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
 
       - Optimized math operations through SSE/AVX intrinsics, BLAS libraries
       (e.g. MKL, OpenBLAS, cuBLAS) or customized CPU/GPU kernels.
+      - Optimized CNN networks through MKL-DNN library.
       - Highly optimized recurrent networks which can handle **variable-length**
       sequence without padding.
       - Optimized local and distributed training for models with high dimensional
diff --git a/adversarial/README.md b/adversarial/README.md
deleted file mode 100644
index 51da21918a9d6e2192a2e03eabef4fde97896bc5..0000000000000000000000000000000000000000
--- a/adversarial/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Advbox
-
-Advbox is a Python toolbox to create adversarial examples that fool neural networks. It requires Python and paddle.
-
-## How to use
-
-1. train a model and save it's parameters. (like fluid_mnist.py)
-2. load the parameters which is trained in step1, then reconstruct the model.(like mnist_tutorial_fgsm.py)
-3. use advbox to generate the adversarial sample.
diff --git a/adversarial/advbox/__init__.py b/adversarial/advbox/__init__.py
deleted file mode 100644
index f56f14f18dafdfe1e712cea178a63f09a087b587..0000000000000000000000000000000000000000
--- a/adversarial/advbox/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-   A set of tools for generating adversarial example on paddle platform 
-"""
diff --git a/adversarial/advbox/attacks/base.py b/adversarial/advbox/attacks/base.py
deleted file mode 100644
index 98a65f2fddff999ac6fa98a5733128a63a60f916..0000000000000000000000000000000000000000
--- a/adversarial/advbox/attacks/base.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""
-The base model of the model.
-"""
-from abc import ABCMeta, abstractmethod
-
-
-class Attack(object):
-    """
-    Abstract base class for adversarial attacks. `Attack` represent an adversarial attack
-    which search an adversarial example. subclass should implement the _apply() method.
-
-    Args:
-        model(Model): an instance of the class advbox.base.Model.
-
-    """
-    __metaclass__ = ABCMeta
-
-    def __init__(self, model):
-        self.model = model
-
-    def __call__(self, image_label):
-        """
-        Generate the adversarial sample.
-
-        Args:
-        image_label(list): The image and label tuple list with one element.
-        """
-        adv_img = self._apply(image_label)
-        return adv_img
-
-    @abstractmethod
-    def _apply(self, image_label):
-        """
-        Search an adversarial example.
-
-        Args:
-        image_batch(list): The image and label tuple list with one element.
-        """
-        raise NotImplementedError
diff --git a/adversarial/advbox/attacks/gradientsign.py b/adversarial/advbox/attacks/gradientsign.py
deleted file mode 100644
index 15b1d176cb11330ac290d73aec1419a3d8f3cc4c..0000000000000000000000000000000000000000
--- a/adversarial/advbox/attacks/gradientsign.py
+++ /dev/null
@@ -1,38 +0,0 @@
-"""
-This module provide the attack method for FGSM's implement.
-"""
-from __future__ import division
-import numpy as np
-from collections import Iterable
-from .base import Attack
-
-
-class GradientSignAttack(Attack):
-    """
-    This attack was originally implemented by Goodfellow et al. (2015) with the
-    infinity norm (and is known as the "Fast Gradient Sign Method"). This is therefore called
-    the Fast Gradient Method.
-    Paper link: https://arxiv.org/abs/1412.6572
-    """
-
-    def _apply(self, image_label, epsilons=1000):
-        assert len(image_label) == 1
-        pre_label = np.argmax(self.model.predict(image_label))
-
-        min_, max_ = self.model.bounds()
-        gradient = self.model.gradient(image_label)
-        gradient_sign = np.sign(gradient) * (max_ - min_)
-
-        if not isinstance(epsilons, Iterable):
-            epsilons = np.linspace(0, 1, num=epsilons + 1)
-
-        for epsilon in epsilons:
-            adv_img = image_label[0][0].reshape(
-                gradient_sign.shape) + epsilon * gradient_sign
-            adv_img = np.clip(adv_img, min_, max_)
-            adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
-            if pre_label != adv_label:
-                return adv_img
-
-
-FGSM = GradientSignAttack
diff --git a/adversarial/advbox/models/__init__.py b/adversarial/advbox/models/__init__.py
deleted file mode 100644
index eee0f6efd4774b42fcd082eb06d1398d2ee51bc4..0000000000000000000000000000000000000000
--- a/adversarial/advbox/models/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Paddle model for target of attack 
-"""
diff --git a/adversarial/advbox/models/base.py b/adversarial/advbox/models/base.py
deleted file mode 100644
index 74e1045def7648b4a8df30e89312d73c0d4fe7e1..0000000000000000000000000000000000000000
--- a/adversarial/advbox/models/base.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-The base model of the model.
-"""
-from abc import ABCMeta
-import abc
-
-abstractmethod = abc.abstractmethod
-
-
-class Model(object):
-    """
-    Base class of model to provide attack.
-
-
-    Args:
-        bounds(tuple): The lower and upper bound for the image pixel.
-        channel_axis(int): The index of the axis that represents the color channel.
-        preprocess(tuple): Two element tuple used to preprocess the input. First
-            substract the first element, then divide the second element.
-    """
-    __metaclass__ = ABCMeta
-
-    def __init__(self, bounds, channel_axis, preprocess=None):
-        assert len(bounds) == 2
-        assert channel_axis in [0, 1, 2, 3]
-
-        if preprocess is None:
-            preprocess = (0, 1)
-        self._bounds = bounds
-        self._channel_axis = channel_axis
-        self._preprocess = preprocess
-
-    def bounds(self):
-        """
-        Return the upper and lower bounds of the model.
-        """
-        return self._bounds
-
-    def channel_axis(self):
-        """
-        Return the channel axis of the model.
-        """
-        return self._channel_axis
-
-    def _process_input(self, input_):
-        res = input_
-        sub, div = self._preprocess
-        if sub != 0:
-            res = input_ - sub
-        assert div != 0
-        if div != 1:
-            res /= div
-        return res
-
-    @abstractmethod
-    def predict(self, image_batch):
-        """
-        Calculate the prediction of the image batch.
-
-        Args:
-            image_batch(numpy.ndarray): image batch of shape (batch_size, height, width, channels).
-
-        Return:
-            numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def num_classes(self):
-        """
-        Determine the number of the classes
-
-        Return:
-            int: the number of the classes
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def gradient(self, image_batch):
-        """
-        Calculate the gradient of the cross-entropy loss w.r.t the image.
-
-        Args:
-            image_batch(list): The image and label tuple list.
-
-        Return:
-            numpy.ndarray: gradient of the cross-entropy loss w.r.t the image with
-                the shape (height, width, channel).
-        """
-        raise NotImplementedError
diff --git a/adversarial/advbox/models/paddle.py b/adversarial/advbox/models/paddle.py
deleted file mode 100644
index 33b2a3d5c6973470fb25c98872cd53b3ff11bab4..0000000000000000000000000000000000000000
--- a/adversarial/advbox/models/paddle.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from __future__ import absolute_import
-
-import numpy as np
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
-from paddle.v2.fluid.framework import program_guard
-
-from .base import Model
-
-
-class PaddleModel(Model):
-    """
-    Create a PaddleModel instance.
-    When you need to generate a adversarial sample, you should construct an instance of PaddleModel.
-
-    Args:
-        program(paddle.v2.fluid.framework.Program): The program of the model which generate the adversarial sample.
-        input_name(string): The name of the input.
-        logits_name(string): The name of the logits.
-        predict_name(string): The name of the predict.
-        cost_name(string): The name of the loss in the program.
-    """
-
-    def __init__(self,
-                 program,
-                 input_name,
-                 logits_name,
-                 predict_name,
-                 cost_name,
-                 bounds,
-                 channel_axis=3,
-                 preprocess=None):
-        super(PaddleModel, self).__init__(
-            bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
-
-        if preprocess is None:
-            preprocess = (0, 1)
-
-        self._program = program
-        self._place = fluid.CPUPlace()
-        self._exe = fluid.Executor(self._place)
-
-        self._input_name = input_name
-        self._logits_name = logits_name
-        self._predict_name = predict_name
-        self._cost_name = cost_name
-
-        # gradient
-        loss = self._program.block(0).var(self._cost_name)
-        param_grads = fluid.backward.append_backward(
-            loss, parameter_list=[self._input_name])
-        self._gradient = dict(param_grads)[self._input_name]
-
-    def predict(self, image_batch):
-        """
-            Predict the label of the image_batch.
-
-            Args:
-                image_batch(list): The image and label tuple list.
-            Return:
-                numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
-        """
-        feeder = fluid.DataFeeder(
-            feed_list=[self._input_name, self._logits_name],
-            place=self._place,
-            program=self._program)
-        predict_var = self._program.block(0).var(self._predict_name)
-        predict = self._exe.run(self._program,
-                                feed=feeder.feed(image_batch),
-                                fetch_list=[predict_var])
-        return predict
-
-    def num_classes(self):
-        """
-            Calculate the number of classes of the output label. 
-
-        Return:
-            int: the number of classes
-        """
-        predict_var = self._program.block(0).var(self._predict_name)
-        assert len(predict_var.shape) == 2
-        return predict_var.shape[1]
-
-    def gradient(self, image_batch):
-        """
-        Calculate the gradient of the loss w.r.t the input.
-
-        Args:
-            image_batch(list): The image and label tuple list.
-        Return:
-            list: The list of the gradient of the image.
-        """
-        feeder = fluid.DataFeeder(
-            feed_list=[self._input_name, self._logits_name],
-            place=self._place,
-            program=self._program)
-
-        grad, = self._exe.run(self._program,
-                              feed=feeder.feed(image_batch),
-                              fetch_list=[self._gradient])
-        return grad
diff --git a/adversarial/fluid_mnist.py b/adversarial/fluid_mnist.py
deleted file mode 100644
index db4d4b51868ffa8be13d4d57a40e1def7e25d1a8..0000000000000000000000000000000000000000
--- a/adversarial/fluid_mnist.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""
-CNN on mnist data using fluid api of paddlepaddle
-"""
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
-
-
-def mnist_cnn_model(img):
-    """
-    Mnist cnn model
-
-    Args:
-        img(Varaible): the input image to be recognized
-
-    Returns:
-        Variable: the label prediction
-    """
-    conv_pool_1 = fluid.nets.simple_img_conv_pool(
-        input=img,
-        num_filters=20,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    conv_pool_2 = fluid.nets.simple_img_conv_pool(
-        input=conv_pool_1,
-        num_filters=50,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-    return logits
-
-
-def main():
-    """
-    Train the cnn model on mnist datasets
-    """
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    logits = mnist_cnn_model(img)
-    cost = fluid.layers.cross_entropy(input=logits, label=label)
-    avg_cost = fluid.layers.mean(x=cost)
-    optimizer = fluid.optimizer.Adam(learning_rate=0.01)
-    optimizer.minimize(avg_cost)
-
-    accuracy = fluid.evaluator.Accuracy(input=logits, label=label)
-
-    BATCH_SIZE = 50
-    PASS_NUM = 3
-    ACC_THRESHOLD = 0.98
-    LOSS_THRESHOLD = 10.0
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.mnist.train(), buf_size=500),
-        batch_size=BATCH_SIZE)
-
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
-    exe.run(fluid.default_startup_program())
-
-    for pass_id in range(PASS_NUM):
-        accuracy.reset(exe)
-        for data in train_reader():
-            loss, acc = exe.run(fluid.default_main_program(),
-                                feed=feeder.feed(data),
-                                fetch_list=[avg_cost] + accuracy.metrics)
-            pass_acc = accuracy.eval(exe)
-            print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc="
-                  + str(pass_acc))
-            if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD:
-                break
-
-        pass_acc = accuracy.eval(exe)
-        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
-    fluid.io.save_params(
-        exe, dirname='./mnist', main_program=fluid.default_main_program())
-    print('train mnist done')
-
-
-if __name__ == '__main__':
-    main()
diff --git a/adversarial/mnist_tutorial_fgsm.py b/adversarial/mnist_tutorial_fgsm.py
deleted file mode 100644
index 8b29346b8cd7f643771640afc4f783f7544cd071..0000000000000000000000000000000000000000
--- a/adversarial/mnist_tutorial_fgsm.py
+++ /dev/null
@@ -1,87 +0,0 @@
-"""
-FGSM demos on mnist using advbox tool.
-"""
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
-import matplotlib.pyplot as plt
-import numpy as np
-
-from advbox.models.paddle import PaddleModel
-from advbox.attacks.gradientsign import GradientSignAttack
-
-
-def cnn_model(img):
-    """
-    Mnist cnn model
-    Args:
-        img(Varaible): the input image to be recognized
-    Returns:
-        Variable: the label prediction
-    """
-    #conv1 = fluid.nets.conv2d()
-    conv_pool_1 = fluid.nets.simple_img_conv_pool(
-        input=img,
-        num_filters=20,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    conv_pool_2 = fluid.nets.simple_img_conv_pool(
-        input=conv_pool_1,
-        num_filters=50,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-    return logits
-
-
-def main():
-    """
-    Advbox demo which demonstrate how to use advbox.
-    """
-    IMG_NAME = 'img'
-    LABEL_NAME = 'label'
-
-    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
-    # gradient should flow
-    img.stop_gradient = False
-    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
-    logits = cnn_model(img)
-    cost = fluid.layers.cross_entropy(input=logits, label=label)
-    avg_cost = fluid.layers.mean(x=cost)
-
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-
-    BATCH_SIZE = 1
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.mnist.train(), buf_size=500),
-        batch_size=BATCH_SIZE)
-    feeder = fluid.DataFeeder(
-        feed_list=[IMG_NAME, LABEL_NAME],
-        place=place,
-        program=fluid.default_main_program())
-
-    fluid.io.load_params(
-        exe, "./mnist/", main_program=fluid.default_main_program())
-
-    # advbox demo
-    m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
-                    logits.name, avg_cost.name, (-1, 1))
-    att = GradientSignAttack(m)
-    for data in train_reader():
-        # fgsm attack
-        adv_img = att(data)
-        plt.imshow(n[0][0], cmap='Greys_r')
-        plt.show()
-        #np.save('adv_img', adv_img)
-        break
-
-
-if __name__ == '__main__':
-    main()
diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py
index cad6051f1413a5bb95f87a940f3aa81e49e5d282..70296081877588885a7a6d4d8491409b41a8d378 100644
--- a/benchmark/paddle/image/alexnet.py
+++ b/benchmark/paddle/image/alexnet.py
@@ -1,4 +1,16 @@
-#!/usr/bin/env python
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py
index 1018ec9ce1e529f618ddd7b7afa72a84c5e876a1..21e0d381aab24d21d91cdcc4aa630f6107405f9e 100644
--- a/benchmark/paddle/image/provider.py
+++ b/benchmark/paddle/image/provider.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import io, os
 import random
 import numpy as np
diff --git a/benchmark/paddle/rnn/imdb.py b/benchmark/paddle/rnn/imdb.py
index fc4ed4025f9ed2e0a32a1709ff8df4af53521196..c3b5faa19aaa325aaaf9cd9cc8f757d3f3b3bdcb 100755
--- a/benchmark/paddle/rnn/imdb.py
+++ b/benchmark/paddle/rnn/imdb.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import six.moves.cPickle as pickle
 import gzip
diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py
index 928ca75daf84ccebb775364b0be0d8b3d5eebff9..f35cd5b079ff09ac35b171ff3032ecc5adabc947 100644
--- a/benchmark/paddle/rnn/provider.py
+++ b/benchmark/paddle/rnn/provider.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import io, os
 import random
 import numpy as np
diff --git a/benchmark/tensorflow/image/alexnet.py b/benchmark/tensorflow/image/alexnet.py
index f6a39ef778e21bee7374718a1b1ddf43392825a8..a37d7e7c62282891d67cee74cac1408eac1244f7 100644
--- a/benchmark/tensorflow/image/alexnet.py
+++ b/benchmark/tensorflow/image/alexnet.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/alexnet_multi_gpu.py b/benchmark/tensorflow/image/alexnet_multi_gpu.py
index 7b5ee78f4dd5429abd85d75c092a6e3a2a39f922..2ebab8fb60d471cd6de6d332d81608f2a992b9be 100644
--- a/benchmark/tensorflow/image/alexnet_multi_gpu.py
+++ b/benchmark/tensorflow/image/alexnet_multi_gpu.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/googlenet.py b/benchmark/tensorflow/image/googlenet.py
index decf855b54451efba5f6a7868fbcf631789f3572..1202cbb171efd74dec1fc6690a82e1f5126685f0 100644
--- a/benchmark/tensorflow/image/googlenet.py
+++ b/benchmark/tensorflow/image/googlenet.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from six.moves import xrange
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/googlenet_multi_gpu.py b/benchmark/tensorflow/image/googlenet_multi_gpu.py
index 31466faa37c47c66e4fe4628e28c867875e89f2e..f06437eb6c82bf0dfbb9a76799e83182ec5ee888 100644
--- a/benchmark/tensorflow/image/googlenet_multi_gpu.py
+++ b/benchmark/tensorflow/image/googlenet_multi_gpu.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/smallnet_mnist_cifar.py b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
index 1a625134a6c58586b29190ede9c66253f484d2cf..558c68575f4ae3ceba7119ed081549ad63e208d8 100644
--- a/benchmark/tensorflow/image/smallnet_mnist_cifar.py
+++ b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/rnn/reader.py b/benchmark/tensorflow/rnn/reader.py
index f538329a15ea9ad9293c97c94340989e2c421eb2..9660d3c22b3a16954b0a1d09d38cf033824f0a5f 100755
--- a/benchmark/tensorflow/rnn/reader.py
+++ b/benchmark/tensorflow/rnn/reader.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os.path
 import io
 import numpy as np
diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake
index abee6698e30b7e76ca42825ed225876bf2ba5ec0..79b2449fe6689993bbee8a24ae7c46b452afe0a0 100644
--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -33,7 +33,7 @@ ExternalProject_Add(
     extern_grpc
     DEPENDS protobuf zlib
     GIT_REPOSITORY "https://github.com/grpc/grpc.git"
-    GIT_TAG "v1.7.x"
+    GIT_TAG "v1.8.x"
     PREFIX          ${GRPC_SOURCES_DIR}
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index 0e79c0cc7992060cbe3b668ec927936183389eb6..4012a164be1d20bba050fb09749b11afc7b99588 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -100,6 +100,11 @@ IF(NOT ${CBLAS_FOUND})
                 \"${CBLAS_INSTALL_DIR}/lib -> ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}\"
             )"
         )
+        INSTALL(CODE "execute_process(
+            COMMAND rm -r ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}/cmake
+                    ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}/pkgconfig
+            )"
+        )
     ENDIF()
 ENDIF(NOT ${CBLAS_FOUND})
 
diff --git a/cmake/make_resource.py b/cmake/make_resource.py
index a9241b0e3e36c2e79c79e46b4f9114b7f6947341..4f9f5546b9d4176d1035311cb9e1acf0c0eeccfd 100644
--- a/cmake/make_resource.py
+++ b/cmake/make_resource.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import re
 import sys
diff --git a/doc/api/v1/data_provider/src/mnist_config.py b/doc/api/v1/data_provider/src/mnist_config.py
index 429338c57f8f865f0c5835d933445b65ee2ea7aa..d2af9d849ec48347d4d56c2b5b6b517671c37518 100644
--- a/doc/api/v1/data_provider/src/mnist_config.py
+++ b/doc/api/v1/data_provider/src/mnist_config.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 define_py_data_sources2(
diff --git a/doc/api/v1/data_provider/src/mnist_provider.dict.py b/doc/api/v1/data_provider/src/mnist_provider.dict.py
index 2ba0b126a0d6239f84950e130410aaaa6e1f24cd..284f7dadb065f8c8a891e949a800280cde9edda9 100644
--- a/doc/api/v1/data_provider/src/mnist_provider.dict.py
+++ b/doc/api/v1/data_provider/src/mnist_provider.dict.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer.PyDataProvider2 import *
 
 
diff --git a/doc/api/v1/data_provider/src/sentimental_config.py b/doc/api/v1/data_provider/src/sentimental_config.py
index 7ce71608a2372b2484ae40ccf01f0621728ddef2..56adde13b97b30095729da5246805a2902870676 100644
--- a/doc/api/v1/data_provider/src/sentimental_config.py
+++ b/doc/api/v1/data_provider/src/sentimental_config.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 dictionary = dict()
diff --git a/doc/api/v1/data_provider/src/sentimental_provider.py b/doc/api/v1/data_provider/src/sentimental_provider.py
index 14bd0e05a921dbfd5212d8483524d3af3e4ae98f..59a2b6f7f5faff847f6fda15827632c4ab236b21 100644
--- a/doc/api/v1/data_provider/src/sentimental_provider.py
+++ b/doc/api/v1/data_provider/src/sentimental_provider.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer.PyDataProvider2 import *
 
 
diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index 696a8012aa4aacb78250be3a2d7f49718c009839..986026e0b9364076e777e4ba66c990fbecfa83d8 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -358,3 +358,154 @@ reduce_min
 ..  autofunction:: paddle.v2.fluid.layers.reduce_min
     :noindex:
 
+
+split
+-----
+..  autofunction:: paddle.v2.fluid.layers.split
+    :noindex:
+
+
+matmul
+------
+..  autofunction:: paddle.v2.fluid.layers.matmul
+    :noindex:
+
+logsigmoid
+----------
+..  autofunction:: paddle.v2.fluid.layers.logsigmoid
+    :noindex:
+
+exp
+---
+..  autofunction:: paddle.v2.fluid.layers.exp
+    :noindex:
+
+relu
+----
+..  autofunction:: paddle.v2.fluid.layers.relu
+    :noindex:
+
+tanh
+----
+..  autofunction:: paddle.v2.fluid.layers.tanh
+    :noindex:
+
+tanh_shrink
+-----------
+..  autofunction:: paddle.v2.fluid.layers.tanh_shrink
+    :noindex:
+
+softshrink
+----------
+..  autofunction:: paddle.v2.fluid.layers.softshrink
+    :noindex:
+
+sqrt
+----
+..  autofunction:: paddle.v2.fluid.layers.sqrt
+    :noindex:
+
+abs
+----
+..  autofunction:: paddle.v2.fluid.layers.abs
+    :noindex:
+
+ceil
+----
+..  autofunction:: paddle.v2.fluid.layers.ceil
+    :noindex:
+
+floor
+-----
+..  autofunction:: paddle.v2.fluid.layers.floor
+    :noindex:
+
+round
+-----
+..  autofunction:: paddle.v2.fluid.layers.round
+    :noindex:
+
+reciprocal
+----------
+..  autofunction:: paddle.v2.fluid.layers.reciprocal
+    :noindex:
+
+log
+---
+..  autofunction:: paddle.v2.fluid.layers.log
+    :noindex:
+
+square
+------
+..  autofunction:: paddle.v2.fluid.layers.square
+    :noindex:
+
+softplus
+--------
+..  autofunction:: paddle.v2.fluid.layers.softplus
+    :noindex:
+
+softsign
+---------
+..  autofunction:: paddle.v2.fluid.layers.softsign
+    :noindex:
+
+brelu
+-----
+..  autofunction:: paddle.v2.fluid.layers.brelu
+    :noindex:
+
+leaky_relu
+----------
+..  autofunction:: paddle.v2.fluid.layers.leaky_relu
+    :noindex:
+
+soft_relu
+---------
+..  autofunction:: paddle.v2.fluid.layers.soft_relu
+    :noindex:
+
+elu
+----
+..  autofunction:: paddle.v2.fluid.layers.elu
+    :noindex:
+
+relu6
+-----
+..  autofunction:: paddle.v2.fluid.layers.relu6
+    :noindex:
+
+pow
+----
+..  autofunction:: paddle.v2.fluid.layers.pow
+    :noindex:
+
+hard_shrink
+-----------
+..  autofunction:: paddle.v2.fluid.layers.hard_shrink
+    :noindex:
+
+thresholded_relu
+----------------
+..  autofunction:: paddle.v2.fluid.layers.thresholded_relu
+    :noindex:
+
+hard_sigmoid
+-------------
+..  autofunction:: paddle.v2.fluid.layers.hard_sigmoid
+    :noindex:
+
+swish
+------
+..  autofunction:: paddle.v2.fluid.layers.swish
+    :noindex:
+
+l2_normalize
+------------
+..  autofunction:: paddle.v2.fluid.layers.l2_normalize
+    :noindex:
+
+sequence_reshape
+----------------
+..  autofunction:: paddle.v2.fluid.layers.sequence_reshape
+    :noindex:
diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst
index b792efb71f85ae643df655568da69c82414e9d5d..f6b1cb4ba10659fb336899f08376c265c67290f1 100644
--- a/doc/api/v2/fluid/nets.rst
+++ b/doc/api/v2/fluid/nets.rst
@@ -20,3 +20,14 @@ sequence_conv_pool
     :noindex:
 
 
+glu
+---
+..  autofunction:: paddle.v2.fluid.nets.glu
+    :noindex:
+
+
+dot_product_attention
+---------------------
+..  autofunction:: paddle.v2.fluid.nets.dot_product_attention
+    :noindex:
+
diff --git a/doc/design/error_clip.md b/doc/design/error_clip.md
index 8e845462cce2a29556bcb6010b08f00fbc3d99d7..58aa73b8cd38d01e2426278a3479714e4fb6a3b0 100644
--- a/doc/design/error_clip.md
+++ b/doc/design/error_clip.md
@@ -46,12 +46,12 @@ class ErrorClipByValue(BaseErrorClipAttr):
         self.min = min
 
     def append_clip_op(self, block, grad_name):
-        block.append_op(
-            type="clip",
-            inputs={"X": grad_name},
-            outputs={"Out": grad_name},
-            attrs={"min": self.min,
-                   "max": self.max})
+        clip_op_desc = block.desc.append_op()
+        clip_op_desc.set_type("clip")
+        clip_op_desc.set_input("X", [grad_name])
+        clip_op_desc.set_output("Out", [grad_name])
+        clip_op_desc.set_attr("min", self.min)
+        clip_op_desc.set_attr("max", self.max)
 ```
 
 The `BaseErrorClipAttr` have one main member functions: `append_clip_op(self, block, grad_name)`.
@@ -80,6 +80,11 @@ def error_clip_callback(block, context):
                          op_desc.output_arg_names()):
         fwd_var = block.var_recursive(grad_to_var[grad_n])
         error_clip = getattr(fwd_var, "error_clip", None)
+        if not (error_clip is None or isinstance(error_clip,
+                                                 BaseErrorClipAttr)):
+            raise TypeError(
+                "Variable's error_clip should be an instance of BaseErrorClipAttr or None."
+            )
         if error_clip is not None:
             error_clip.append_clip_op(block, grad_n)
 ```
diff --git a/doc/design/fluid.md b/doc/design/fluid.md
index 585dc8ef39c0cfb30f470d79f7b27a59ceb5e940..2acc168007d25a083f588b48f84e12e29baf4f47 100644
--- a/doc/design/fluid.md
+++ b/doc/design/fluid.md
@@ -105,18 +105,10 @@ There are two ways to execute a Fluid program.  When a program is executed, it c
 
 There is a C++ class [`Executor`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h), which runs a `ProgramDesc`, similar to how an interpreter runs a Python program.
 
-Fluid is moving towards the direction of a compiler, which is explain in more detail later in this article.
+Fluid is moving towards the direction of a compiler, which is explain in [fluid_compiler.md](fluid_compiler.md).
 
 ## Backward Compatibility of Fluid
 
 Given all the advantages from the removal of the concept of a *model*, hardware manufacturers might still prefer the existence of the concept of a model, so it would be easier for them to support multiple frameworks all at once and could run a trained model during inference.  For example, Nervana, a startup company acquired by Intel, has been working on an XPU that reads the models in the format known as [n-graph](https://github.com/NervanaSystems/ngraph).  Similarly, [Movidius](https://www.movidius.com/) is producing a mobile deep learning chip that reads and runs graphs of operators.  The well-known [ONNX](https://github.com/onnx/onnx) is also a file format of graphs of operators.
 
 For Fluid, we can write a converter that extracts the parts in the `ProgramDesc` protobuf message, converts them into a graph of operators, and exports the graph into the ONNX or n-graph format.
-
-## Towards a Deep Learning Language and the Compiler
-
-We can change the `if-then-else` and loop structure a little bit in the above Fluid example programs, to make it into a new programming language, different than Python.
-
-Even if we do not invent a new language, as long as we get the `ProgramDesc` message filled in, we can write a transpiler, which translates each invocation to an operator, into a C++ call to a kernel function of that operator. For example, a transpiler that weaves the CUDA kernels outputs an NVIDIA-friendly C++ program, which can be built using `nvcc`.  Another transpiler could generate MKL-friendly code that should be built using `icc` from Intel.  More interestingly, we can translate a Fluid program into its distributed version of two `ProgramDesc` messages, one for running on the trainer process, and the other one for the parameter server.  For more details of the last example, the [concurrent programming design](concurrent_programming.md) document would be a good pointer.  The following figure explains the proposed two-stage process:
-
-![](fluid-compiler.png)
diff --git a/doc/design/fluid_compiler.md b/doc/design/fluid_compiler.md
new file mode 100644
index 0000000000000000000000000000000000000000..2a6beafc52e815fa067b273bb5887ddcf6ab15ae
--- /dev/null
+++ b/doc/design/fluid_compiler.md
@@ -0,0 +1,110 @@
+# PaddlePaddle Fluid: Towards a Compiled Programming Language
+
+As described in [fluid.md](fluid.md), when a Fluid application program
+runs, it generates a `ProgramDesc` protobuf message as an intermediate
+representation of itself.  The C++ class `Executor` can run this
+protobuf message as an interpreter.  This article describes the Fluid
+compiler.
+
+![](fluid-compiler.png)
+
+## ProgramDesc
+
+Before we go deeper into the idea of compiled language, let us take a
+look at a simple example Fluid application.
+
+```python
+import "fluid"
+
+func paddlepaddle() {
+  X = fluid.read(...)
+  W = fluid.Tensor(...)
+  Y = fluid.mult(X, W)
+}
+```
+
+This program consists of a [block](block.md) of three operators --
+`read`, `assign`, and `mult`.  Its `ProgramDesc` message looks like
+the following
+
+```protobuf
+message ProgramDesc {
+  block[0] = Block {
+    vars = [X, W, Y],
+    ops = [
+      read(output = X)
+      assign(input = ..., output = W)
+      mult(input = {X, W}, output = Y)
+    ],
+  }
+}
+```
+ 
+## Transpilers
+
+We can write a transpiler program that takes a `ProgramDesc`, e.g.,
+the above one, and outputs another `ProgramDesc`.  Let us take some
+examples:
+
+1. *Memory optimization transpiler*: We can write a transpiler that
+   inserts some `FreeMemoryOp`s in the above example `ProgramDesc` so
+   to free memory early, before the end of an iteration, so to keep a
+   small memory footprint.
+
+1. *Distributed training transpiler*: We can write a transpiler that
+   converts a`ProgramDesc` into its distributed version of two
+   `ProgramDesc`s -- one for running by the trainer processes and the
+   other for the parameter server.
+
+In the rest of this article, we talk about a special kind of
+transpiler, *Native code generator*, which takes a `ProgramDesc` and
+generates a `.cu` (or `.cc`) file, which could be built by C++
+compilers (gcc, nvcc, icc) into binaries.
+
+## Native Code Generator
+
+For the above example, the native code generator transpiler, say, the
+CUDA code generator, should generate a `main` function:
+
+```c++
+void main() {
+  auto X = fluid_cuda_read(...);
+  auto W = fluid_cuda_create_tensor(...);
+  auto Y = fluid_cuda_mult(X, W);
+}
+```
+
+and the definitions of functions `fluid_cuda_read`,
+`fluid_cuda_create_tensor`, and `fluid_cuda_mult`.  Please be aware
+that each function could just define a C++ instance of an operator and
+run it.  For example
+
+```c++
+paddle::Tensor fluid_cuda_read(...) {
+  paddle::Tensor t;
+  paddle::operator::Read r(&t, ...);
+  r.Run();
+  return t;
+}
+```
+
+For computational operators that have multiple *kernels*, each for a
+specific hardware platform, for example, the `mult` operator, the
+generated code should call its CUDA kernel:
+
+```c++
+paddle::Tensor fluid_cuda_mult(const paddle::Tensor& a, 
+                               const paddle::Tensor& b) {
+  paddle::Tensor t;
+  paddle::operator::Mult m(a, b, ...);
+  Mult.Run(cuda_context);
+}
+```
+
+where `cuda_context` could be a global variable of type
+`paddle::CUDADeviceContext`.
+
+## Multi-Block Code Generation
+
+Most Fluid application programs may have more than one blocks.  To
+execute them, we need to trace [scopes](scope.md).
diff --git a/doc/design/switch_kernel.md b/doc/design/switch_kernel.md
index 1846e5d9f99dd433b44ac6b5ae52893ec8f0d451..9719e031c70979cd95400701efd30879662e19bc 100644
--- a/doc/design/switch_kernel.md
+++ b/doc/design/switch_kernel.md
@@ -1,21 +1,24 @@
 ## Background
-Every operator has many kernels because there are multiple data types, places, data layout that Fluid supports. We use the `KernelType` to describe kernel types that operators can hold. 
+Every operator has many kernels because there are multiple data types, places, data layout, library type that Fluid supports. We use the `OpKernelType ` to describe kernel types that operators can hold.
 
-The `KernelType` is as follows.
+The `OpKernelType ` is as follows:
 
-```
-struct KernelType {
+```cpp
+struct OpKernelType {
   Place place_;
   DataType data_type_;
-  LayoutType layout_;
+  DataLayout data_layout_;
+  LibraryType library_type_;
 };
 ```
 
-The `place_` is a descriptor of the device and the computational library, e.g., `MKLDNNPlace`, `CUDAPlace`.
+- The `place_` is a descriptor of the device, e.g., CPUPlace, CUDAPlace.
 
-The `data_type_` is the data type that this kernel performs on, e.g., `FP32`, `INT64`. Note that one kernel may have inputs with different data types. However, it will be a major `data_type`. For example, the `cross_entropy` takes `int64` as it label, and `double`/`float` as its input logit and output cost. The major `data_type` of `cross_entropy` is `float`/`double`.
+- The `data_type_` is the data type that this kernel performs on, e.g., `FP32`, `INT64`. Note that one kernel may have inputs with different data types. However, it will be a major `data_type`. For example, the `cross_entropy` takes `int64` as it label, and `double`/`float` as its input logit and output cost. The major `data_type` of `cross_entropy` is `float` or `double`.
 
-The `layout` is useful for some computational library. One example is that MKLDNN uses many kinds of layout, such as `nChw8c`. Each kind of layout will invoke the different kernel.
+- The `data_layout_ ` is useful for some computational library. One example is that MKLDNN uses many kinds of layout, such as `nChw8c`. Each kind of layout will invoke the different kernel.
+
+- The `library_type_` describes the computational library, e.g., `MKLDNN`, `CUDNN`.
 
 ## Problem
 
@@ -25,42 +28,72 @@ We register a kernel for every operator and every kernel type ideally. However,
 2. Some operators will take too many memory. It is better to force them into CPU. However, the rest of operators in this neural network will be performed on GPU, i.e., model parallel problem.
 3. Some layout and place are particular. One example is that MKLDNN uses `nChw8` and there is no other library uses `nChw8c`.
 
-Problems under these situations are similar. We can formalise this problem as follow.
+Take one situation to give a detailed explanation, if we have two Operators: OP1 and OP2, OP1 has one output `op1_to_op2`, and `op1_to_op2` is the input of OP2.
+
+If OP1 and OP2 run on the same place(for example CPUPlace), then `op1_2_op2` can be used directly by OP2.
+
+```
+OP1(CPUPlace)
+     |
+ op1_2_op2
+     |
+OP2(CPUPlace)
+```
+
+If OP1 and OP2 run one different place, then OP2 cannot `use op1_2_op2` directly.
+
+Problems under these situations are similar. We can formalize this problem as follow.
 
 We register kernels with types $KT = \{kt_1, kt_2, kt_3, ...\}$ for one operator. The inputs of this operator should be run on kernel type $kt_{?}$, which the $kt_{?} \notin KT$. How to cast the input of this operator from $kt_{?}$ to any of kernel type in $KT$.
 
-## Solution
+## Solution: data transform
 
-It is clearly that transforming inputs of an operator toadapt another kernel type is not related to the particular operator. So we should register these transformation methods as global methods.
+It is clear that transforming inputs of an operator to adapt another kernel type is not related to the particular operator. So we should register these transformation methods as global methods.
 
-We can infer a kernel type from the inputs of an operators. We let this kernel type as `actual kernel type`, which means this kernel type is the actually kernel type that operator should be performed.
+We can infer kernel type for each input of an operator. We let this kernel type as `actual kernel type for var`, which means this kernel type is the kernel type that can process this input variable.
 
 We can get a kernel type by 1) The configuration of operator description. (Users may want to force use `MKL` for `conv` operator). 2) The place of the current executor. (Executor is running on GPU). This kernel type is what we expect the operator will be performed on. We let this kernel type as `expect kernel type`.
 
-We transform the input data from `actual` to `expect` if the expect kernel type is not as same as actual kernel type.
+We transform the input data from `actual` to `expect` if the actual kernel type is not as same as expect kernel type.
 
-The algorithm is described as follow
+The algorithm is described as following
 
 ```cpp
-using DataTransformationFN = std::function<void(const Tensor& in, Tensor* out)>;
-using KernelTypePair = std::pair<KernelType, KernelType>;
-
-map<KernelTypePair, DataTransformationFN> g_data_transformation_;
-
-void OpWithKernel::Run() {
-  vec<Tensor> inputs = ...
-  auto actual_kernel_type = GetActualKernelType(inputs);
-  
-  // The expected kernel type is related to actual kernel type.
-  // For the most operators, the expected kernel type is as same as
-  // actual kernel type.
-  //
-  // So we pass `actual_kernel_type` as a parameter of 
-  // GetExpectedKernelType
-  auto expect_kernel_type = GetExpectedKernelType(actual_kernel_type);
-  
-  auto trans = g_data_transformation_[{actual_kernel_type, expect_kernel_type}];
-  
-  kernel.run(trans(inputs));
+void OperatorWithKernel::Run(
+        const Scope& scope,
+        const platform::Place& place) const {
+  ExecutionContext ctx(...);
+  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
+
+  Scope& new_scope = scope.NewScope();
+
+  for (auto& var_name : this->Inputs()) {
+    auto* tensor_in = GetTensor(var_name);
+    auto kernel_type_for_var = this->GetKernelTypeForVar(...);
+    if (kernel_type_for_var.place_ != expected_kernel_key.place_) {
+      auto* trans_var = new_scope.Var(var_name);
+      auto* out = DataTransform(expected_kernel_key,
+                                kernel_type_for_var,
+                                *tensor_in);
+      CopyVariableWithTensor(...);
+    }
+  }
+
+  auto kernel = kernels.find(expected_kernel_key);
+  kernel->Compute(ExecutionContext(...));
 }
 ```
+
+then the actual process for the multi-device above will be:
+
+```
+OP1(CPUPlace)
+     |
+op1_2_op2(on CPU)
+     |
+[transform](from CPU to GPU)
+     |
+op1_2_op2(on GPU)
+     |
+OP2(CUDAPlace)
+```
diff --git a/doc/faq/local/src/reduce_min_pool_size.py b/doc/faq/local/src/reduce_min_pool_size.py
index 5715397cc11e18246b8522fcc5b4f05780c9a0a7..9efdb5707ac4a0cb701ec4fef6dfff399d6150cb 100644
--- a/doc/faq/local/src/reduce_min_pool_size.py
+++ b/doc/faq/local/src/reduce_min_pool_size.py
@@ -1,3 +1,18 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
 @provider(min_pool_size=0, ...)
 def process(settings, filename):
     os.system('shuf %s > %s.shuf' % (filename, filename))  # shuffle before.
diff --git a/doc/faq/local/src/word2vec_config.py b/doc/faq/local/src/word2vec_config.py
index 866b40c3d4c96c1213b3f716f29b14dd38763edb..b4fcf0960eda37f98f9f7f2949148f8d51cd2008 100644
--- a/doc/faq/local/src/word2vec_config.py
+++ b/doc/faq/local/src/word2vec_config.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 ...  # the settings and define data provider is omitted.
 DICT_DIM = 3000  # dictionary dimension.
 word_ids = data_layer('word_ids', size=DICT_DIM)
diff --git a/doc/faq/local/src/word2vec_dataprovider.py b/doc/faq/local/src/word2vec_dataprovider.py
index ec2753a7d01d7dd4d804c3bed0bac1be9c3fb3d3..3b6273b0574fb54a7c374a1f19fdd993fff5c730 100644
--- a/doc/faq/local/src/word2vec_dataprovider.py
+++ b/doc/faq/local/src/word2vec_dataprovider.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 DICT_DIM = 3000
 
 
diff --git a/doc/faq/model/index_cn.rst b/doc/faq/model/index_cn.rst
index b47bbe05bdb39d1ade9434a7e54bf6ca88a91cc9..6947948bc79f4dba63954c459afb940e3242c405 100644
--- a/doc/faq/model/index_cn.rst
+++ b/doc/faq/model/index_cn.rst
@@ -67,3 +67,14 @@
 
   * 不同于上述介绍的recurrent layer , :code:`paddle.networks.lstmemory_unit` 定义了LSTM单元在一个时间步内的计算过程，它并不是一个完整的recurrent layer，也不能接收序列数据作为输入；
   * :code:`paddle.networks.lstmemory_unit` 只能在recurrent_group中作为step function使用；
+
+5. PaddlePaddle的softmax能否指定计算的维度
+-----------------------------------------
+
+PaddlePaddle的softmax不能指定计算维度，只能按行计算。
+在图像任务中，对于NCHW，如果需要在C维度计算softmax，可以先使用 :code:`paddle.layer.switch_order` 改变维度顺序，即将NCHW转换成NHWC，再做一定的reshape，最后计算softmax。
+
+6. PaddlePaddle是否支持维数可变的数据输入
+------------------------------------------
+
+PaddlePaddle提供的 :code:`paddle.data_type.dense_array` 支持维数可变的数据输入。在使用时，将对应数据层的维数设置成一个大于输入数据维数的值用于占位即可。
diff --git a/doc/getstarted/concepts/src/infer.py b/doc/getstarted/concepts/src/infer.py
index 4cc58dfee0bd6dade0340b4fd0ee1adb49ffebf6..a1b60388c45ae83cbed3b5597e8b8aef5d69f814 100644
--- a/doc/getstarted/concepts/src/infer.py
+++ b/doc/getstarted/concepts/src/infer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2 as paddle
 import numpy as np
 
diff --git a/doc/getstarted/concepts/src/train.py b/doc/getstarted/concepts/src/train.py
index 4bccbfca3c70c12aec564e2cae3b8ca174b68777..0e5bdb57bc95c513eb67d426741c860a34d37dd0 100644
--- a/doc/getstarted/concepts/src/train.py
+++ b/doc/getstarted/concepts/src/train.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2 as paddle
 import numpy as np
 
diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md
index 3109d72001f13a38a93b9ca39d3f8525c8cea9f1..92996585674b46f45549b972b9f295503b1c7f8c 100644
--- a/doc/howto/dev/new_op_cn.md
+++ b/doc/howto/dev/new_op_cn.md
@@ -24,7 +24,7 @@
 - `framework::OperatorWithKernel`：继承自OperatorBase，Op有计算函数，称作有Kernel。
 - `class OpProtoAndCheckerMaker`：描述该Op的输入、输出、属性、注释,主要用于Python API接口生成
 
-依据是否包含kernel，可以将Op分为两种：包含Kernel的Op和不包含kernel的Op，前者Op的定义继承自`OperatorBase`，后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写，简单总结Op需要包含的内容如下：
+依据是否包含kernel，可以将Op分为两种：包含Kernel的Op和不包含kernel的Op，前者Op的定义继承自`OperatorWithKernel`，后者继承自`OperatorBase`。本教程主要介绍带Kernel的Op如何写，简单总结Op需要包含的内容如下：
 
 
  内容            | 定义位置
diff --git a/doc/howto/dev/new_op_en.md b/doc/howto/dev/new_op_en.md
index 7175d8370d6ce08c6d502eb42b8e53252db89bbb..da8b1bdd1082e439456daf25e9b3a1e8eb534375 100644
--- a/doc/howto/dev/new_op_en.md
+++ b/doc/howto/dev/new_op_en.md
@@ -4,7 +4,8 @@
  - [Implementing C++ Types](#implementing-c-types)
    - [Defining ProtoMaker](#defining-protomaker)
    - [Defining Operator](#defining-operator)
-   - [Registering Operator](#registering-operator)
+   - [Defining OpKernel](#defining-opkernel)
+   - [Registering Operator and OpKernel](#registering-operator-and-opkernel)
    - [Compilation](#compilation)
  - [Python Binding](#python-binding)
  - [Unit Tests](#unit-tests)
@@ -16,12 +17,13 @@
 
 Here are the base types needed. For details, please refer to the design docs.
 
-- `framework::OperatorBase`: Operator (Op)base class.
-- `framework::OpKernel`: Base class for Op computation.
-- `framework::OperatorWithKernel`: Inherited from OperatorBase, describing an operator with computation.
 - `class OpProtoAndCheckerMaker`: Describes an Operator's input, output, attributes and description, mainly used to interface with Python API.
+- `framework::OperatorBase`: Operator (Op)base class.
+- `framework::OpKernel`: Base class for Op computation kernel.
+- `framework::OperatorWithKernel`: Inherited from OperatorBase, describing an operator with computation kernels.
+
 
-An operator can be differentiated by whether in has kernel methods. An operator with kernel inherits from `OperatorWithKernel` while the ones without inherit from `OperatorBase`. This tutorial focuses on implementing operators with kernels. In short, an operator includes the following information:
+Operators can be categorized into two groups: operator with kernel(s) and operator without kernel(s). An operator with kernel(s) inherits from `OperatorWithKernel` while the one without kernel(s) inherits from `OperatorBase`. This tutorial focuses on implementing operators with kernels. In short, an operator includes the following information:
 
 
  Information           | Where is it defined
@@ -32,7 +34,7 @@ Kernel implementation       | The kernel methods shared between CPU and CUDA are
 Registering the Op           | Ops are registered in `.cc` files; For Kernel registration, `.cc` files contain the CPU implementation, while `.cu` files contain the CUDA implementation.
 
 
-New Operator implementations are added to the list [paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators), with file names in the format `*_op.h` (if applicable), `*_op.cc`, `*_op.cu` (if applicable).** The system will use the naming scheme to automatically build operators and their corresponding Python extensions. **
+New Operator implementations are added to the list [paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators), with file names in the format `*_op.h` (if applicable), `*_op.cc`, `*_op.cu` (if applicable).** The system will use the naming scheme to automatically build operators and their corresponding Python extensions.**
 
 
 Let's take matrix multiplication operator, [MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc), as an example to introduce the writing of an Operator with Kernel.
@@ -156,7 +158,8 @@ Usually `OpProtoMaker` and `Op`'s type definitions are written in `.cc` files, w
 - `typename T` denotes data type, such as `float` or `double`.
 
 `MulKernel` types need to rewrite the interface for `Compute`.
-- `Compute` takes one input variable `const framework::ExecutionContext& context`.
+
+- `Compute` takes one input parameter: `const framework::ExecutionContext& context`.
 - Compared with `InferShapeContext`, `ExecutionContext` includes device types, and can similarly extract input, output, and attribute variables.
 - `Compute` implements the computation logics of an `OpKernel`.
 
@@ -177,7 +180,7 @@ Usually `OpProtoMaker` and `Op`'s type definitions are written in `.cc` files, w
   };
   ```
 
-Note that **different devices (CPU, CUDA)share an Op definition; whether or not they share the same `OpKernel` depends on whether `Compute` calls functions that support both devices.**
+Note that **different devices (CPU, CUDA)share one Op definition; whether or not they share the same `OpKernel` depends on whether `Compute` calls functions can support both devices.**
 
 `MulOp`'s CPU and CUDA share the same `Kernel`. A non-sharing  `OpKernel` example can be seen in [`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43).
 
@@ -188,13 +191,14 @@ This concludes the forward implementation of an operator. Next its operation and
 
 The definition of its corresponding backward operator, if applicable, is similar to that of an forward operator. **Note that a backward operator does not include a `ProtoMaker`**.
 
-### Registering Operator
+### Registering Operator and OpKernel
 
 - In `.cc` files, register forward and backward operator classes and the CPU kernel.
 
     ```cpp
     namespace ops = paddle::operators;
     REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad);
+
     REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>);
     REGISTER_OP_CPU_KERNEL(mul_grad,
                   ops::MulGradKernel<paddle::platform::CPUDeviceContext, float>);
@@ -204,6 +208,7 @@ The definition of its corresponding backward operator, if applicable, is similar
 
     - `REGISTER_OP` registers the `ops::MulOp` class, type named `mul`, its type `ProtoMaker` is `ops::MulOpMaker`, registering `ops::MulOpGrad` as `mul_grad`.
     - `REGISTER_OP_WITHOUT_GRADIENT` registers an operator without gradient.
+
     - `REGISTER_OP_CPU_KERNEL` registers `ops::MulKernel` class and specialized template types `paddle::platform::CPUPlace` and `float`, which also registers `ops::MulGradKernel`.
 
 
@@ -225,6 +230,7 @@ The definition of its corresponding backward operator, if applicable, is similar
 Run the following commands to compile.
 
 ```
+# maybe you need to rerun cmake
 make mul_op
 ```
 
diff --git a/doc/howto/dev/new_op_kernel_en.md b/doc/howto/dev/new_op_kernel_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..123df0a7ee4943c0b789ef9cfa6e0804d0fdd564
--- /dev/null
+++ b/doc/howto/dev/new_op_kernel_en.md
@@ -0,0 +1,121 @@
+## Add Kernels for a New Device
+
+### Background
+
+PaddlePaddle Fluid have hundreds of operators.  Each operator could have one or more kernels.  A kernel is an implementation of the operator for a certain device, which could be a hardware device, e.g., the CUDA GPU, or a library that utilizes a device, e.g., Intel MKL that makes full use of the Xeon CPU.
+
+[This document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md) explains how to add an operator, and its kernels.  The kernels of an operator are indexed by a C++ type [`OpKernelType`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md).  An operator chooses the right kernel at runtime.  This choosing mechanism is described [here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md).
+
+### Write Kernels for A New Device 
+
+#### Add A New Device
+
+  For some historical reaons, we misuse the word *library* for *device*.  For example, we call the deivce type by *library type*.  An example is the header file [`library_type.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/library_type.h#L24).  We will correct this ASAP.
+
+To register a new device, we need to add an enum value to `LibraryType`:
+
+```
+enum class LibraryType {
+  kPlain = 0,
+  kMKLDNN = 1,
+  kCUDNN = 2,
+};
+```
+
+
+#### Add A New [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53)
+
+If you have a new kind of Device, firstly you need to add a new kind of [`Place`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53). For example `CUDAPlace`:
+
+```cpp
+struct CUDAPlace {
+  CUDAPlace() : CUDAPlace(0) {}
+  explicit CUDAPlace(int d) : device(d) {}
+
+  inline int GetDeviceId() const { return device; }
+  // needed for variant equality comparison
+  inline bool operator==(const CUDAPlace &o) const {
+    return device == o.device;
+  }
+  inline bool operator!=(const CUDAPlace &o) const { return !(*this == o); }
+
+  int device;
+};
+
+typedef boost::variant<CUDAPlace, CPUPlace> Place;
+```
+
+#### Add [device context]((https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37))
+After a new kind of Device is added, you should add a corresponding [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37) for it.
+
+```cpp
+class DeviceContext {
+ public:
+  virtual ~DeviceContext() {}
+  virtual Place GetPlace() const = 0;
+
+  virtual void Wait() const {}
+};
+```
+
+#### Implement new [OpKernel](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L351) for your Device.
+
+A detailed documentation can be found in [`new_op_and_kernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md)
+
+```cpp
+class OpKernelBase {
+ public:
+  /**
+   * ExecutionContext is the only parameter of Kernel Run function.
+   * Run will get input/output variables, state such as momentum and
+   * device resource such as CUDA stream, cublas handle, etc. from
+   * ExecutionContext. User should construct it before run the Operator.
+   */
+
+  virtual void Compute(const ExecutionContext& context) const = 0;
+
+  virtual ~OpKernelBase() = default;
+};
+
+template <typename T>
+class OpKernel : public OpKernelBase {
+ public:
+  using ELEMENT_TYPE = T;
+};
+```
+
+
+#### Register the OpKernel to framework
+
+After writing the components described above, we should register the kernel to the framework.
+
+We use `REGISTER_OP_KERNEL` to do the registration.
+
+```cpp
+REGISTER_OP_KERNEL(
+	op_type,
+	library_type,
+	place_type,
+	kernel0, kernel1, ...)
+```
+
+kernel0, kernel1 are kernels that have the same `op_type`, `library_type`, `place_type` but different `data_types`.
+
+take [`conv2d`]((https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/conv_cudnn_op.cu.cc#L318)) as an example:
+
+	```cpp
+	REGISTER_OP_KERNEL(conv2d, CPU, paddle::platform::CPUPlace,
+    		paddle::operators::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
+    		paddle::operators::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
+    
+	REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
+	       paddle::operators::CUDNNConvOpKernel<float>,
+	       paddle::operators::CUDNNConvOpKernel<double>);
+	```
+
+In the code above:
+
+ - `conv2d` is the type/name of the operator
+ - `CUDNN/CPU` is `library`
+ - `paddle::platform::CUDAPlace/CPUPlace` is `place`
+ - template parameter `float/double` on `CUDNNConvOpKernel<T>` is `data_type`.
diff --git a/doc/howto/usage/capi/organization_of_the_inputs_cn.md b/doc/howto/usage/capi/organization_of_the_inputs_cn.md
index 563ec5ca21ec5d75800fa201943d65e6d6fe51ea..a889ae4ffab7be02468b4a5ac5a18e3cc77803c9 100644
--- a/doc/howto/usage/capi/organization_of_the_inputs_cn.md
+++ b/doc/howto/usage/capi/organization_of_the_inputs_cn.md
@@ -19,7 +19,7 @@
 
 ### 基本使用概念
 
-- 在PaddlePaddle内部，神经网络中一个计算层的输入/输出被组织为一个 `Argument` 结构体，如果神经网络有多个输入或者多个输入，每一个输入/输入都会对应有自己的`Argument`。
+- 在PaddlePaddle内部，神经网络中一个计算层的输入/输出被组织为一个 `Argument` 结构体，如果神经网络有多个输入或者多个输出，每一个输入/输出都会对应有自己的`Argument`。
 - `Argument` 并不真正“存储”数据，而是将输入/输出信息有机地组织在一起。
 - 在`Argument`内部由`IVector`（对应着上文提到的一维整型数组）和`Matrix`（对应着上文提到的二维浮点型矩阵）来实际存储数据；由 `Sequence Start Positions` (下文详细解释) 来描述输入/输出的序列信息。
 
diff --git a/doc/howto/usage/cluster/fluid_cluster_train_en.md b/doc/howto/usage/cluster/fluid_cluster_train_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..a64004a7c4ea12bc0d949d7f11f3e26af62bf912
--- /dev/null
+++ b/doc/howto/usage/cluster/fluid_cluster_train_en.md
@@ -0,0 +1,140 @@
+# Fluid Distributed Training
+
+## Introduction
+
+In this article, we'll explain how to config and run distributed training jobs with PaddlePaddle Fluid in a bare metal cluster.
+
+## Preparations
+
+### Get your cluster ready
+
+Prepare your computer nodes in the cluster. Nodes in this cluster can be of any specification that runs PaddlePaddle, and with a unique IP address assigned to it. Make sure they can communicate with each other.
+
+### Have PaddlePaddle installed
+
+PaddlePaddle must be installed on all nodes. If you have GPU cards on your nodes, be sure to properly install drivers and CUDA libraries.
+
+PaddlePaddle build and installation guide can be found from [here](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/index_en.html).
+
+### Update training script
+
+#### Non-cluster training script
+
+Let's take [Deep Learning 101](http://www.paddlepaddle.org/docs/develop/book/01.fit_a_line/index.html)'s first chapter: "fit a line" as an example.
+
+This demo's non-cluster version with fluid API is as follows:
+
+``` python
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+y_predict = fluid.layers.fc(input=x, size=1, act=None)
+y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+
+cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+avg_cost = fluid.layers.mean(x=cost)
+
+sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+sgd_optimizer.minimize(avg_cost)
+
+BATCH_SIZE = 20
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.uci_housing.train(), buf_size=500),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+exe = fluid.Executor(place)
+
+exe.run(fluid.default_startup_program())
+
+PASS_NUM = 100
+for pass_id in range(PASS_NUM):
+    fluid.io.save_persistables(exe, "./fit_a_line.model/")
+    fluid.io.load_persistables(exe, "./fit_a_line.model/")
+    for data in train_reader():
+        avg_loss_value, = exe.run(fluid.default_main_program(),
+                                  feed=feeder.feed(data),
+                                  fetch_list=[avg_cost])
+
+        if avg_loss_value[0] < 10.0:
+            exit(0)  # if avg cost less than 10.0, we think our code is good.
+exit(1)
+```
+
+We created a simple fully connected neural networks training program and handed it to the fluid executor to run for 100 passes.
+
+Now let's try to convert it to a distributed version to run in a cluster.
+
+#### Introducing parameter server
+
+As you see from the non-cluster version of training script, there is only one role in it: the trainer, who does the computing as well as holding parameters. In cluster training, since multi-trainers are working on the same task, they need one centralized place to hold and distribute parameters. This centralized place is called the Parameter Server in PaddlePaddle.
+
+![parameter server architect](src/trainer.png)
+
+Parameter Server in fluid does not only hold parameters but is also assigned with a part of the program. Trainers communicate with parameter servers via send/receive OPs. For more tech detail, please refer to this [document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/dist_refactor/distributed_architecture.md).
+
+Now we need to create program for both trainers and parameter servers, the question is how?
+
+#### Slice the program
+
+Fluid provides a tool called "Distribute Transpiler" to automatically convert the non-cluster program into cluster program.
+
+The idea behind this tool is to find optimize OPs and gradient parameters, slice the program into 2 pieces and connect them with send/receive OP.
+
+Optimize OPs and gradient parameters can be found from the return values of optimizer's minimize function.
+
+To put them together:
+
+``` python
+... #define the program, cost, and create sgd optimizer
+
+optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) #get optimize OPs and gradient parameters
+
+t = fluid.DistributeTranspiler() # create transpiler instance
+# slice the program into 2 pieces with optimizer_ops and gradient parameters list, as well as pserver_endpoints, which is a comma separated list of [IP:PORT] and number of trainers
+t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2) 
+
+... #create executor
+
+# in pserver, run this
+#current_endpoint here means current pserver IP:PORT you wish to run on
+pserver_prog = t.get_pserver_program(current_endpoint)
+pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+exe.run(pserver_startup)
+exe.run(pserver_prog)
+
+# in trainer, run this
+... # define data reader
+exe.run(fluid.default_startup_program())
+for pass_id in range(100):
+    for data in train_reader():
+        exe.run(t.get_trainer_program())
+
+
+```
+
+### E2E demo
+
+Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py). In parameter server node run this in the command line:
+
+``` bash
+PSERVERS=192.168.1.2:6174 SERVER_ENDPOINT=192.168.1.2:6174 TRAINING_ROLE=PSERVER python notest_dist_fit_a_line.py
+```
+
+*please note we assume that your parameter server runs at 192.168.1.2:6174*
+
+Wait until the prompt `Server listening on 192.168.1.2:6174`
+
+Then in 2 of your trainer node run this:
+
+``` bash
+PSERVERS=192.168.1.2:6174 SERVER_ENDPOINT=192.168.1.2:6174 TRAINING_ROLE=TRAINER python notest_dist_fit_a_line.py
+```
+
+*the reason you need to run this command twice in 2 nodes is: in the script we set the trainer count to be 2. You can change this setting on line 50*
+
+Now you have 2 trainers and 1 parameter server up and running.
diff --git a/doc/howto/usage/cluster/src/word2vec/api_train_v2.py b/doc/howto/usage/cluster/src/word2vec/api_train_v2.py
index c0940f0e56eafa22f8aeb7052c0ddc79d8862917..9a65f14628d8a0808dce25187b482354c72a838d 100644
--- a/doc/howto/usage/cluster/src/word2vec/api_train_v2.py
+++ b/doc/howto/usage/cluster/src/word2vec/api_train_v2.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import gzip
 import math
 
diff --git a/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py b/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py
index 2e6d8887124a5524505b097803a60a35478ca644..2afce9a66e521f9e3a8d566dd23762969f0594f5 100644
--- a/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py
+++ b/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import math
 import os
 import paddle.v2 as paddle
diff --git a/doc/howto/usage/cluster/src/word2vec/prepare.py b/doc/howto/usage/cluster/src/word2vec/prepare.py
index 24f5c5b26d37ea03de3ab4dc2d967a4bd009eef0..ade01c378efced05787e1f62e6fa6c38f8ac5ad3 100644
--- a/doc/howto/usage/cluster/src/word2vec/prepare.py
+++ b/doc/howto/usage/cluster/src/word2vec/prepare.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2 as paddle
 import tarfile
 import os
diff --git a/doc/v1_api_tutorials/README.md b/doc/v1_api_tutorials/README.md
deleted file mode 100644
index 071b8da61fbcab3e88819273008b4526546202ad..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-The tutorials in v1_api_tutorials are using v1_api currently, and will be upgraded to v2_api later.
-Thus, v1_api_tutorials is a temporary directory. We decide not to maintain it and will delete it in future.
-
-Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and 
-[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
diff --git a/doc/v1_api_tutorials/embedding_model/index_cn.md b/doc/v1_api_tutorials/embedding_model/index_cn.md
deleted file mode 100644
index 2b4a79fbbfc0c4af74aa73c540919f5d9cf2635b..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/embedding_model/index_cn.md
+++ /dev/null
@@ -1,139 +0,0 @@
-# 中文词向量模型的使用 #
-----------
-本文档介绍如何在PaddlePaddle平台上,使用预训练的标准格式词向量模型。
-
-在此感谢 @lipeng 提出的代码需求，并给出的相关模型格式的定义。
-
-## 介绍 ###
-### 中文字典 ###
-我们的字典使用内部的分词工具对百度知道和百度百科的语料进行分词后产生。分词风格如下： "《红楼梦》"将被分为 "《"，"红楼梦"，"》"，和 "《红楼梦》"。字典采用UTF8编码，输出有2列：词本身和词频。字典共包含 3206326个词和4个特殊标记：
-  - `<s>`: 分词序列的开始
-  - `<e>`: 分词序列的结束
-  - `PALCEHOLDER_JUST_IGNORE_THE_EMBEDDING`: 占位符，没有实际意义
-  - `<unk>`: 未知词
-
-### 中文词向量的预训练模型 ###
-遵循文章 [A Neural Probabilistic Language Model](http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)中介绍的方法，模型采用 n-gram 语言模型，结构如下图：6元上下文作为输入层->全连接层->softmax层 。对应于字典，我们预训练得到4种不同维度的词向量，分别为：32维、64维、128维和256维。
-<center>![](./neural-n-gram-model.png)</center>
-<center>Figure 1. neural-n-gram-model</center>
-
-### 下载和数据抽取 ###
-运行以下的命令下载和获取我们的字典和预训练模型：
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    ./pre_DictAndModel.sh
-
-## 中文短语改写的例子 ##
-以下示范如何使用预训练的中文字典和词向量进行短语改写。
-
-### 数据的准备和预处理 ###
-首先，运行以下的命令下载数据集。该数据集（utf8编码）包含20个训练样例，5个测试样例和2个生成式样例。
-
-    cd $PADDLE_ROOT/demo/seqToseq/data
-    ./paraphrase_data.sh
-
-第二步，将数据处理成规范格式，在训练数集上训练生成词向量字典（数据将保存在 `$PADDLE_SOURCE_ROOT/demo/seqToseq/data/pre-paraphrase`）:
-
-    cd $PADDLE_ROOT/demo/seqToseq/
-    python preprocess.py -i data/paraphrase [--mergeDict]
-
-- 其中，如果使用`--mergeDict`选项，源语言短语和目标语言短语的字典将被合并（源语言和目标语言共享相同的编码字典）。本实例中，源语言和目标语言都是相同的语言，因此可以使用该选项。
-
-
-### 使用用户指定的词向量字典 ###
-使用如下命令，从预训练模型中，根据用户指定的字典，抽取对应的词向量构成新的词表:
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    python extract_para.py --preModel PREMODEL --preDict PREDICT --usrModel USRMODEL--usrDict USRDICT -d DIM
-
-- `--preModel PREMODEL`: 预训练词向量字典模型的路径
-- `--preDict PREDICT`:  预训练模型使用的字典的路径
-- `--usrModel USRMODEL`: 抽取出的新词表的保存路径
-- `--usrDict USRDICT`: 用户指定新的字典的路径，用于构成新的词表
-- `-d DIM`: 参数（词向量）的维度
-
-此处，你也可以简单的运行以下的命令：
-
-    cd $PADDLE_ROOT/demo/seqToseq/data/
-    ./paraphrase_model.sh
-
-运行成功以后，你将会看到以下的模型结构：
-
-    paraphrase_model
-    |--- _source_language_embedding
-    |--- _target_language_embedding
-
-### 在PaddlePaddle平台训练模型 ###
-首先，配置模型文件，配置如下（可以参考保存在 `demo/seqToseq/paraphrase/train.conf`的配置）:
-
-    from seqToseq_net import *
-    is_generating = False
-
-    ################## Data Definition #####################
-    train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase",
-                                 job_mode = job_mode)
-
-    ############## Algorithm Configuration ##################
-    settings(
-          learning_method = AdamOptimizer(),
-          batch_size = 50,
-          learning_rate = 5e-4)
-
-    ################# Network configure #####################
-    gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32)
-
-这个配置与`demo/seqToseq/translation/train.conf` 基本相同
-
-然后，使用以下命令进行模型训练:
-
-    cd $PADDLE_SOURCE_ROOT/demo/seqToseq/paraphrase
-    ./train.sh
-
-其中，`train.sh` 与`demo/seqToseq/translation/train.sh` 基本相同，只有2个配置不一样:
-
-- `--init_model_path`: 初始化模型的路径配置为`data/paraphrase_modeldata/paraphrase_model`
-- `--load_missing_parameter_strategy`：如果参数模型文件缺失，除词向量模型外的参数将使用正态分布随机初始化
-
-如果用户想要了解详细的数据集的格式、模型的结构和训练过程，请查看 [Text generation Tutorial](../text_generation/index_cn.md).
-
-## 可选功能 ##
-###  观测词向量
-PaddlePaddle 平台为想观测词向量的用户提供了将二进制词向量模型转换为文本模型的功能:
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
-
-- `-i INPUT`: 输入的（二进制）词向量模型名称
-- `-o OUTPUT`: 输出的文本模型名称
-- `-d DIM`: （词向量）参数维度
-
-运行完以上命令，用户可以在输出的文本模型中看到:
-
-    0,4,32156096
-    -0.7845433,1.1937413,-0.1704215,0.4154715,0.9566584,-0.5558153,-0.2503305, ......
-    0.0000909,0.0009465,-0.0008813,-0.0008428,0.0007879,0.0000183,0.0001984, ......
-    ......
-
-- 其中，第一行是`PaddlePaddle` 输出文件的格式说明，包含3个属性：:
-  - `PaddlePaddle`的版本号，本例中为0
-  - 浮点数占用的字节数，本例中为4
-  - 总计的参数个数，本例中为32,156,096
-- 其余行是（词向量）参数行（假设词向量维度为32）
-  - 每行打印32个参数以','分隔
-  - 共有32,156,096/32 = 1,004,877行，也就是说，模型共包含1,004,877个被向量化的词
-
-### 词向量模型的修正
-`PaddlePaddle` 为想修正词向量模型的用户提供了将文本词向量模型转换为二进制模型的命令:
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    python paraconvert.py --t2b -i INPUT -o OUTPUT
-
-- `-i INPUT`: 输入的文本词向量模型名称
-- `-o OUTPUT`: 输出的二进制词向量模型名称
-
-请注意，输入的文本格式如下:
-
-    -0.7845433,1.1937413,-0.1704215,0.4154715,0.9566584,-0.5558153,-0.2503305, ......
-    0.0000909,0.0009465,-0.0008813,-0.0008428,0.0007879,0.0000183,0.0001984, ......
-    ......
-- 输入文本中没有头部（格式说明）行
-- （输入文本）每行存储一个词，以逗号','分隔
diff --git a/doc/v1_api_tutorials/embedding_model/index_en.md b/doc/v1_api_tutorials/embedding_model/index_en.md
deleted file mode 100644
index 9525f64f9b5384c8e44690fb0887fb2293108e0a..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/embedding_model/index_en.md
+++ /dev/null
@@ -1,140 +0,0 @@
-# Chinese Word Embedding Model Tutorial #
-----------
-This tutorial is to guide you through the process of using a Pretrained Chinese Word Embedding Model in the PaddlePaddle standard format.
-
-We thank @lipeng for the pull request that defined the model schemas and pretrained the models.
-
-## Introduction ###
-### Chinese Word Dictionary ###
-Our Chinese-word dictionary is created on Baidu ZhiDao and Baidu Baike by using in-house word segmentor. For example, the participle of "《红楼梦》" is "《"，"红楼梦"，"》"，and "《红楼梦》". Our dictionary (using UTF-8 format) has has two columns: word and its frequency. The total word count is 3206326, including 4 special token:
-  - `<s>`: the start of a sequence
-  - `<e>`: the end of a sequence
-  - `PALCEHOLDER_JUST_IGNORE_THE_EMBEDDING`: a placeholder, just ignore it and its embedding
-  - `<unk>`: a word not included in dictionary
-
-### Pretrained Chinese Word Embedding Model ###
-Inspired by paper [A Neural Probabilistic Language Model](http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf), our model architecture (**Embedding joint of six words->FullyConnect->SoftMax**) is as following graph. And for our dictionary, we pretrain four models with different word vector dimenstions, i.e 32, 64, 128, 256.
-<center>![](./neural-n-gram-model.png)</center>
-<center>Figure 1. neural-n-gram-model</center>
-
-### Download and Extract ###
-To download and extract our dictionary and pretrained model, run the following commands.
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    ./pre_DictAndModel.sh
-
-## Chinese Paraphrasing Example ##
-We provide a paraphrasing task to show the usage of pretrained Chinese Word Dictionary and Embedding Model.
-
-### Data Preparation and Preprocess ###
-
-First, run the following commands to download and extract the in-house dataset. The dataset (using UTF-8 format) has 20 training samples, 5 testing samples and 2 generating samples.
-
-    cd $PADDLE_ROOT/demo/seqToseq/data
-    ./paraphrase_data.sh
-
-Second, preprocess data and build dictionary on train data by running the following commands, and the preprocessed dataset is stored in `$PADDLE_SOURCE_ROOT/demo/seqToseq/data/pre-paraphrase`:
-
-    cd $PADDLE_ROOT/demo/seqToseq/
-    python preprocess.py -i data/paraphrase [--mergeDict]
-
-- `--mergeDict`: if using this option, the source and target dictionary are merged, i.e, two dictionaries have the same context. Here, as source and target data are all chinese words, this option can be used.
-
-### User Specified Embedding Model ###
-The general command of extracting desired parameters from the pretrained embedding model based on user dictionary is:
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    python extract_para.py --preModel PREMODEL --preDict PREDICT --usrModel USRMODEL--usrDict USRDICT -d DIM
-
-- `--preModel PREMODEL`: the name of pretrained embedding model
-- `--preDict PREDICT`: the name of pretrained dictionary
-- `--usrModel USRMODEL`: the name of extracted embedding model
-- `--usrDict USRDICT`: the name of user specified dictionary
-- `-d DIM`: dimension of parameter
-
-Here, you can simply run the command:
-
-    cd $PADDLE_ROOT/demo/seqToseq/data/
-    ./paraphrase_model.sh
-
-And you will see following embedding model structure:
-
-    paraphrase_model
-    |--- _source_language_embedding
-    |--- _target_language_embedding
-
-### Training Model in PaddlePaddle ###
-First, create a model config file, see example `demo/seqToseq/paraphrase/train.conf`:
-
-    from seqToseq_net import *
-    is_generating = False
-
-    ################## Data Definition #####################
-    train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase",
-                                 job_mode = job_mode)
-
-    ############## Algorithm Configuration ##################
-    settings(
-          learning_method = AdamOptimizer(),
-          batch_size = 50,
-          learning_rate = 5e-4)
-
-    ################# Network configure #####################
-    gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32)
-
-This config is almost the same as `demo/seqToseq/translation/train.conf`.
-
-Then, train the model by running the command:
-
-    cd $PADDLE_SOURCE_ROOT/demo/seqToseq/paraphrase
-    ./train.sh
-
-where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the only difference is following two command arguments:
-
-- `--init_model_path`: path of the initialization model, here is `data/paraphrase_model`
-- `--load_missing_parameter_strategy`: operations when model file is missing, here use a normal distibution to initialize the other parameters except for the embedding layer
-
-For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/index_en.md).
-
-## Optional Function ##
-###  Embedding Parameters Observation
-For users who want to observe the embedding parameters, this function can convert a PaddlePaddle binary embedding model to a text model by running the command:
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
-
-- `-i INPUT`: the name of input binary embedding model
-- `-o OUTPUT`: the name of output text embedding model
-- `-d DIM`: the dimension of parameter
-
-You will see parameters like this in output text model:
-
-    0,4,32156096
-    -0.7845433,1.1937413,-0.1704215,0.4154715,0.9566584,-0.5558153,-0.2503305, ......
-    0.0000909,0.0009465,-0.0008813,-0.0008428,0.0007879,0.0000183,0.0001984, ......
-    ......
-
-- 1st line is **PaddlePaddle format file head**, it has 3 attributes:
-  - version of PaddlePaddle, here is 0
-  - sizeof(float), here is 4
-  - total number of parameter, here is 32156096
-- Other lines print the paramters (assume `<dim>` = 32)
-  - each line print 32 paramters splitted by ','
-  - there is 32156096/32 = 1004877 lines, meaning there is 1004877 embedding words
-
-### Embedding Parameters Revision
-For users who want to revise the embedding parameters, this function can convert a revised text embedding model to a PaddlePaddle binary model by running the command:
-
-    cd $PADDLE_ROOT/demo/model_zoo/embedding
-    python paraconvert.py --t2b -i INPUT -o OUTPUT
-
-- `-i INPUT`: the name of input text embedding model.
-- `-o OUTPUT`: the name of output binary embedding model
-
-Note that the format of input text model is as follows:
-
-    -0.7845433,1.1937413,-0.1704215,0.4154715,0.9566584,-0.5558153,-0.2503305, ......
-    0.0000909,0.0009465,-0.0008813,-0.0008428,0.0007879,0.0000183,0.0001984, ......
-    ......
-- there is no file header in 1st line
-- each line stores parameters for one word, the separator is commas ','
diff --git a/doc/v1_api_tutorials/embedding_model/neural-n-gram-model.png b/doc/v1_api_tutorials/embedding_model/neural-n-gram-model.png
deleted file mode 100644
index f70b765b3fd69816345a79fc59adfea46008dbfd..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/embedding_model/neural-n-gram-model.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/gan/gan.png b/doc/v1_api_tutorials/gan/gan.png
deleted file mode 100644
index 0eafd7cb49b545f412f8e775804bcd0b22c42454..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/gan/gan.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/gan/index_en.md b/doc/v1_api_tutorials/gan/index_en.md
deleted file mode 100644
index ac9ed37b2264778869f92c0910b1cb946fb4427f..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/gan/index_en.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# Generative Adversarial Networks (GAN) 
-
-This demo implements GAN training described in the original [GAN paper](https://arxiv.org/abs/1406.2661) and deep convolutional generative adversarial networks [DCGAN paper](https://arxiv.org/abs/1511.06434).
-
-The high-level structure of GAN is shown in Figure. 1 below. It is composed of two major parts: a generator and a discriminator, both of which are based on neural networks. The generator takes in some kind of noise with a known distribution and transforms it into an image. The discriminator takes in an image and determines whether it is artificially generated by the generator or a real image. So the generator and the discriminator are in a competitive game in which generator is trying to generate image to look as real as possible to fool the discriminator, while the discriminator is trying to distinguish between real and fake images. 
-
-<center>![](./gan.png)</center>
-<p align="center">
-    Figure 1. GAN-Model-Structure
-    <a href="https://ishmaelbelghazi.github.io/ALI/">figure credit</a>
-</p>
-
-The generator and discriminator take turn to be trained using SGD. The objective function of the generator is for its generated images being classified as real by the discriminator, and the objective function of the discriminator is to correctly classify real and fake images. When the GAN model is trained to converge to the equilibrium state, the generator will transform the given noise distribution to the distribution of real images, and the discriminator will not be able to distinguish between real and fake images at all. 
-
-## Implementation of GAN Model Structure
-Since GAN model involves multiple neural networks, it requires to use paddle python API. So the code walk-through below can also partially serve as an introduction to the usage of Paddle Python API.
-
-There are three networks defined in gan_conf.py, namely **generator_training**, **discriminator_training** and **generator**. The relationship to the model structure we defined above is that **discriminator_training** is the discriminator, **generator** is the generator, and the **generator_training** combined the generator and discriminator since training generator would require the discriminator to provide loss function. This relationship is described in the following code:
-```python
-if is_generator_training:
-    noise = data_layer(name="noise", size=noise_dim)
-    sample = generator(noise)
-
-if is_discriminator_training:
-    sample = data_layer(name="sample", size=sample_dim)
-
-if is_generator_training or is_discriminator_training:
-    label = data_layer(name="label", size=1)
-    prob = discriminator(sample)
-    cost = cross_entropy(input=prob, label=label)
-    classification_error_evaluator(
-        input=prob, label=label, name=mode + '_error')
-    outputs(cost)
-
-if is_generator:
-    noise = data_layer(name="noise", size=noise_dim)
-    outputs(generator(noise))
-```
-
-In order to train the networks defined in gan_conf.py, one first needs to initialize a Paddle environment, parse the config, create GradientMachine from the config and create trainer from GradientMachine as done in the code chunk below:
-```python
-import py_paddle.swig_paddle as api
-# init paddle environment
-api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
-               '--log_period=100', '--gpu_id=' + args.gpu_id,
-               '--save_dir=' + "./%s_params/" % data_source)
-
-# Parse config
-gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
-dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
-generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
-
-# Create GradientMachine
-dis_training_machine = api.GradientMachine.createFromConfigProto(
-dis_conf.model_config)
-gen_training_machine = api.GradientMachine.createFromConfigProto(
-gen_conf.model_config)
-generator_machine = api.GradientMachine.createFromConfigProto(
-generator_conf.model_config)
-
-# Create trainer
-dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
-gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
-```
-
-In order to balance the strength between generator and discriminator, we schedule to train whichever one is performing worse by comparing their loss function value. The loss function value can be calculated by a forward pass through the GradientMachine.
-```python
-def get_training_loss(training_machine, inputs):
-    outputs = api.Arguments.createArguments(0)
-    training_machine.forward(inputs, outputs, api.PASS_TEST)
-    loss = outputs.getSlotValue(0).copyToNumpyMat()
-    return numpy.mean(loss)
-```
-
-After training one network, one needs to sync the new parameters to the other networks. The code below demonstrates one example of such use case:
-```python
-# Train the gen_training
-gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
-
-# Copy the parameters from gen_training to dis_training and generator
-copy_shared_parameters(gen_training_machine,
-dis_training_machine)
-copy_shared_parameters(gen_training_machine, generator_machine)
-```
-
-
-## A Toy Example 
-With the infrastructure explained above, we can now walk you through a toy example of generating two dimensional uniform distribution using 10 dimensional Gaussian noise. 
-
-The Gaussian noises are generated using the code below:
-```python
-def get_noise(batch_size, noise_dim):
-    return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
-```
-
-The real samples (2-D uniform) are generated using the code below:
-```python
-# synthesize 2-D uniform data in gan_trainer.py:114
-def load_uniform_data():
-    data = numpy.random.rand(1000000, 2).astype('float32')
-    return data
-```
-
-The generator and discriminator network are built using fully-connected layer and batch_norm layer, and are defined in gan_conf.py. 
-
-To train the GAN model, one can use the command below. The flag -d specifies the training data (cifar, mnist or uniform) and flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).  
-```bash
-$python gan_trainer.py -d uniform --useGpu 1
-```
-The generated samples can be found in ./uniform_samples/ and one example is shown below as Figure 2. One can see that it roughly recovers the 2D uniform distribution. 
-
-<center>![](./uniform_sample.png)</center>
-<p align="center">
-    Figure 2. Uniform Sample
-</p>
-
-## MNIST Example
-### Data preparation
-To download the MNIST data, one can use the following commands:
-```bash
-$cd data/
-$./get_mnist_data.sh
-```
-
-### Model description
-Following the DC-Gan paper (https://arxiv.org/abs/1511.06434), we use convolution/convolution-transpose layer in the discriminator/generator network to better deal with images. The details of the network structures are defined in gan_conf_image.py. 
-
-### Training the model
-To train the GAN model on mnist data, one can use the following command:
-```bash
-$python gan_trainer.py -d mnist --useGpu 1
-```
-The generated sample images can be found at ./mnist_samples/ and one example is shown below as Figure 3. 
-<center>![](./mnist_sample.png)</center>
-<p align="center">
-    Figure 3. MNIST Sample
-</p>
diff --git a/doc/v1_api_tutorials/gan/mnist_sample.png b/doc/v1_api_tutorials/gan/mnist_sample.png
deleted file mode 100644
index f9c7bf7ddd7f148eac4fe347e9c38afaa8876760..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/gan/mnist_sample.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/gan/uniform_sample.png b/doc/v1_api_tutorials/gan/uniform_sample.png
deleted file mode 100644
index e716c48e782019a757bed0cb443f2ed97386cbe2..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/gan/uniform_sample.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/imagenet_model/resnet_block.jpg b/doc/v1_api_tutorials/imagenet_model/resnet_block.jpg
deleted file mode 100644
index e16bd3c624030c4c09b358a015b491141b42d8f1..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/imagenet_model/resnet_block.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/imagenet_model/resnet_model_cn.md b/doc/v1_api_tutorials/imagenet_model/resnet_model_cn.md
deleted file mode 100644
index 82ec9d70b345c11aba3aa86f8206eedc8072bb88..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/imagenet_model/resnet_model_cn.md
+++ /dev/null
@@ -1,284 +0,0 @@
-# Model Zoo - ImageNet #
-
-[ImageNet](http://www.image-net.org/) 是通用物体分类领域一个众所周知的数据库。本教程提供了一个用于ImageNet上的卷积分类网络模型。
-
-## ResNet 介绍
-
-论文 [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385) 中提出的ResNet网络结构在2015年ImageNet大规模视觉识别竞赛(ILSVRC 2015)的分类任务中赢得了第一名。他们提出残差学习的框架来简化网络的训练，所构建网络结构的的深度比之前使用的网络有大幅度的提高。下图展示的是基于残差的连接方式。左图构造网络模块的方式被用于34层的网络中，而右图的瓶颈连接模块用于50层，101层和152层的网络结构中。
-
-<center>![resnet_block](./resnet_block.jpg)</center>
-<center>图 1. ResNet 网络模块</center>
-
-本教程中我们给出了三个ResNet模型，这些模型都是由原作者提供的模型<https://github.com/KaimingHe/deep-residual-networks>转换过来的。我们使用PaddlePaddle在ILSVRC的验证集共50,000幅图像上测试了模型的分类错误率，其中输入图像的颜色通道顺序为**BGR**，保持宽高比缩放到短边为256，只截取中心方形的图像区域。分类错误率和模型大小由下表给出。
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-<colgroup>
-<col  class="left" />
-<col  class="left" />
-<col  class="left" />
-</colgroup>
-<thead>
-<tr>
-<th scope="col" class="left">ResNet</th>
-<th scope="col" class="left">Top-1</th>
-<th scope="col" class="left">Model Size</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">ResNet-50</td>
-<td class="left">24.9%</td>
-<td class="left">99M</td>
-</tr>
-<tr>
-<td class="left">ResNet-101</td>
-<td class="left">23.7%</td>
-<td class="left">173M</td>
-</tr>
-<tr>
-<td class="left">ResNet-152</td>
-<td class="left">23.2%</td>
-<td class="left">234M</td>
-</tr>
-</tbody>
-
-</table></center>
-<br>
-
-## ResNet 模型
-
-50层，101层和152层的网络配置文件可参照```demo/model_zoo/resnet/resnet.py```。你也可以通过在命令行参数中增加一个参数如```--config_args=layer_num=50```来指定网络层的数目。
-
-### 网络可视化
-
-你可以通过执行下面的命令来得到ResNet网络的结构可视化图。该脚本会生成一个dot文件，然后可以转换为图片。需要安装graphviz来转换dot文件为图片。
-
-```
-cd demo/model_zoo/resnet
-./net_diagram.sh
-```
-
-### 模型下载
-
-```
-cd demo/model_zoo/resnet
-./get_model.sh
-```
-你可以执行上述命令来下载所有的模型和均值文件，如果下载成功，这些文件将会被保存在```demo/model_zoo/resnet/model```路径下。
-
-```
-mean_meta_224  resnet_101  resnet_152  resnet_50
-```
-   * resnet_50: 50层网络模型。
-   * resnet_101: 101层网络模型。
-   * resnet_152: 152层网络模型。
-   * mean\_meta\_224: 均值图像文件，图像大小为3 x 224 x 224，颜色通道顺序为**BGR**。你也可以使用这三个值: 103.939, 116.779, 123.68。
-
-### 参数信息
-
-* **卷积层权重**
-
-  由于每个卷积层后面连接的是batch normalization层，因此该层中没有偏置(bias)参数，并且只有一个权重。
-  形状: `(Co, ky, kx, Ci)`
-   * Co: 输出特征图的通道数目
-   * ky: 滤波器核在垂直方向上的尺寸
-   * kx: 滤波器核在水平方向上的尺寸
-   * Ci: 输入特征图的通道数目
-
-  二维矩阵: (Co * ky * kx, Ci), 行优先次序存储。
-
-* **全连接层权重**
-
-  二维矩阵: (输入层尺寸, 本层尺寸), 行优先次序存储。
-
-* **[Batch Normalization](<http://arxiv.org/abs/1502.03167>) 层权重**
-
-本层有四个参数，实际上只有.w0和.wbias是需要学习的参数，另外两个分别是滑动均值和方差。在测试阶段它们将会被加载到模型中。下表展示了batch normalization层的参数。
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-<colgroup>
-<col  class="left" />
-<col  class="left" />
-<col  class="left" />
-</colgroup>
-<thead>
-<tr>
-<th scope="col" class="left">参数名</th>
-<th scope="col" class="left">尺寸</th>
-<th scope="col" class="left">含义</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">_res2_1_branch1_bn.w0</td>
-<td class="left">256</td>
-<td class="left">gamma, 缩放参数</td>
-</tr>
-<tr>
-<td class="left">_res2_1_branch1_bn.w1</td>
-<td class="left">256</td>
-<td class="left">特征图均值</td>
-</tr>
-<tr>
-<td class="left">_res2_1_branch1_bn.w2</td>
-<td class="left">256</td>
-<td class="left">特征图方差</td>
-</tr>
-<tr>
-<td class="left">_res2_1_branch1_bn.wbias</td>
-<td class="left">256</td>
-<td class="left">beta, 偏置参数</td>
-</tr>
-</tbody>
-
-</table></center>
-<br>
-
-### 参数读取
-
-使用者可以使用下面的Python脚本来读取参数值:
-
-```
-import sys
-import numpy as np
-
-def load(file_name):
-    with open(file_name, 'rb') as f:
-        f.read(16) # skip header for float type.
-        return np.fromfile(f, dtype=np.float32)
-
-if __name__=='__main__':
-    weight = load(sys.argv[1])
-```
-
-或者直接使用下面的shell命令:
-
-```
-od -j 16 -f _res2_1_branch1_bn.w0
-```
-
-## 特征提取
-
-我们提供了C++和Python接口来提取特征。下面的例子使用了`demo/model_zoo/resnet/example`中的数据，详细地展示了整个特征提取的过程。
-
-### C++接口
-
-首先，在配置文件中的`define_py_data_sources2`里指定图像数据列表，具体请参照示例`demo/model_zoo/resnet/resnet.py`。
-
-```
-    train_list = 'train.list' if not is_test else None
-    # mean.meta is mean file of ImageNet dataset.
-    # mean.meta size : 3 x 224 x 224.
-    # If you use three mean value, set like:
-    # "mean_value:103.939,116.779,123.68;"
-    args={
-        'mean_meta': "model/mean_meta_224/mean.meta",
-        'image_size': 224, 'crop_size': 224,
-        'color': True,'swap_channel:': [2, 1, 0]}
-    define_py_data_sources2(train_list,
-                           'example/test.list',
-                           module="example.image_list_provider",
-                           obj="processData",
-                           args=args)
-```
-
-第二步，在`resnet.py`文件中指定要提取特征的网络层的名字。例如，
-
-```
-Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
-```
-
-第三步，在`extract_fea_c++.sh`文件中指定模型路径和输出的目录，然后执行下面的命令。
-
-```
-cd demo/model_zoo/resnet
-./extract_fea_c++.sh
-```
-
-如果执行成功，特征将会存到`fea_output/rank-00000`文件中，如下所示。同时你可以使用`load_feature.py`文件中的`load_feature_c`接口来加载该文件。
-
-```
--0.115318 -0.108358 ... -0.087884;-1.27664 ... -1.11516 -2.59123;
--0.126383 -0.116248 ... -0.00534909;-1.42593 ... -1.04501 -1.40769;
-```
-
-* 每行存储的是一个样本的特征。其中，第一行存的是图像`example/dog.jpg`的特征，第二行存的是图像`example/cat.jpg`的特征。
-* 不同层的特征由分号`;`隔开，并且它们的顺序与`Outputs()`中指定的层顺序一致。这里，左边是`res5_3_branch2c_conv`层的特征，右边是`res5_3_branch2c_bn`层特征。
-
-### Python接口
-
-示例`demo/model_zoo/resnet/classify.py`中展示了如何使用Python来提取特征。下面的例子同样使用了`./example/test.list`中的数据。执行的命令如下：
-
-```
-cd demo/model_zoo/resnet
-./extract_fea_py.sh
-```
-
-extract_fea_py.sh:
-
-```
-python classify.py \
-     --job=extract \
-     --conf=resnet.py\
-     --use_gpu=1 \
-     --mean=model/mean_meta_224/mean.meta \
-     --model=model/resnet_50 \
-     --data=./example/test.list \
-     --output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
-     --output_dir=features
-
-```
-* \--job=extract:              指定工作模式来提取特征。
-* \--conf=resnet.py:           网络配置文件。
-* \--use_gpu=1:                指定是否使用GPU。
-* \--model=model/resnet_50:    模型路径。
-* \--data=./example/test.list: 数据列表。
-* \--output_layer="xxx,xxx":   指定提取特征的层。
-* \--output_dir=features:      输出目录。
-
-如果运行成功，你将会看到特征存储在`features/batch_0`文件中，该文件是由cPickle产生的。你可以使用`load_feature.py`中的`load_feature_py`接口来打开该文件，它将返回如下的字典：
-
-```
-{
-'cat.jpg': {'res5_3_branch2c_conv': array([[-0.12638293, -0.116248  , -0.11883899, ..., -0.00895038, 0.01994277, -0.00534909]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.42593431, -1.28918779, -1.32414699, ..., -1.45933616, -1.04501402, -1.40769434]], dtype=float32)},
-'dog.jpg': {'res5_3_branch2c_conv': array([[-0.11531784, -0.10835785, -0.08809858, ...,0.0055237, 0.01505112, -0.08788397]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.27663755, -1.18272924, -0.90937918, ..., -1.25178063, -1.11515927, -2.59122872]], dtype=float32)}
-}
-```
-
-仔细观察，这些特征值与上述使用C++接口提取的结果是一致的。
-
-## 预测
-
-`classify.py`文件也可以用于对样本进行预测。我们提供了一个示例脚本`predict.sh`，它使用50层的ResNet模型来对`example/test.list`中的数据进行预测。
-
-```
-cd demo/model_zoo/resnet
-./predict.sh
-```
-
-predict.sh调用了`classify.py`:
-
-```
-python classify.py \
-     --job=predict \
-     --conf=resnet.py\
-     --multi_crop \
-     --model=model/resnet_50 \
-     --use_gpu=1 \
-     --data=./example/test.list
-```
-* \--job=extract:              指定工作模型进行预测。
-* \--conf=resnet.py:           网络配置文件。network configure.
-* \--multi_crop:               使用10个裁剪图像块，预测概率取平均。
-* \--use_gpu=1:                指定是否使用GPU。
-* \--model=model/resnet_50:    模型路径。
-* \--data=./example/test.list: 数据列表。
-
-如果运行成功，你将会看到如下结果，其中156和285是这些图像的分类标签。
-
-```
-Label of example/dog.jpg is: 156
-Label of example/cat.jpg is: 282
-```
diff --git a/doc/v1_api_tutorials/imagenet_model/resnet_model_en.md b/doc/v1_api_tutorials/imagenet_model/resnet_model_en.md
deleted file mode 100644
index 478ad06193b14ba7fe02238df621db1f7b0804d4..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/imagenet_model/resnet_model_en.md
+++ /dev/null
@@ -1,284 +0,0 @@
-# Model Zoo - ImageNet #
-
-[ImageNet](http://www.image-net.org/) is a popular dataset for generic object classification. This tutorial provides convolutional neural network(CNN) models for ImageNet.
-
-## ResNet Introduction
-
-ResNets from paper [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385) won the 1st place on the ILSVRC 2015 classification task. They present residual learning framework to ease the training of networks that are substantially deeper than those used previously. The residual connections are shown in following figure. The left building block is used in network of 34 layers and the right bottleneck building block is used in network of 50, 101, 152 layers .
-
-<center>![resnet_block](./resnet_block.jpg)</center>
-<center>Figure 1. ResNet Block</center>
-
-We present three ResNet models, which are converted from the models provided by the authors <https://github.com/KaimingHe/deep-residual-networks>.  The classfication errors tested in PaddlePaddle on 50,000 ILSVRC validation set with input images channel order of **BGR** by single scale with the shorter side of 256 and single crop as following table.
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-<colgroup>
-<col  class="left" />
-<col  class="left" />
-<col  class="left" />
-</colgroup>
-<thead>
-<tr>
-<th scope="col" class="left">ResNet</th>
-<th scope="col" class="left">Top-1</th>
-<th scope="col" class="left">Model Size</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">ResNet-50</td>
-<td class="left">24.9%</td>
-<td class="left">99M</td>
-</tr>
-<tr>
-<td class="left">ResNet-101</td>
-<td class="left">23.7%</td>
-<td class="left">173M</td>
-</tr>
-<tr>
-<td class="left">ResNet-152</td>
-<td class="left">23.2%</td>
-<td class="left">234M</td>
-</tr>
-</tbody>
-
-</table></center>
-<br>
-
-## ResNet Model
-
-See ```demo/model_zoo/resnet/resnet.py```. This config contains network of 50, 101 and 152 layers. You can specify layer number by adding argument like ```--config_args=layer_num=50``` in command line arguments.
-
-### Network Visualization
-
-You can get a diagram of ResNet network by running the following commands. The script generates dot file and then converts dot file to PNG file, which needs to install graphviz to convert.
-
-```
-cd demo/model_zoo/resnet
-./net_diagram.sh
-```
-
-### Model Download
-
-```
-cd demo/model_zoo/resnet
-./get_model.sh
-```
-You can run above command to download all models and mean file and save them in ```demo/model_zoo/resnet/model``` if downloading successfully.
-
-```
-mean_meta_224  resnet_101  resnet_152  resnet_50
-```
-   * resnet_50: model of 50 layers.
-   * resnet_101: model of 101 layers.
-   * resnet_152: model of 152 layers.
-   * mean\_meta\_224: mean file with 3 x 224 x 224 size in **BGR** order. You also can use three mean values: 103.939, 116.779, 123.68.
-
-### Parameter Info
-
-* **Convolution Layer Weight**
-
-  As batch normalization layer is connected after each convolution layer, there is no parameter of bias and only one weight in this layer.
-  shape: `(Co, ky, kx, Ci)`
-   * Co: channle number of output feature map.
-   * ky: filter size in vertical direction.
-   * kx: filter size in horizontal direction.
-   * Ci: channle number of input feature map.
-
-  2-Dim matrix: (Co * ky * kx, Ci), saved in row-major order.
-
-* **Fully connected Layer Weight**
-
-  2-Dim matrix: (input layer size, this layer size), saved in row-major order.
-
-* **[Batch Normalization](<http://arxiv.org/abs/1502.03167>) Layer Weight**
-
-There are four parameters in this layer. In fact, only .w0 and .wbias are the learned parameters. The other two are therunning mean and variance respectively. They will be loaded in testing. Following table shows parameters of a batch normzalization layer.
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-<colgroup>
-<col  class="left" />
-<col  class="left" />
-<col  class="left" />
-</colgroup>
-<thead>
-<tr>
-<th scope="col" class="left">Parameter Name</th>
-<th scope="col" class="left">Number</th>
-<th scope="col" class="left">Meaning</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">_res2_1_branch1_bn.w0</td>
-<td class="left">256</td>
-<td class="left">gamma, scale parameter</td>
-</tr>
-<tr>
-<td class="left">_res2_1_branch1_bn.w1</td>
-<td class="left">256</td>
-<td class="left">mean value of feature map</td>
-</tr>
-<tr>
-<td class="left">_res2_1_branch1_bn.w2</td>
-<td class="left">256</td>
-<td class="left">variance of feature map</td>
-</tr>
-<tr>
-<td class="left">_res2_1_branch1_bn.wbias</td>
-<td class="left">256</td>
-<td class="left">beta, shift parameter</td>
-</tr>
-</tbody>
-
-</table></center>
-<br>
-
-### Parameter Observation
-
-Users who want to observe the parameters can use Python to read:
-
-```
-import sys
-import numpy as np
-
-def load(file_name):
-    with open(file_name, 'rb') as f:
-        f.read(16) # skip header for float type.
-        return np.fromfile(f, dtype=np.float32)
-
-if __name__=='__main__':
-    weight = load(sys.argv[1])
-```
-
-or simply use following shell command:
-
-```
-od -j 16 -f _res2_1_branch1_bn.w0
-```
-
-## Feature Extraction
-
-We provide both C++ and Python interfaces to extract features. The following examples use data in `demo/model_zoo/resnet/example` to show the extracting process in detail.
-
-### C++ Interface
-
-First, specify image data list in `define_py_data_sources2` in the config, see example `demo/model_zoo/resnet/resnet.py`.
-
-```
-    train_list = 'train.list' if not is_test else None
-    # mean.meta is mean file of ImageNet dataset.
-    # mean.meta size : 3 x 224 x 224.
-    # If you use three mean value, set like:
-    # "mean_value:103.939,116.779,123.68;"
-    args={
-        'mean_meta': "model/mean_meta_224/mean.meta",
-        'image_size': 224, 'crop_size': 224,
-        'color': True,'swap_channel:': [2, 1, 0]}
-    define_py_data_sources2(train_list,
-                           'example/test.list',
-                           module="example.image_list_provider",
-                           obj="processData",
-                           args=args)
-```
-
-Second, specify layers to extract features in `Outputs()` of `resnet.py`. For example,
-
-```
-Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
-```
-
-Third, specify model path and output directory in `extract_fea_c++.sh`, and then run the following commands.
-
-```
-cd demo/model_zoo/resnet
-./extract_fea_c++.sh
-```
-
-If successful, features are saved in `fea_output/rank-00000` as follows. And you can use `load_feature_c` interface in `load_feature.py ` to load such a file.
-
-```
--0.115318 -0.108358 ... -0.087884;-1.27664 ... -1.11516 -2.59123;
--0.126383 -0.116248 ... -0.00534909;-1.42593 ... -1.04501 -1.40769;
-```
-
-* Each line stores features of a sample. Here, the first line stores features of `example/dog.jpg` and second line stores features of `example/cat.jpg`.
-* Features of different layers are splitted by `;`, and their order is consistent with the layer order in `Outputs()`. Here, the left features are `res5_3_branch2c_conv` layer and right features are `res5_3_branch2c_bn` layer.
-
-### Python Interface
-
-`demo/model_zoo/resnet/classify.py` is an example to show how to use Python to extract features. Following example still uses data of `./example/test.list`. Command is as follows:
-
-```
-cd demo/model_zoo/resnet
-./extract_fea_py.sh
-```
-
-extract_fea_py.sh:
-
-```
-python classify.py \
-     --job=extract \
-     --conf=resnet.py\
-     --use_gpu=1 \
-     --mean=model/mean_meta_224/mean.meta \
-     --model=model/resnet_50 \
-     --data=./example/test.list \
-     --output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
-     --output_dir=features
-
-```
-* \--job=extract:              specify job mode to extract feature.
-* \--conf=resnet.py:           network configure.
-* \--use_gpu=1:             speficy GPU mode.
-* \--model=model/resnet_5:     model path.
-* \--data=./example/test.list: data list.
-* \--output_layer="xxx,xxx":   specify layers to extract features.
-* \--output_dir=features:      output diretcoty.
-
-If run successfully, you will see features saved in `features/batch_0`, this file is produced with cPickle. You can use `load_feature_py` interface in `load_feature.py` to open the file, and it returns a dictionary as follows:
-
-```
-{
-'cat.jpg': {'res5_3_branch2c_conv': array([[-0.12638293, -0.116248  , -0.11883899, ..., -0.00895038, 0.01994277, -0.00534909]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.42593431, -1.28918779, -1.32414699, ..., -1.45933616, -1.04501402, -1.40769434]], dtype=float32)},
-'dog.jpg': {'res5_3_branch2c_conv': array([[-0.11531784, -0.10835785, -0.08809858, ...,0.0055237, 0.01505112, -0.08788397]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.27663755, -1.18272924, -0.90937918, ..., -1.25178063, -1.11515927, -2.59122872]], dtype=float32)}
-}
-```
-
-Observed carefully, these feature values are consistent with the above results extracted by C++ interface.
-
-## Prediction
-
-`classify.py` also can be used to predict. We provide an example script `predict.sh` to predict data in `example/test.list` using a ResNet model with 50 layers.
-
-```
-cd demo/model_zoo/resnet
-./predict.sh
-```
-
-predict.sh calls the `classify.py`:
-
-```
-python classify.py \
-     --job=predict \
-     --conf=resnet.py\
-     --multi_crop \
-     --model=model/resnet_50 \
-     --use_gpu=1 \
-     --data=./example/test.list
-```
-* \--job=extract:              speficy job mode to predict.
-* \--conf=resnet.py:           network configure.
-* \--multi_crop:               use 10 crops and average predicting probability.
-* \--use_gpu=1:             speficy GPU mode.
-* \--model=model/resnet_50:    model path.
-* \--data=./example/test.list: data list.
-
-If run successfully, you will see following results, where 156 and 285 are labels of the images.
-
-```
-Label of example/dog.jpg is: 156
-Label of example/cat.jpg is: 282
-```
diff --git a/doc/v1_api_tutorials/quick_start/index_cn.rst b/doc/v1_api_tutorials/quick_start/index_cn.rst
deleted file mode 100644
index d565fcf95ef8489eb22a5a1b5a552b5336f4e371..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/quick_start/index_cn.rst
+++ /dev/null
@@ -1,397 +0,0 @@
-=============
-快速入门教程
-=============
-
-我们将以 `文本分类问题 <https://en.wikipedia.org/wiki/Document_classification>`_ 为例,
-介绍PaddlePaddle的基本使用方法。
-
-安装
-====
-
-请参考 :ref:`install_steps` 安装PaddlePaddle。
-
-使用概述
-========
-
-**文本分类问题**：对于给定的一条文本，我们从提前给定的类别集合中选择其所属类别。
-
-比如, 在购物网站上，通过查看买家对某个产品的评价反馈, 评估该产品的质量。
-
-- 这个显示器很棒！ （好评）
-- 用了两个月之后这个显示器屏幕碎了。（差评）
-
-使用PaddlePaddle, 每一个任务流程都可以被划分为如下五个步骤。
-
-    ..  image:: src/Pipeline_cn.jpg
-        :align: center
-        :scale: 80%
-
-1. 数据格式准备
-    - 本例每行保存一条样本，类别Id和文本信息用 ``Tab`` 间隔，文本中的单词用空格分隔（如果不切词，则字与字之间用空格分隔），例如：``类别Id '\t' 这 个 显 示 器 很 棒 ！``
-2. 向系统传送数据
-    - PaddlePaddle可以执行用户的python脚本程序来读取各种格式的数据文件。
-    - 本例的所有字符都将转换为连续整数表示的Id传给模型。
-3. 描述网络结构和优化算法
-    - 本例由易到难展示4种不同的文本分类网络配置：逻辑回归模型，词向量模型，卷积模型，时序模型。
-    - 常用优化算法包括Momentum, RMSProp，AdaDelta，AdaGrad，Adam，Adamax等，本例采用Adam优化方法，加了L2正则和梯度截断。
-4. 训练模型
-5. 应用模型
-
-数据格式准备
-------------
-
-接下来我们将展示如何用PaddlePaddle训练一个文本分类模型，将 `Amazon电子产品评论数据 <http://jmcauley.ucsd.edu/data/amazon/>`_ 分为好评(正样本)和差评(负样本)两种类别。
-`源代码 <https://github.com/PaddlePaddle/Paddle>`_ 的 ``demo/quick_start`` 目录里提供了该数据的下载脚本和预处理脚本，你只需要在命令行输入以下命令，就能够很方便的完成数据下载和相应的预处理工作。
-
-.. code-block:: bash
-
-    cd demo/quick_start
-    ./data/get_data.sh
-    ./preprocess.sh
-
-数据预处理完成之后，通过配置类似于 ``dataprovider_*.py`` 的数据读取脚本和类似于 ``trainer_config.*.py`` 的训练模型脚本，PaddlePaddle将以设置参数的方式来设置
-相应的数据读取脚本和训练模型脚本。接下来，我们将对这两个步骤给出了详细的解释，你也可以先跳过本文的解释环节，直接进入训练模型章节, 使用 ``sh train.sh`` 开始训练模型，
-查看`train.sh`内容，通过 **自底向上法** (bottom-up approach)来帮助你理解PaddlePaddle的内部运行机制。
-
-
-向系统传送数据
-==============
-
-Python脚本读取数据
-------------------
-
-`DataProvider` 是PaddlePaddle负责提供数据的模块，主要职责在于将训练数据传入内存或者显存，让模型能够得到训练更新，其包括两个函数：
-
-* initializer：PaddlePaddle会在调用读取数据的Python脚本之前，先调用initializer函数。在下面例子里，我们在initialzier函数里初始化词表，并且在随后的读取数据过程中填充词表。
-* process：PaddlePaddle调用process函数来读取数据。每次读取一条数据后，process函数会用yield语句输出这条数据，从而能够被PaddlePaddle 捕获 (harvest)。
-
-``dataprovider_bow.py`` 文件给出了完整例子：
-
-..  literalinclude:: ../../../demo/quick_start/dataprovider_bow.py
-     :language: python
-     :lines: 21-70
-     :linenos:
-     :emphasize-lines: 8,33
-
-详细内容请参见 :ref:`api_dataprovider` 。
-
-配置中的数据加载定义
---------------------
-
-在模型配置中通过 ``define_py_data_sources2`` 接口来加载数据：
-
-..  literalinclude:: ../../../demo/quick_start/trainer_config.emb.py
-     :language: python
-     :lines: 19-35
-     :linenos:
-     :emphasize-lines: 12
-
-
-以下是对上述数据加载的解释：
-
-- data/train.list,data/test.list: 指定训练数据和测试数据
-- module="dataprovider_bow": 处理数据的Python脚本文件
-- obj="process": 指定生成数据的函数
-- args={"dictionary": word_dict}: 额外的参数，这里指定词典
-
-更详细数据格式和用例请参考 :ref:`api_pydataprovider2` 。
-
-模型网络结构
-============
-
-本小节我们将介绍模型网络结构。
-
-    ..  image:: src/PipelineNetwork_cn.jpg
-        :align: center
-        :scale: 80%
-
-
-我们将以最基本的逻辑回归网络作为起点，并逐渐展示更加深入的功能。更详细的网络配置连接请参考 :ref:`api_trainer_config_helpers_layers` 。
-所有配置都能在 `源代码 <https://github.com/PaddlePaddle/Paddle>`_ 的 ``demo/quick_start`` 目录下找到。
-
-逻辑回归模型
-------------
-
-具体流程如下:
-
-    ..  image:: src/NetLR_cn.jpg
-        :align: center
-        :scale: 80%
-
-- 获取利用 `one-hot vector <https://en.wikipedia.org/wiki/One-hot>`_ 表示的每个单词，维度是词典大小
-
-    .. code-block:: python
-
-        word = data_layer(name="word",  size=word_dim)
-
-- 获取该条样本类别Id，维度是类别个数。
-
-    .. code-block:: python
-
-        label = data_layer(name="label", size=label_dim)
-
-- 利用逻辑回归模型对该向量进行分类，同时会计算分类准确率
-
-    .. code-block:: python
-
-        # Define a fully connected layer with logistic activation (also called softmax activation).
-        output = fc_layer(input=word,
-                        size=label_dim,
-                        act_type=SoftmaxActivation())
-        # Define cross-entropy classification loss and error.
-        classification_cost(input=output, label=label)
-
-
- - input: 除去data层，每个层都有一个或多个input,多个input以list方式输入
- - size: 该层神经元个数
- - act_type: 激活函数类型
-
-**效果总结**：我们将在后面介绍训练和预测流程的脚本。在此为方便对比不同网络结构，我们总结了各个网络的复杂度和效果。
-
-    =====================  ===============================  =================
-    网络名称                        参数数量                    错误率
-    =====================  ===============================  =================
-    逻辑回归                      252 KB                       8.652 %
-    =====================  ===============================  =================
-
-词向量模型
-----------
-
-embedding模型需要稍微改变提供数据的Python脚本，即 ``dataprovider_emb.py``，词向量模型、
-卷积模型、时序模型均使用该脚本。其中文本输入类型定义为整数时序类型integer_value_sequence。
-
-.. code-block:: python
-
-    def initializer(settings, dictionary, **kwargs):
-        settings.word_dict = dictionary
-        settings.input_types = [
-            # Define the type of the first input as sequence of integer.
-            # The value of the integers range from 0 to len(dictrionary)-1
-            integer_value_sequence(len(dictionary)),
-            # Define the second input for label id
-            integer_value(2)]
-
-    @provider(init_hook=initializer)
-    def process(settings, file_name):
-        ...
-        # omitted, it is same as the data provider for LR model
-
-该模型依然使用逻辑回归分类网络的框架， 只是将句子用连续向量表示替换为用稀疏向量表示， 即对第三步进行替换。句子表示的计算更新为两步：
-
-..  image:: src/NetContinuous_cn.jpg
-    :align: center
-    :scale: 80%
-
-- 利用单词Id查找该单词对应的连续向量(维度为word_dim)， 输入N个单词，输出为N个word_dim维度向量
-
-    .. code-block:: python
-
-        emb = embedding_layer(input=word, size=word_dim)
-
-- 将该句话包含的所有单词向量求平均, 得到句子的表示
-
-    .. code-block:: python
-
-        avg = pooling_layer(input=emb, pooling_type=AvgPooling())
-
-其它部分和逻辑回归网络结构一致。
-
-**效果总结：**
-
-    =====================  ===============================  ==================
-    网络名称                        参数数量                    错误率
-    =====================  ===============================  ==================
-    词向量模型                      15 MB                       8.484 %
-    =====================  ===============================  ==================
-
-卷积模型
------------
-
-卷积网络是一种特殊的从词向量表示到句子表示的方法， 也就是将词向量模型进一步演化为三个新步骤。
-
-..  image:: src/NetConv_cn.jpg
-    :align: center
-    :scale: 80%
-
-文本卷积分可为三个步骤:
-
-1. 首先，从每个单词左右两端分别获取k个相邻的单词, 拼接成一个新的向量；
-
-2. 其次，对该向量进行非线性变换(例如Sigmoid变换), 使其转变为维度为hidden_dim的新向量；
-
-3. 最后，对整个新向量集合的每一个维度取最大值来表示最后的句子。
-
-这三个步骤可配置为:
-
-.. code-block:: python
-
-    text_conv = sequence_conv_pool(input=emb,
-                                context_start=k,
-                                context_len=2 * k + 1)
-
-**效果总结：**
-
-    =====================  ===============================  ========================
-    网络名称                        参数数量                    错误率
-    =====================  ===============================  ========================
-    卷积模型                      16 MB                       5.628 %
-    =====================  ===============================  ========================
-
-时序模型
-----------
-
-..  image:: src/NetRNN_cn.jpg
-    :align: center
-    :scale: 80%
-
-时序模型，也称为RNN模型, 包括简单的 `RNN模型 <https://en.wikipedia.org/wiki/Recurrent_neural_network>`_, `GRU模型 <https://en.wikipedia.org/wiki/Gated_recurrent_unit>`_ 和 `LSTM模型 <https://en.wikipedia.org/wiki/Long_short-term_memory>`_ 等等。
-
-- GRU模型配置：
-
-    .. code-block:: python
-
-        gru = simple_gru(input=emb, size=gru_size)
-
-
-- LSTM模型配置：
-
-    .. code-block:: python
-
-        lstm = simple_lstm(input=emb, size=lstm_size)
-
-本次试验，我们采用单层LSTM模型，并使用了Dropout，**效果总结：**
-
-    =====================  ===============================  =========================
-    网络名称                        参数数量                    错误率
-    =====================  ===============================  =========================
-    时序模型                      16 MB                       4.812 %
-    =====================  ===============================  =========================
-
-优化算法
-=========
-
-`优化算法 <http://www.paddlepaddle.org/doc/ui/api/trainer_config_helpers/optimizers_index.html>`_ 包括
-Momentum, RMSProp，AdaDelta，AdaGrad，ADAM，Adamax等，这里采用Adam优化方法，同时使用了L2正则(L2 Regularization)和梯度截断(Gradient Clipping)。
-
-.. code-block:: python
-
-    settings(batch_size=128,
-            learning_rate=2e-3,
-            learning_method=AdamOptimizer(),
-            regularization=L2Regularization(8e-4),
-            gradient_clipping_threshold=25)
-
-训练模型
-=========
-
-在数据加载和网络配置完成之后， 我们就可以训练模型了。
-
-..  image:: src/PipelineTrain_cn.jpg
-    :align: center
-    :scale: 80%
-
-训练模型，我们只需要运行 ``train.sh`` 训练脚本：
-
-    .. code-block:: bash
-
-        ./train.sh
-
-``train.sh`` 中包含了训练模型的基本命令。训练时所需设置的主要参数如下：
-
-    .. code-block:: bash
-
-        paddle train \
-        --config=trainer_config.py \
-        --log_period=20 \
-        --save_dir=./output \
-        --num_passes=15 \
-        --use_gpu=false
-
-这里只简单介绍了单机训练，如何进行分布式训练，请参考 :ref:`cluster_train` 。
-
-预测
-=====
-
-当模型训练好了之后，我们就可以进行预测了。
-
-..  image:: src/PipelineTest_cn.jpg
-    :align: center
-    :scale: 80%
-
-之前配置文件中 ``test.list`` 指定的数据将会被测试，这里直接通过预测脚本 ``predict.sh`` 进行预测,
-更详细的说明，请参考 :ref:`api_swig_py_paddle` 。
-
-    .. code-block:: bash
-
-        model="output/pass-00003"
-        paddle train \
-            --config=trainer_config.lstm.py \
-            --use_gpu=false \
-            --job=test \
-            --init_model_path=$model \
-            --config_args=is_predict=1 \
-            --predict_output_dir=. \
-
-        mv rank-00000 result.txt
-
-这里以 ``output/pass-00003`` 为例进行预测，用户可以根据训练日志，选择测试结果最好的模型来预测。
-
-预测结果以文本的形式保存在 ``result.txt`` 中，一行为一个样本，格式如下：
-
-    .. code-block:: bash
-
-        预测ID;ID为0的概率 ID为1的概率
-        预测ID;ID为0的概率 ID为1的概率
-
-总体效果总结
-==============
-
-在 ``/demo/quick_start`` 目录下，能够找到这里使用的所有数据, 网络配置, 训练脚本等等。
-对于Amazon-Elec测试集(25k), 如下表格，展示了上述网络模型的训练效果:
-
-    =====================  ===============================  =============  ==================================
-    网络名称                       参数数量                    错误率          配置文件
-    =====================  ===============================  =============  ==================================
-    逻辑回归模型                      252 KB                     8.652%          trainer_config.lr.py
-    词向量模型                         15 MB                      8.484%         trainer_config.emb.py
-    卷积模型                        16 MB                     5.628%          trainer_config.cnn.py
-    时序模型                         16 MB                     4.812%          trainer_config.lstm.py
-    =====================  ===============================  =============  ==================================
-
-
-附录
-=====
-
-命令行参数
-----------
-
-* \--config：网络配置
-* \--save_dir：模型存储路径
-* \--log_period：每隔多少batch打印一次日志
-* \--num_passes：训练轮次，一个pass表示过一遍所有训练样本
-* \--config_args：命令指定的参数会传入网络配置中。
-* \--init_model_path：指定初始化模型路径，可用在测试或训练时指定初始化模型。
-
-默认一个pass保存一次模型，也可以通过saving_period_by_batches设置每隔多少batch保存一次模型。
-可以通过show_parameter_stats_period设置打印参数信息等。
-其他参数请参考 命令行参数文档（链接待补充）。
-
-输出日志
----------
-
-.. code-block:: bash
-
-    TrainerInternal.cpp:160]  Batch=20 samples=2560 AvgCost=0.628761 CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297  CurrentEval: classification_error_evaluator=0.304297
-
-模型训练会看到类似上面这样的日志信息，详细的参数解释，请参考如下表格：
-
-    ===========================================  ==============================================================
-    名称                                             解释
-    ===========================================  ==============================================================
-    Batch=20                                      表示过了20个batch
-    samples=2560                                  表示过了2560个样本
-    AvgCost                                          每个pass的第0个batch到当前batch所有样本的平均cost
-    CurrentCost                                      当前log_period个batch所有样本的平均cost
-    Eval: classification_error_evaluator          每个pass的第0个batch到当前batch所有样本的平均分类错误率
-    CurrentEval: classification_error_evaluator      当前log_period个batch所有样本的平均分类错误率
-    ===========================================  ==============================================================
diff --git a/doc/v1_api_tutorials/quick_start/index_en.md b/doc/v1_api_tutorials/quick_start/index_en.md
deleted file mode 100644
index ca110431cf921ae0480d3fb2b17c58f90a84cc0e..0000000000000000000000000000000000000000
--- a/doc/v1_api_tutorials/quick_start/index_en.md
+++ /dev/null
@@ -1,562 +0,0 @@
-# Quick Start
-
-This tutorial will teach the basics of deep learning (DL), including how to implement many different models in PaddlePaddle. You will learn how to:
-  - Prepare data into the standardized format that PaddlePaddle accepts.
-  - Write data providers that read data into PaddlePaddle.
-  - Configure neural networks in PaddlePaddle layer by layer.
-  - Train models.
-  - Perform inference with trained models.
-
-
-## Install
-
-To get started, please install PaddlePaddle on your computer. Throughout this tutorial, you will learn by implementing different DL models for text classification.
-
-To install PaddlePaddle, please follow the instructions here: <a href = "../../getstarted/build_and_install/index_en.html" >Build and Install</a>.
-
-## Overview
-For the first step, you will use PaddlePaddle to build a **text classification** system. For example, suppose you run an e-commence  website, and you want to analyze the sentiment of user reviews to evaluate product quality.
-
-For example, given the input
-
-```
-This monitor is fantastic.
-```
-
-Your classifier should output “positive”, since this text snippet shows that the user is satisfied with the product. Given this input:
-
-```
-The monitor breaks down two months after purchase.
-```
-
-the classifier should output “negative“.
-
-To build your text classification system, your code will need to perform five steps:
-<center> ![](./src/Pipeline_en.jpg) </center>
-
-  - Preprocess data into a standardized format.
-  - Provide data to the learning model.
-  - Specify the neural network structure.
-  - Train the model.
-  - Inference (make prediction on test examples).
-
-
-1. Preprocess data into standardized format
-    - In the text classification example, you will start with a text file with one training example per line. Each line contains category id (in machine learning, often denoted the target y), followed by the input text (often denoted x); these two elements are separated by a Tab. For example: ```positive [tab] This monitor is fantastic```. You will preprocess this raw data into a format that Paddle can use.
-
-2. Provide data to the learning model.
-    - You can write data providers in Python. For any required data preprocessing step, you can add the preprocessing code to the PyDataProvider Python file.
-    - In our text classification example, every word or character will be converted into an integer id, specified in a dictionary file. It perform a dictionary lookup in PyDataProvider to get the id.
-3. Specify neural network structure.  (From easy to hard, we provide 4 kinds of network configurations)
-    - A logistic regression model.
-    - A word embedding model.
-    - A convolutional neural network model.
-    - A sequential recurrent neural network model.
-    - You will also learn different learning algorithms.
-4. Training model.
-5. Inference.
-
-## Preprocess data into standardized format
-In this example, you are going to use [Amazon electronic product review dataset](http://jmcauley.ucsd.edu/data/amazon/) to build a bunch of deep neural network models for text classification. Each text in this dataset is a product review. This dataset has two categories: “positive” and “negative”. Positive means the reviewer likes the product, while negative means the reviewer does not like the product.
-
-`demo/quick_start` in the [source code](https://github.com/PaddlePaddle/Paddle) provides script for downloading the preprocessed data as shown below. (If you want to process the raw data, you can use the script `demo/quick_start/data/proc_from_raw_data/get_data.sh`).
-
-```bash
-cd demo/quick_start
-./data/get_data.sh
-```
-
-## Transfer Data to Model
-### Write Data Provider with Python
-The following `dataprovider_bow.py` gives a complete example of writing data provider with Python. It includes the following parts:
-
-* initalizer： define the additional meta-data of the data provider and the types of the input data.
-* process： Each `yield` returns a data sample. In this case, it return the text representation and category id. The order of features in the returned result needs to be consistent with the definition of the input types in `initalizer`.
-
-```python
-from paddle.trainer.PyDataProvider2 import *
-
-# id of the word not in dictionary
-UNK_IDX = 0
-
-# initializer is called by the framework during initialization.
-# It allows the user to describe the data types and setup the
-# necessary data structure for later use.
-# `settings` is an object. initializer need to properly fill settings.input_types.
-# initializer can also store other data structures needed to be used at process().
-# In this example, dictionary is stored in settings.
-# `dictionay` and `kwargs` are arguments passed from trainer_config.lr.py
-def initializer(settings, dictionary, **kwargs):
-    # Put the word dictionary into settings
-    settings.word_dict = dictionary
-
-    # setting.input_types specifies what the data types the data provider
-    # generates.
-    settings.input_types = [
-        # The first input is a sparse_binary_vector,
-        # which means each dimension of the vector is either 0 or 1. It is the
-        # bag-of-words (BOW) representation of the texts.
-        sparse_binary_vector(len(dictionary)),
-        # The second input is an integer. It represents the category id of the
-        # sample. 2 means there are two labels in the dataset.
-        # (1 for positive and 0 for negative)
-        integer_value(2)]
-
-# Delaring a data provider. It has an initializer 'data_initialzer'.
-# It will cache the generated data of the first pass in memory, so that
-# during later pass, no on-the-fly data generation will be needed.
-# `setting` is the same object used by initializer()
-# `file_name` is the name of a file listed train_list or test_list file given
-# to define_py_data_sources2(). See trainer_config.lr.py.
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_name):
-    # Open the input data file.
-    with open(file_name, 'r') as f:
-        # Read each line.
-        for line in f:
-            # Each line contains the label and text of the comment, separated by \t.
-            label, comment = line.strip().split('\t')
-
-            # Split the words into a list.
-            words = comment.split()
-
-            # convert the words into a list of ids by looking them up in word_dict.
-            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
-
-            # Return the features for the current comment. The first is a list
-            # of ids representing a 0-1 binary sparse vector of the text,
-            # the second is the integer id of the label.
-            yield word_vector, int(label)
-```
-
-### Define Python Data Provider in Configuration files.
-You need to add a data provider definition `define_py_data_sources2` in our network configuration. This definition specifies:
-
-- The path of the training and testing data (`data/train.list`, `data/test.list`).
-- The location of the data provider file (`dataprovider_bow`).
-- The function to call to get data. (`process`).
-- Additional arguments or data. Here it passes the path of word dictionary.
-
-```python
-from paddle.trainer_config_helpers import *
-
-file = "data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-# define the data sources for the model.
-# We need to use different process for training and prediction.
-# For training, the input data includes both word IDs and labels.
-# For prediction, the input data only includs word Ids.
-define_py_data_sources2(train_list='data/train.list',
-                        test_list='data/test.list',
-                        module="dataprovider_bow",
-                        obj="process",
-                        args={"dictionary": word_dict})
-```
-You can refer to the following link for more detailed examples and data formats: <a href = "../../api/v1/data_provider/pydataprovider2_en.html">PyDataProvider2</a>.
-
-## Network Architecture
-We will describe four kinds of network architectures in this section.
-<center> ![](./src/PipelineNetwork_en.jpg) </center>
-
-First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures.
-For more detailed documentation, you could refer to: <a href = "../../api/v1/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory.
-
-### Logistic Regression
-The architecture is illustrated in the following picture:
-<center> ![](./src/NetLR_en.png) </center>
-
-- You need define the data for text features. The size of the data layer is the number of words in the dictionary.
-
-```python
-word = data_layer(name="word",  size=voc_dim)
-```
-
-- You also need to define the category id for each example. The size of the data layer is the number of labels.
-
-```python
-label = data_layer(name="label", size=label_dim)
-```
-
-- It uses logistic regression model to classify the vector, and it will output the classification error during training.
-    - Each layer has an *input* argument that specifies its input layer. Some layers can have multiple input layers. You can use a list of the input layers as input in that case.
-    - *size* for each layer means the number of neurons of the layer.
-    - *act_type* means activation function applied to the output of each neuron independently.
-    - Some layers can have additional special inputs. For example, `classification_cost` needs ground truth label as input to compute classification loss and error.
-```python
-# Define a fully connected layer with logistic activation (also called softmax activation).
-output = fc_layer(input=word,
-                  size=label_dim,
-                  act_type=SoftmaxActivation())
-# Define cross-entropy classification loss and error.
-classification_cost(input=output, label=label)
-```
-
-Performance summary: You can refer to the training and testing scripts later. In order to compare different network architectures, the model complexity and test classification error are listed in the following table:
-
-<html>
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Network name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">Test error</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">Logistic regression</td>
-<td class="left">252 KB</td>
-<td class="left">8.652%</td>
-</tr>
-
-</tbody>
-</table></center>
-</html>
-<br>
-
-### Word Embedding Model
-In order to use the word embedding model, you need to change the data provider a little bit to make the input words as a sequence of word IDs. The revised data provider `dataprovider_emb.py` is listed below. You only need to change initializer() for the type of the first input. It is changed from sparse_binary_vector to sequence of intergers.  process() remains the same. This data provider can also be used for later sequence models.
-
-```python
-def initializer(settings, dictionary, **kwargs):
-    # Put the word dictionary into settings
-    settings.word_dict = dictionary
-    settings.input_types = [
-        # Define the type of the first input as a sequence of integers.
-        integer_value_sequence(len(dictionary)),
-        # Define the second input for label id
-        integer_value(2)]
-
-@provider(init_hook=initializer)
-def process(settings, file_name):
-    ...
-    # omitted, it is same as the data provider for LR model
-```
-
-This model is very similar to the framework of logistic regression, but it uses word embedding vectors instead of a sparse vectors to represent words.
-<center> ![](./src/NetContinuous_en.png) </center>
-
-- It can look up the dense word embedding vector in the dictionary  (its words embedding vector is `word_dim`). The input is a sequence of N words, the output is N word_dim dimensional vectors.
-
-```python
-emb = embedding_layer(input=word, dim=word_dim)
-```
-
-- It averages all the word embedding in a sentence to get its sentence representation.
-
-```python
-avg = pooling_layer(input=emb, pooling_type=AvgPooling())
-```
-
-The other parts of the model are the same as logistic regression network.
-
-The performance is summarized in the following table:
-
-<html>
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Network name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">Test error</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">Word embedding model</td>
-<td class="left">15 MB</td>
-<td class="left">8.484%</td>
-</tr>
-
-</tbody>
-</table>
-</html></center>
-<br>
-
-### Convolutional Neural Network Model
-Convolutional neural network converts a sequence of word embeddings into a sentence representation using temporal convolutions. You will transform the fully connected layer of the word embedding model to 3 new sub-steps.
-<center> ![](./src/NetConv_en.png) </center>
-
-
-Text convolution has 3 steps:
-1. Get K nearest neighbor context of each word in a sentence, stack them into a 2D vector representation.
-2. Apply temporal convolution to this representation to produce a new hidden_dim dimensional vector.
-3. Apply max-pooling to the new vectors at all the time steps in a sentence to get a sentence representation.
-
-```python
-# context_len means convolution kernel size.
-# context_start means the start of the convolution. It can be negative. In that case, zero padding is applied.
-text_conv = sequence_conv_pool(input=emb,
-                               context_start=k,
-                               context_len=2 * k + 1)
-```
-
-The performance is summarized in the following table：
-
-<html>
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Network name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">Test error</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">Convolutional model</td>
-<td class="left">16 MB</td>
-<td class="left">5.628%</td>
-</tr>
-
-</tbody>
-</table></center>
-<br>
-
-### Recurrent Model
-<center> ![](./src/NetRNN_en.png) </center>
-
-You can use Recurrent neural network as our time sequence model, including simple RNN model, GRU model, and LSTM model。
-
-- GRU model can be specified via：
-
-```python
-gru = simple_gru(input=emb, size=gru_size)
-```
-
-- LSTM model can be specified via：
-
-```python
-lstm = simple_lstm(input=emb, size=lstm_size)
-```
-
-You can use single layer LSTM model with Dropout for our text classification problem. The performance is summarized in the following table:
-
-<html>
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Network name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">Test error</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">Recurrent model</td>
-<td class="left">16 MB</td>
-<td class="left">4.812%</td>
-</tr>
-
-</tbody>
-</table></center>
-</html>
-<br>
-
-## Optimization Algorithm
-<a href = "../../api/v1/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
-
-```python
-settings(batch_size=128,
-         learning_rate=2e-3,
-         learning_method=AdamOptimizer(),
-         regularization=L2Regularization(8e-4),
-         gradient_clipping_threshold=25)
-```
-
-## Training Model
-After completing data preparation and network architecture specification, you will run the training script.
-<center> ![](./src/PipelineTrain_en.png) </center>
-
-Training script: our training script is in `train.sh` file. The training arguments are listed below:
-
-```bash
-paddle train \
---config=trainer_config.py \
---log_period=20 \
---save_dir=./output \
---num_passes=15 \
---use_gpu=false
-```
-
-We do not provide examples on how to train on clusters here. If you want to train on clusters, please follow the <a href = "../../howto/usage/cluster/cluster_train_en.html">distributed training</a> documentation or other demos for more details.
-
-## Inference
-You can use the trained model to perform prediction on the dataset with no labels. You can also evaluate the model on dataset with labels to obtain its test accuracy.
-<center> ![](./src/PipelineTest_en.png) </center>
-
-The test script is listed below. PaddlePaddle can evaluate a model on the data with labels specified in `test.list`.
-
-```bash
-paddle train \
---config=trainer_config.lstm.py \
---use_gpu=false \
---job=test \
---init_model_path=./output/pass-0000x
-```
-
-We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/v1/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial，or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation.
-
-inference script (predict.sh)：
-
-```bash
-model="output/pass-00003"
-paddle train \
-    --config=trainer_config.lstm.py \
-    --use_gpu=false \
-    --job=test \
-    --init_model_path=$model \
-    --config_args=is_predict=1 \
-    --predict_output_dir=. \
-
-mv rank-00000 result.txt
-```
-User can choose the best model base on the training log instead of model `output/pass-00003`. There are several differences between training and inference network configurations.
-- You do not need labels during inference.
-- Outputs need to be specified to the classification probability layer (the output of softmax layer), or the id of maximum probability (`max_id` layer). An example to output the id and probability is given in the code snippet.
-- batch_size = 1.
-- You need to specify the location of `test_list` in the test data.
-
-The results in `result.txt` is as follows, each line is one sample.
-
-```
-predicted_label_id;probability_of_label_0 probability_of_label_1  # the first sample
-predicted_label_id;probability_of_label_0 probability_of_label_1  # the second sample
-```
-
-
-```python
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-obj = 'process' if not is_predict else 'process_pre'
-batch_size = 128 if not is_predict else 1
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid,output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label) outputs(cls)
-```
-
-## Summary
-The scripts of data downloading, network configurations, and training scrips are in `/demo/quick_start`. The following table summarizes the performance of our network architecture on Amazon-Elec dataset(25k):
-
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Network name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">Error rate</th>
-<th scope="col" class="left">Configuration file name</th>
-</tr>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">Logistic regression model(BOW)</td>
-<td class="left"> 252KB </td>
-<td class="left">8.652%</td>
-<td class="left">trainer_config.lr.py</td>
-</tr>
-
-<tr>
-<td class="left">Word embedding</td>
-<td class="left"> 15MB </td>
-<td class="left"> 8.484%</td>
-<td class="left">trainer_config.emb.py</td>
-</tr>
-
-<tr>
-<td class="left">Convolution model</td>
-<td class="left"> 16MB </td>
-<td class="left"> 5.628%</td>
-<td class="left">trainer_config.cnn.py</td>
-</tr>
-
-<tr>
-<td class="left">Time sequence model</td>
-<td class="left"> 16MB </td>
-<td class="left"> 4.812%</td>
-<td class="left">trainer_config.lstm.py</td>
-</tr>
-
-</tbody>
-</table>
-</center>
-<br>
-
-## Appendix
-### Command Line Argument
-
-* \--config：network architecture path.
-* \--save_dir：model save directory.
-* \--log_period：the logging period per batch.
-* \--num_passes：number of training passes. One pass means the training would go over the whole training dataset once.
-* \--config_args：Other configuration arguments.
-* \--init_model_path：The path of the initial model parameter.
-
-By default, the trainer will save model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in <a href = "../../howto/usage/cmd_parameter/index_en.html">command line argument documentation</a>。
-
-### Log
-
-```
-TrainerInternal.cpp:160]  Batch=20 samples=2560 AvgCost=0.628761 CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297  CurrentEval: classification_error_evaluator=0.304297
-```
-During model training, you will see the log like the examples above:
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Name</th>
-<th scope="col" class="left">Explanation</th>
-</tr>
-</thead>
-
-<tr>
-<td class="left">Batch=20</td>
-<td class="left"> You have trained 20 batches. </td>
-</tr>
-
-<tr>
-<td class="left">samples=2560</td>
-<td class="left"> You have trained 2560 examples. </td>
-</tr>
-
-<tr>
-<td class="left">AvgCost</td>
-<td class="left"> The average cost from the first batch to the current batch. </td>
-</tr>
-
-<tr>
-<td class="left">CurrentCost</td>
-<td class="left"> the average cost of the last log_period batches </td>
-</tr>
-
-<tr>
-<td class="left">Eval: classification_error_evaluator</td>
-<td class="left"> The average classification error from the first batch to the current batch.</td>
-</tr>
-
-<tr>
-<td class="left">CurrentEval: classification_error_evaluator</td>
-<td class="left"> The average error rate of the last log_period batches </td>
-</tr>
-
-</tbody>
-</table>
-</center>
-<br>
diff --git a/doc/v1_api_tutorials/quick_start/src/NetContinuous_cn.jpg b/doc/v1_api_tutorials/quick_start/src/NetContinuous_cn.jpg
deleted file mode 100755
index b18e452a481232f47c453fc30521f8c78ad2da0b..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetContinuous_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetContinuous_en.png b/doc/v1_api_tutorials/quick_start/src/NetContinuous_en.png
deleted file mode 100644
index 7bdef1aa366711806585d35c8653c987fd63d59e..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetContinuous_en.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetConv_cn.jpg b/doc/v1_api_tutorials/quick_start/src/NetConv_cn.jpg
deleted file mode 100755
index 0f5ebfa52ff2abcfc63464d48f00e406d4cbfe86..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetConv_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetConv_en.png b/doc/v1_api_tutorials/quick_start/src/NetConv_en.png
deleted file mode 100644
index ad618d1d6f8f4839f566f5f5cb5db37a4b7d9093..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetConv_en.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetLR_cn.jpg b/doc/v1_api_tutorials/quick_start/src/NetLR_cn.jpg
deleted file mode 100755
index ee65d1f4127a347e991b4de5e5208347b2b0f51f..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetLR_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetLR_en.png b/doc/v1_api_tutorials/quick_start/src/NetLR_en.png
deleted file mode 100644
index 9d514bf1b18a0c330f98c28785e5d008f409fc1d..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetLR_en.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetRNN_cn.jpg b/doc/v1_api_tutorials/quick_start/src/NetRNN_cn.jpg
deleted file mode 100755
index f8bc081827f3a50af80495b4561813c5efe9c9dc..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetRNN_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/NetRNN_en.png b/doc/v1_api_tutorials/quick_start/src/NetRNN_en.png
deleted file mode 100644
index 180f273d32ea59dc8ececa69c08e249f79f9d4f7..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/NetRNN_en.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/PipelineNetwork_cn.jpg b/doc/v1_api_tutorials/quick_start/src/PipelineNetwork_cn.jpg
deleted file mode 100755
index 7e68891d7a536b06027ada6b40cb13dbc3c2d72e..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/PipelineNetwork_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/PipelineNetwork_en.jpg b/doc/v1_api_tutorials/quick_start/src/PipelineNetwork_en.jpg
deleted file mode 100644
index e779aed06d5cdb2b442754e7915e79b72946418e..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/PipelineNetwork_en.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/PipelineTest_cn.jpg b/doc/v1_api_tutorials/quick_start/src/PipelineTest_cn.jpg
deleted file mode 100755
index 01715db886d811a776c9e7d47f32ba8b1ffe6230..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/PipelineTest_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/PipelineTest_en.png b/doc/v1_api_tutorials/quick_start/src/PipelineTest_en.png
deleted file mode 100644
index 7e7ef520b5effa2f43fd2964048f05c42f2ea890..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/PipelineTest_en.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/PipelineTrain_cn.jpg b/doc/v1_api_tutorials/quick_start/src/PipelineTrain_cn.jpg
deleted file mode 100755
index 8049d3e53c483a4d4956a4840b9a0dce5c2990f1..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/PipelineTrain_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/PipelineTrain_en.png b/doc/v1_api_tutorials/quick_start/src/PipelineTrain_en.png
deleted file mode 100644
index 132d29bfd5d678d2518161d0b5ed2e16a233a048..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/PipelineTrain_en.png and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/Pipeline_cn.jpg b/doc/v1_api_tutorials/quick_start/src/Pipeline_cn.jpg
deleted file mode 100755
index d5d99253ea528ebd2d8af6cddc6a9f00e20de8cd..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/Pipeline_cn.jpg and /dev/null differ
diff --git a/doc/v1_api_tutorials/quick_start/src/Pipeline_en.jpg b/doc/v1_api_tutorials/quick_start/src/Pipeline_en.jpg
deleted file mode 100644
index 21a7a7bb6a1af746120e6f4f51f797b6aaafb9d8..0000000000000000000000000000000000000000
Binary files a/doc/v1_api_tutorials/quick_start/src/Pipeline_en.jpg and /dev/null differ
diff --git a/go/pserver/client/c/test/test_mnist.py b/go/pserver/client/c/test/test_mnist.py
index c3a3af55e2812fa0c965d22ddaba198f43f3c4ad..821d9adfcb3164b0982873354badc382b13645ea 100644
--- a/go/pserver/client/c/test/test_mnist.py
+++ b/go/pserver/client/c/test/test_mnist.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2 as paddle
 import gzip
 
diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py
index 8d9c6b9b20f515ed0865df8cf46b6dfc2d8ffa34..445a8d3aa4840b6e206dec813ed6dc04c2a9a21d 100644
--- a/go/pserver/client/c/test/test_train.py
+++ b/go/pserver/client/c/test/test_train.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2 as paddle
 import paddle.v2.dataset.uci_housing as uci_housing
 import paddle.v2.master as master
diff --git a/paddle/api/test/testTrainConfig.py b/paddle/api/test/testTrainConfig.py
index 77e0cd37d566d2571fada76b9948a9b0616ad044..1a1283e1168650066ad6eca356ae0c1e5bd967ee 100644
--- a/paddle/api/test/testTrainConfig.py
+++ b/paddle/api/test/testTrainConfig.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100, learning_method=AdamOptimizer())
diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/capi/examples/model_inference/common/common.h
index e32f2f9836f63ba10ef5be447a4c41514e079219..9efcbc387e6080f8c9ed284412d1abced658a0cd 100644
--- a/paddle/capi/examples/model_inference/common/common.h
+++ b/paddle/capi/examples/model_inference/common/common.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef __CAPI_EXAMPLE_COMMON_H__
 #define __CAPI_EXAMPLE_COMMON_H__
 #include <stdio.h>
diff --git a/paddle/capi/examples/model_inference/dense/main.c b/paddle/capi/examples/model_inference/dense/main.c
index 376cd46fb09a156d426453986c87dcff6e2f71dd..f795bfe11d73dd6a4431b89d33bf143bec89ef76 100644
--- a/paddle/capi/examples/model_inference/dense/main.c
+++ b/paddle/capi/examples/model_inference/dense/main.c
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <paddle/capi.h>
 #include <time.h>
 
diff --git a/paddle/capi/examples/model_inference/dense/merge_v2_model.py b/paddle/capi/examples/model_inference/dense/merge_v2_model.py
index c030d572cbdb15cb5e90f2685723a81efb230f81..7aeb482903aa9fca1a23f9b68988ee3bd9886cca 100644
--- a/paddle/capi/examples/model_inference/dense/merge_v2_model.py
+++ b/paddle/capi/examples/model_inference/dense/merge_v2_model.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.utils.merge_model import merge_v2_model
 
 from mnist_v2 import network
diff --git a/paddle/capi/examples/model_inference/dense/mnist_v2.py b/paddle/capi/examples/model_inference/dense/mnist_v2.py
index ee28111153ca2cf24b9789452c65a0f4c7b64538..183eecfdf2cf146d9d919f94dd0fd5416c5ff97e 100644
--- a/paddle/capi/examples/model_inference/dense/mnist_v2.py
+++ b/paddle/capi/examples/model_inference/dense/mnist_v2.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import sys
 import gzip
diff --git a/paddle/capi/examples/model_inference/dense/trainer_config.py b/paddle/capi/examples/model_inference/dense/trainer_config.py
index 873ec119e7a3d4debe50af2ba259ace50b0cbf7c..b94a21a7e406b833797f8f521c62a2351c2bc30a 100644
--- a/paddle/capi/examples/model_inference/dense/trainer_config.py
+++ b/paddle/capi/examples/model_inference/dense/trainer_config.py
@@ -1,18 +1,13 @@
-from paddle.trainer_config_helpers import *
-
-img = data_layer(name='pixel', size=784)
-
-hidden = fc_layer(
-    input=img,
-    size=200,
-    param_attr=ParamAttr(name='hidden.w'),
-    bias_attr=ParamAttr(name='hidden.b'))
-
-prob = fc_layer(
-    input=hidden,
-    size=10,
-    act=SoftmaxActivation(),
-    param_attr=ParamAttr(name='prob.w'),
-    bias_attr=ParamAttr(name='prob.b'))
-
-outputs(prob)
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/paddle/capi/examples/model_inference/multi_thread/main.c b/paddle/capi/examples/model_inference/multi_thread/main.c
index d7675cd80a52f752b1a8567dae34123978113831..eecb9138e76e6e0c0e1065b57fa26e4fa4c7f1d6 100644
--- a/paddle/capi/examples/model_inference/multi_thread/main.c
+++ b/paddle/capi/examples/model_inference/multi_thread/main.c
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <paddle/capi.h>
 #include <pthread.h>
 #include <time.h>
diff --git a/paddle/capi/examples/model_inference/multi_thread/main_gpu.c b/paddle/capi/examples/model_inference/multi_thread/main_gpu.c
index 6fd376e0d1a2fee4f9a0f676b53c6f2891795cab..85bb45658438c7410d29943a401338bd314f383f 100644
--- a/paddle/capi/examples/model_inference/multi_thread/main_gpu.c
+++ b/paddle/capi/examples/model_inference/multi_thread/main_gpu.c
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <paddle/capi.h>
 #include <pthread.h>
 #include <time.h>
diff --git a/paddle/capi/examples/model_inference/sequence/main.c b/paddle/capi/examples/model_inference/sequence/main.c
index 50bc0c9201f207eff7389bfbee3bc2e43261b19a..80937c830d67432ff8cfb125172901bdd4257ffb 100644
--- a/paddle/capi/examples/model_inference/sequence/main.c
+++ b/paddle/capi/examples/model_inference/sequence/main.c
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <paddle/capi.h>
 #include <time.h>
 #include "../common/common.h"
diff --git a/paddle/capi/examples/model_inference/sequence/trainer_config.py b/paddle/capi/examples/model_inference/sequence/trainer_config.py
index 6bbc7a909aa03950ce621efa43fa47d9cdd016f8..889f8acdfd254548ead975c5b8c19b7372ccf3d7 100644
--- a/paddle/capi/examples/model_inference/sequence/trainer_config.py
+++ b/paddle/capi/examples/model_inference/sequence/trainer_config.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 WORD_DIM = 3000
diff --git a/paddle/capi/examples/model_inference/sparse_binary/main.c b/paddle/capi/examples/model_inference/sparse_binary/main.c
index 029b94ee63ba282aa48193ffd4f625657ddc3a60..efec010a91ab933c8e1ab1715293cdeaaf376327 100644
--- a/paddle/capi/examples/model_inference/sparse_binary/main.c
+++ b/paddle/capi/examples/model_inference/sparse_binary/main.c
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <paddle/capi.h>
 #include <time.h>
 
diff --git a/paddle/capi/tests/test_predict_network.py b/paddle/capi/tests/test_predict_network.py
index 82ef5cb1a70398df65ace3c802076743c3ebe341..6560417b2a111c3c1b6849468697abeea6e159b6 100644
--- a/paddle/capi/tests/test_predict_network.py
+++ b/paddle/capi/tests/test_predict_network.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100)
diff --git a/paddle/cuda/src/avx_mathfun.h b/paddle/cuda/src/avx_mathfun.h
index 2412ed5abc13b2a83521a75524f581e106788b60..a0ba71faba9131f6f3f031c8276c3851090fac45 100644
--- a/paddle/cuda/src/avx_mathfun.h
+++ b/paddle/cuda/src/avx_mathfun.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
    AVX implementation of sin, cos, sincos, exp and log
 
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 597ea959f230d88350796cef05b7d6f2a42e594a..afb55bdaaae8e6d46d5791991c92f2b61652ac55 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -33,8 +33,14 @@ cc_library(scope SRCS scope.cc DEPS glog threadpool)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 
 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
+nv_test(data_device_transform_test SRCS data_device_transform_test.cu
+        DEPS operator op_registry init math_function)
+
 cc_library(data_type_transform SRCS data_type_transform.cc DEPS tensor)
+cc_test(data_type_transform_test SRCS data_type_transform_test.cc DEPS data_type_transform)
+
 cc_library(data_layout_transform SRCS data_layout_transform.cc DEPS tensor math_function)
+cc_test(data_layout_transform_test SRCS data_layout_transform_test.cc DEPS data_layout_transform)
 
 cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor
         framework_proto selected_rows data_device_transform data_type_transform data_layout_transform)
@@ -82,5 +88,3 @@ cc_test(init_test SRCS init_test.cc DEPS init)
 
 cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto)
 cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
-nv_test(data_device_transform_test SRCS data_device_transform_test.cu
-        DEPS operator op_registry init math_function)
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index 692406b1c37d0c02714eafb5cf9a28329ed873bc..72743b5fd0b32479ccbf28fbf98032df8fa371e9 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/backward.h"
 
diff --git a/paddle/framework/data_device_transform.cc b/paddle/framework/data_device_transform.cc
index b3fd48ae12c368ac7d83c4f3b6e2fb1939932ac0..5daf5a4e0ab47cf90119ee2f48f1fe89559a1972 100644
--- a/paddle/framework/data_device_transform.cc
+++ b/paddle/framework/data_device_transform.cc
@@ -31,15 +31,14 @@ static const platform::DeviceContext* GetDeviceContext(
   }
 }
 
-Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
+void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
+                     Tensor* out) {
   VLOG(3) << "DeviceTransform in, src_place " << in.place()
           << " dst_place: " << dst_place;
-  Tensor* out = new Tensor();
   auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
   dev_ctx->Wait();
   Copy(in, dst_place, *dev_ctx, out);
   dev_ctx->Wait();
-  return out;
 }
 
 }  // namespace framework
diff --git a/paddle/framework/data_device_transform.h b/paddle/framework/data_device_transform.h
index bebf0d1b320183f46ab226dc6493ba09a365fc35..39750a85f2787c6d35723bb908011349a771fb7f 100644
--- a/paddle/framework/data_device_transform.h
+++ b/paddle/framework/data_device_transform.h
@@ -21,7 +21,8 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place);
+void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
+                     Tensor* out);
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/data_device_transform_test.cu b/paddle/framework/data_device_transform_test.cu
index 5d89f5546fa87241dec6364d86a100ca51bce687..efc05b3106b40bdaa6cd03ce707c677dd58b0730 100644
--- a/paddle/framework/data_device_transform_test.cu
+++ b/paddle/framework/data_device_transform_test.cu
@@ -150,6 +150,7 @@ TEST(Operator, CPUtoGPU) {
   // get output
   auto* output2 = scope.Var("OUT2");
   gpu_op->Run(scope, cuda_place);
+  VLOG(3) << "after gpu_op run";
 
   // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
   DeviceContextPool& pool = DeviceContextPool::Instance();
diff --git a/paddle/framework/data_layout.h b/paddle/framework/data_layout.h
index 3ab976ecac4dfb0571ebf5dc93f726939da01116..31817251ed09a7a1da7223c1e99b2eb369a3de30 100644
--- a/paddle/framework/data_layout.h
+++ b/paddle/framework/data_layout.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
-#include <iostream>
+#include <cctype>
+#include <ostream>
+
 #include "paddle/platform/enforce.h"
 
 namespace paddle {
@@ -27,12 +29,19 @@ enum class DataLayout {
 };
 
 inline DataLayout StringToDataLayout(const std::string& str) {
-  if (str == "NHWC" || str == "nhwc") {
+  std::string s(str);
+  for (size_t i = 0; i < s.size(); ++i) {
+    s[i] = toupper(s[i]);
+  }
+
+  if (s == "NHWC") {
     return DataLayout::kNHWC;
-  } else if (str == "NCHW" || str == "nchw") {
+  } else if (s == "NCHW") {
     return DataLayout::kNCHW;
+  } else if (s == "ANYLAYOUT") {
+    return DataLayout::kAnyLayout;
   } else {
-    PADDLE_THROW("Unknown storage order string: %s", str);
+    PADDLE_THROW("Unknown storage order string: %s", s);
   }
 }
 
@@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
   }
 }
 
-inline std::ostream& operator<<(std::ostream& out, DataLayout l) {
+inline std::ostream& operator<<(std::ostream& out, const DataLayout& l) {
   out << DataLayoutToString(l);
   return out;
 }
diff --git a/paddle/framework/data_layout_transform.cc b/paddle/framework/data_layout_transform.cc
index 96794cae97d460e86fe83ac1395e1dfc7e371e3b..9d0a6d5ea3eb4127763acbd1f7a219aa19db4eca 100644
--- a/paddle/framework/data_layout_transform.cc
+++ b/paddle/framework/data_layout_transform.cc
@@ -1,25 +1,36 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/data_layout_transform.h"
 
-#include "paddle/framework/tensor.h"
 #include "paddle/operators/math/math_function.h"
 
 namespace paddle {
 namespace framework {
 
+std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to) {
+  PADDLE_ENFORCE_NE(from, to,
+                    "layout transform should transform different layout");
+  if (from == DataLayout::kNCHW && to == DataLayout::kNHWC) {
+    return {0, 2, 3, 1};
+  } else if (from == DataLayout::kNHWC && to == DataLayout::kNCHW) {
+    return {0, 3, 1, 2};
+  } else {
+    PADDLE_THROW("unsupported transform");
+  }
+}
+
 struct CastDataLayout {
   CastDataLayout(const platform::DeviceContext* ctx,
                  const std::vector<int>& axis, const framework::Tensor& in,
@@ -44,38 +55,36 @@ struct CastDataLayout {
   }
 };
 
-void TransDataLayout(const std::vector<int>& axis,
-                     const platform::DeviceContext* ctx,
-                     const KernelTypePair& kernel_pair, const Variable& in,
-                     Variable* out) {
-  PADDLE_ENFORCE(in.IsType<Tensor>(), "Only support Tensor transform!.");
+void TransDataLayout(const OpKernelType& kernel_type_for_var,
+                     const OpKernelType& expected_kernel_type, const Tensor& in,
+                     Tensor* out) {
   PADDLE_ENFORCE(
-      platform::places_are_same_class(kernel_pair.first.place_,
-                                      kernel_pair.second.place_),
+      platform::places_are_same_class(kernel_type_for_var.place_,
+                                      expected_kernel_type.place_),
       "TransDataLayout only support DataLayout transform on same place!");
-  PADDLE_ENFORCE(kernel_pair.first.data_type_ == kernel_pair.second.data_type_,
-                 "TransDataLayout only support Datatype are same!");
 
-  auto src = in.Get<Tensor>();
-  auto* dst = out->GetMutable<Tensor>();
-  PADDLE_ENFORCE(arity(src.dims()) == 4, "Input Arity Only Suppport 4!");
+  PADDLE_ENFORCE(arity(in.dims()) == 4, "Input Arity only support 4!");
+
+  auto& pool = platform::DeviceContextPool::Instance();
 
-  auto src_dim = src.dims();
+  auto src_dim = in.dims();
   std::vector<int64_t> dst_dim;
 
+  auto axis = GetAxis(kernel_type_for_var.data_layout_,
+                      expected_kernel_type.data_layout_);
   dst_dim.resize(axis.size());
   for (size_t i = 0; i < axis.size(); i++) {
     dst_dim[i] = src_dim[axis[i]];
   }
 
-  dst->Resize(make_ddim(dst_dim));
-  auto place = kernel_pair.second.place_;
-  dst->mutable_data(place, src.type());
+  out->Resize(make_ddim(dst_dim));
+  out->mutable_data(expected_kernel_type.place_, in.type());
 
-  auto src_type = kernel_pair.first.data_type_;
-  framework::VisitDataType(src_type, CastDataLayout(ctx, axis, src, dst));
+  framework::VisitDataType(
+      framework::ToDataType(in.type()),
+      CastDataLayout(pool.Get(expected_kernel_type.place_), axis, in, out));
 
-  dst->set_layout(kernel_pair.second.data_layout_);
+  out->set_layout(expected_kernel_type.data_layout_);
 }
 
 }  // namespace framework
diff --git a/paddle/framework/data_layout_transform.h b/paddle/framework/data_layout_transform.h
index befae1f63616a4c21d998c6b784b8ef288d00617..368f7fc9898338af0f9502cbc1e94cc40ae12e3b 100644
--- a/paddle/framework/data_layout_transform.h
+++ b/paddle/framework/data_layout_transform.h
@@ -1,31 +1,31 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #pragma once
 
 #include "paddle/framework/op_kernel_type.h"
+#include "paddle/framework/tensor.h"
 #include "paddle/framework/variable.h"
 
 namespace paddle {
 namespace framework {
 
-using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
+std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to);
 
-void TransDataLayout(const std::vector<int>& axis,
-                     const platform::DeviceContext* ctx,
-                     const KernelTypePair& kernel_pair, const Variable& in,
-                     Variable* out);
+void TransDataLayout(const OpKernelType& kernel_type_for_var,
+                     const OpKernelType& expected_kernel_type, const Tensor& in,
+                     Tensor* out);
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/data_layout_transform_test.cc b/paddle/framework/data_layout_transform_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..093e8d4d3458479763415dbc24957eaaa0f6f0fe
--- /dev/null
+++ b/paddle/framework/data_layout_transform_test.cc
@@ -0,0 +1,44 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/framework/data_layout_transform.h"
+
+#include "gtest/gtest.h"
+#include "paddle/platform/device_context.h"
+
+TEST(DataTransform, DataLayoutFunction) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+
+  auto place = CPUPlace();
+  Tensor in = Tensor();
+  Tensor out = Tensor();
+  in.mutable_data<double>(make_ddim({2, 3, 1, 2}), place);
+  in.set_layout(DataLayout::kNHWC);
+
+  auto kernel_nhwc = OpKernelType(proto::DataType::FP32, place,
+                                  DataLayout::kNHWC, LibraryType::kPlain);
+  auto kernel_ncwh = OpKernelType(proto::DataType::FP32, place,
+                                  DataLayout::kNCHW, LibraryType::kPlain);
+
+  TransDataLayout(kernel_nhwc, kernel_ncwh, in, &out);
+
+  EXPECT_TRUE(out.layout() == DataLayout::kNCHW);
+  EXPECT_TRUE(out.dims() == make_ddim({2, 2, 3, 1}));
+
+  TransDataLayout(kernel_ncwh, kernel_nhwc, in, &out);
+
+  EXPECT_TRUE(in.layout() == DataLayout::kNHWC);
+  EXPECT_TRUE(in.dims() == make_ddim({2, 3, 1, 2}));
+}
\ No newline at end of file
diff --git a/paddle/framework/data_transform.cc b/paddle/framework/data_transform.cc
index e56edb95396ef8de44da95ce795161d7cf1debc6..b6fd46401ffdd50cfdb00cc6e4ecd821bb39aba5 100644
--- a/paddle/framework/data_transform.cc
+++ b/paddle/framework/data_transform.cc
@@ -15,20 +15,50 @@ limitations under the License. */
 #include "paddle/framework/data_transform.h"
 
 #include "paddle/framework/data_device_transform.h"
+#include "paddle/framework/data_layout_transform.h"
+#include "paddle/framework/data_type_transform.h"
 
 namespace paddle {
 namespace framework {
 
-Tensor* DataTransform(const OpKernelType& expected_kernel_type,
-                      const OpKernelType& kernel_type_for_var,
-                      const Tensor& input_tensor) {
-  Tensor* out = nullptr;
+static void PassTensorData(Tensor* from, Tensor* to) {
+  to->ShareDataWith(*from);
+  *from = Tensor();
+}
+
+void DataTransform(const OpKernelType& expected_kernel_type,
+                   const OpKernelType& kernel_type_for_var,
+                   const Tensor& input_tensor, Tensor* output_tensor) {
+  bool transformed = false;
+  Tensor in;
+  in.ShareDataWith(input_tensor);
+  Tensor out;
+
+  // do layout transform
+  if (NeedTransformLayout(expected_kernel_type.data_layout_,
+                          kernel_type_for_var.data_layout_)) {
+    TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
+    transformed = true;
+    PassTensorData(&out, &in);
+  }
+
+  if (expected_kernel_type.data_type_ != kernel_type_for_var.data_type_) {
+    TransDataType(kernel_type_for_var, expected_kernel_type, in, &out);
+    transformed = true;
+    PassTensorData(&out, &in);
+  }
+
+  // do device transform
   if (!platform::is_same_place(kernel_type_for_var.place_,
                                expected_kernel_type.place_)) {
-    out = DeviceTransform(input_tensor, expected_kernel_type.place_);
+    TransDataDevice(in, expected_kernel_type.place_, &out);
+    transformed = true;
+    PassTensorData(&out, &in);
   }
-  PADDLE_ENFORCE_NOT_NULL(out, "out should not be null");
-  return out;
+
+  PADDLE_ENFORCE(transformed, "No transform is applied, please check!");
+  // get output data
+  output_tensor->ShareDataWith(in);
 }
 
 void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
diff --git a/paddle/framework/data_transform.h b/paddle/framework/data_transform.h
index ee95c7e8564d0392c8f25fce161d0f722c04761a..a4b78902379d5eb89f92ec47655ff93b49d0bfab 100644
--- a/paddle/framework/data_transform.h
+++ b/paddle/framework/data_transform.h
@@ -30,9 +30,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DataTransform(const OpKernelType& expected_kernel_type,
-                      const OpKernelType& kernel_type_for_var,
-                      const Tensor& input_tensor);
+void DataTransform(const OpKernelType& expected_kernel_type,
+                   const OpKernelType& kernel_type_for_var,
+                   const Tensor& input_tensor, Tensor* out);
 
 void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
                             Variable& out_var);
diff --git a/paddle/framework/data_type_transform.cc b/paddle/framework/data_type_transform.cc
index 63373232e910d44eb0996f9280f9c166ad092030..7df1cc6b75b4e87b57813aa01a1b29302d616158 100644
--- a/paddle/framework/data_type_transform.cc
+++ b/paddle/framework/data_type_transform.cc
@@ -38,14 +38,11 @@ struct CastDataType {
 
   template <typename OutType>
   void operator()() {
-    auto place = ctx_->GetPlace();
-
     auto* in_begin = in_.data<InType>();
-    auto numel = in_.numel();
-    auto* in_end = in_begin + numel;
-    auto* out_begin = out_->mutable_data<OutType>(place);
+    auto* in_end = in_begin + in_.numel();
+    auto* out_begin = out_->mutable_data<OutType>(in_.place());
 
-    if (platform::is_cpu_place(place)) {
+    if (platform::is_cpu_place(in_.place())) {
       platform::Transform<platform::CPUDeviceContext> trans;
       auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
       trans(*context, in_begin, in_end, out_begin,
@@ -57,38 +54,31 @@ struct CastDataType {
   }
 };
 
-void TransDataType(const platform::DeviceContext* ctx,
-                   const KernelTypePair& kernel_pair, const Variable& in,
-                   Variable* out) {
-  PADDLE_ENFORCE(in.IsType<Tensor>(), "Only Support Tensor transform!.");
-  PADDLE_ENFORCE(
-      platform::places_are_same_class(kernel_pair.first.place_,
-                                      kernel_pair.second.place_),
-      "TransDataType Only Support DataType transform on same place!");
-
-  auto src = in.Get<Tensor>();
-  auto* dst = out->GetMutable<Tensor>();
+void TransDataType(const OpKernelType& kernel_type_for_var,
+                   const OpKernelType& expected_kernel_type, const Tensor& in,
+                   Tensor* out) {
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
 
-  auto dims = src.dims();
-  dst->Resize(dims);
-  auto dst_type = kernel_pair.second.data_type_;
-  auto src_type = kernel_pair.first.data_type_;
+  out->Resize(in.dims());
+  auto src_type = kernel_type_for_var.data_type_;
+  auto dst_type = expected_kernel_type.data_type_;
+  auto ctx = pool.Get(in.place());
 
   switch (src_type) {
     case proto::DataType::FP32:
-      framework::VisitDataType(dst_type, CastDataType<float>(src, dst, ctx));
+      framework::VisitDataType(dst_type, CastDataType<float>(in, out, ctx));
       break;
     case proto::DataType::FP64:
-      framework::VisitDataType(dst_type, CastDataType<double>(src, dst, ctx));
+      framework::VisitDataType(dst_type, CastDataType<double>(in, out, ctx));
       break;
     case proto::DataType::INT32:
-      framework::VisitDataType(dst_type, CastDataType<int>(src, dst, ctx));
+      framework::VisitDataType(dst_type, CastDataType<int>(in, out, ctx));
       break;
     case proto::DataType::INT64:
-      framework::VisitDataType(dst_type, CastDataType<int64_t>(src, dst, ctx));
+      framework::VisitDataType(dst_type, CastDataType<int64_t>(in, out, ctx));
       break;
     case proto::DataType::BOOL:
-      framework::VisitDataType(dst_type, CastDataType<bool>(src, dst, ctx));
+      framework::VisitDataType(dst_type, CastDataType<bool>(in, out, ctx));
       break;
     default:
       PADDLE_THROW("Not support type %d", src_type);
diff --git a/paddle/framework/data_type_transform.h b/paddle/framework/data_type_transform.h
index 8ec90742256c2308a242d993838e46e51a6fc167..067c0c2a5b14465a31cabf7b5d4442d6a3a1c773 100644
--- a/paddle/framework/data_type_transform.h
+++ b/paddle/framework/data_type_transform.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/framework/op_kernel_type.h"
+#include "paddle/framework/tensor.h"
 #include "paddle/framework/variable.h"
 #include "paddle/platform/device_context.h"
 
@@ -23,9 +24,9 @@ namespace framework {
 
 using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
 
-void TransDataType(const platform::DeviceContext* ctx,
-                   const KernelTypePair& kernel_pair, const Variable& in,
-                   Variable* out);
+void TransDataType(const OpKernelType& kernel_type_for_var,
+                   const OpKernelType& expected_kernel_type, const Tensor& in,
+                   Tensor* out);
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/data_type_transform_test.cc b/paddle/framework/data_type_transform_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..89d32f528334a159e1b015802ff2670f9cfc1584
--- /dev/null
+++ b/paddle/framework/data_type_transform_test.cc
@@ -0,0 +1,53 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/framework/data_type_transform.h"
+
+#include "gtest/gtest.h"
+
+TEST(DataTypeTransform, CPUTransform) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+
+  auto place = CPUPlace();
+
+  Tensor in;
+  Tensor out;
+
+  float* ptr = in.mutable_data<float>(make_ddim({2, 3}), place);
+  int data_number = 2 * 3;
+
+  for (int i = 0; i < data_number; ++i) {
+    ptr[i] = i / 3;
+  }
+
+  auto kernel_fp32 = OpKernelType(proto::DataType::FP32, place,
+                                  DataLayout::kAnyLayout, LibraryType::kPlain);
+  auto kernel_fp64 = OpKernelType(proto::DataType::FP64, place,
+                                  DataLayout::kAnyLayout, LibraryType::kPlain);
+  auto kernel_int32 = OpKernelType(proto::DataType::INT32, place,
+                                   DataLayout::kAnyLayout, LibraryType::kPlain);
+
+  TransDataType(kernel_fp32, kernel_fp64, in, &out);
+  double* out_data_double = out.data<double>();
+  for (int i = 0; i < data_number; ++i) {
+    ASSERT_EQ(out_data_double[i], static_cast<double>(i / 3));
+  }
+
+  TransDataType(kernel_fp32, kernel_int32, in, &out);
+  int* out_data_int = out.data<int>();
+  for (int i = 0; i < data_number; ++i) {
+    ASSERT_EQ(out_data_int[i], static_cast<int>(i / 3));
+  }
+}
diff --git a/paddle/framework/dim.h b/paddle/framework/dim.h
index 04d4b0e604e6f73ad94e0ca79d6b69f663bd4076..ec17d7c6156351d21a4f9431f85fb0bcf00e4331 100644
--- a/paddle/framework/dim.h
+++ b/paddle/framework/dim.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #include <iostream>
diff --git a/paddle/framework/dim_test.cu b/paddle/framework/dim_test.cu
index 0a6a87669c900de6cb507dd48f0cfc871defe279..2bcab7c5c2e454e86a148fde003164d369c46ef2 100644
--- a/paddle/framework/dim_test.cu
+++ b/paddle/framework/dim_test.cu
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include <thrust/device_vector.h>
 #include <sstream>
 
diff --git a/paddle/framework/eigen_test.cc b/paddle/framework/eigen_test.cc
index bc4a2db32cfba66bef2c444e1f822e0d2a57b91e..9e368a522ce71d73213ba4c781e8a56fad917b0a 100644
--- a/paddle/framework/eigen_test.cc
+++ b/paddle/framework/eigen_test.cc
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/eigen.h"
 #include <gtest/gtest.h>
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 87a57d095141cc456af2cbabbc227715a02375e9..b29f528f3f749efa3463125c774c2f4d4ebcbc7c 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -135,6 +135,65 @@ bool operator==(const LoD &a, const LoD &b) {
   return true;
 }
 
+bool CheckLoD(const LoD &in, int tensor_height) {
+  if (in.empty()) return true;
+  for (const auto &level : in) {
+    // check: there should be more than 2 offsets existing in each level.
+    if (level.size() < 2) return false;
+    // check: the first offset(the begin offset) of each level should be 0.
+    if (level.front() != 0) return false;
+    // check: all the offsets in a level should be ascending(no same items
+    // allows).
+    if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
+          if (a < b) return true;
+          return false;
+        })) {
+      LOG(INFO) << "ascending error";
+      return false;
+    }
+  }
+  // check: the lowest level's last offset should equals `tensor_height` if
+  //        tensor_height>0.
+  if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
+    return false;
+
+  // check: the higher level's last offset should equals the lower level's
+  // size-1.
+  // NOTE LoD store the levels from top to bottom, so the higher level goes
+  // first.
+  for (size_t level = 0; level < in.size() - 1; level++) {
+    if (in[level].back() != in[level + 1].size() - 1) return false;
+  }
+  return true;
+}
+
+bool CheckAbsLoD(const LoD &in, int tensor_height) {
+  if (in.empty()) return true;
+  for (const auto &level : in) {
+    // check: all the offsets in a level should be ascending(no same items
+    // allows).
+    if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
+          if (a < b) return true;
+          return false;
+        })) {
+      return false;
+    }
+
+    // check: there should be more than 2 offsets existing in each level.
+    if (level.size() < 2) return false;
+
+    // check: the first offset of each level should be 0, and the last should be
+    // the same(the height of underlying tensor).
+    if (level.front() != 0) return false;
+    if (tensor_height < 0) {
+      tensor_height = level.back();
+    } else if ((size_t)tensor_height != level.back()) {
+      return false;
+    }
+  }
+  return true;
+}
+
 using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;
 LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
                                         size_t end_idx, size_t start_level) {
@@ -227,48 +286,86 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
   DeserializeFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
 }
 
-// TODO(tonyyang-svail): make this function support LoD
 std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
     const std::vector<platform::Place> places) const {
   check_memory_size();
-  PADDLE_ENFORCE(lod().empty(), "Disable parallel lod for now");
-  PADDLE_ENFORCE(dims()[0] % places.size() == 0,
-                 "Batch size should be divided by places size");
-
-  std::vector<LoDTensor> lods;
-  for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-    int begin = place_idx * dims()[0] / places.size();
-    int end = (place_idx + 1) * dims()[0] / places.size();
+  int batch_size =
+      lod().empty() ? dims()[0] : static_cast<int>(lod()[0].size()) - 1;
+  size_t result_size = std::min(static_cast<size_t>(batch_size), places.size());
+  size_t remainder = batch_size % places.size();
+
+  std::vector<LoDTensor> results;
+  results.reserve(result_size);
+
+  int step_width = static_cast<int>(batch_size / result_size);
+  for (size_t i = 0; i < result_size; ++i) {
+    int begin = static_cast<int>(i * step_width);
+    int end = static_cast<int>((i + 1) * step_width);
+    if (i + 1 == places.size()) {  // last
+      end += remainder;
+    }
 
-    auto src = Slice(begin, end);
-    auto &dst_place = places[place_idx];
     LoDTensor dst;
-    framework::Copy(src, dst_place, &dst);
-
-    lods.emplace_back(dst);
+    if (lod().empty()) {
+      auto src = Slice(begin, end);
+      auto &dst_place = places[i];
+      framework::Copy(src, dst_place, &dst);
+    } else {
+      auto lod_and_offset = GetSubLoDAndAbsoluteOffset(lod(), begin, end, 0);
+
+      auto &offset = lod_and_offset.second;
+      auto src = Slice(offset.first, offset.second);
+      auto &dst_place = places[i];
+      framework::Copy(src, dst_place, &dst);
+
+      LoD my_lod;
+      for (auto &l : lod_and_offset.first) {
+        std::vector<size_t> v{0};
+        for (auto &ll : l) {
+          v.push_back(ll + v.back());
+        }
+        my_lod.emplace_back(v);
+      }
+      dst.set_lod(my_lod);
+    }
+    results.emplace_back(dst);
   }
 
-  return lods;
+  return results;
 }
 
-// TODO(tonyyang-svail): make this function support LoD
 void LoDTensor::MergeLoDTensor(
     const std::vector<const LoDTensor *> &lod_tensors,
     platform::Place dst_place) {
   PADDLE_ENFORCE(!lod_tensors.empty());
+
   framework::DDim new_dim = lod_tensors[0]->dims();
   std::type_index new_type = lod_tensors[0]->type();
-  auto new_layout = lod_tensors[0]->layout();
-  for (auto *lod : lod_tensors) {
-    PADDLE_ENFORCE(new_dim == lod->dims());
-    PADDLE_ENFORCE(new_type == lod->type());
-    PADDLE_ENFORCE(new_layout == lod->layout());
+  framework::DataLayout new_layout = lod_tensors[0]->layout();
+  LoD new_lod = lod_tensors[0]->lod();
+  for (size_t i = 1; i < lod_tensors.size(); ++i) {
+    auto *t = lod_tensors[i];
+    PADDLE_ENFORCE_EQ(new_type.hash_code(), t->type().hash_code());
+    PADDLE_ENFORCE_EQ(new_layout, t->layout());
+
+    PADDLE_ENFORCE_EQ(framework::product(new_dim) / new_dim[0],
+                      framework::product(t->dims()) / t->dims()[0]);
+    new_dim[0] += t->dims()[0];
+
+    auto &lod = t->lod();
+    for (size_t j = 0; j < lod.size(); ++j) {
+      auto &sub_lod = new_lod[j];
+      auto &offset = sub_lod.back();
+      for (size_t k = 1; k < lod[j].size(); ++k) {
+        sub_lod.push_back(lod[j][k] + offset);
+      }
+    }
   }
-  new_dim[0] *= lod_tensors.size();
   Resize(new_dim);
   set_layout(new_layout);
-
+  set_lod(new_lod);
   mutable_data(dst_place, new_type);
+
   int begin = 0;
   for (auto *src : lod_tensors) {
     int end = begin + src->dims()[0];
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 88ea78f2682b2ffc962c9663f6b3c636dedb931d..9d1294fdeb9bd76bf944f7ec3687e3c5bb333241 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -71,6 +71,38 @@ LoD ToAbsOffset(const LoD& in);
 
 bool operator==(const LoD& a, const LoD& b);
 
+/*
+ * Check whether this lod's format is valid.
+ *
+ * ATTENTION:
+ *   - Empty lod is treated as valid.
+ *
+ * It will check two things:
+ *
+ *  1. all the offsets in a level should be ascending(no same items allows).
+ *  2. there should be more than 2 offsets existing in each level.
+ *  3. the higher level's last offset should equals the lower level's size-1.
+ *  4. the first offset(the begin offset) of each level should be 0.
+ *  5. the lowest level's last offset should equals `tensor_height` if
+ * tensor_height>0.
+ */
+
+bool CheckLoD(const LoD& in, int tensor_height = -1);
+/*
+ * Check whether this absolute lod's format is valid.
+ *
+ * ATTENTION:
+ *   - Empty lod is treated as valid.
+ *
+ * It will check two things:
+ *  1. all the offsets in a level should be ascending(no same items allows)
+ *  2. there should be more than 2 offsets existing in each level.
+ *  3. the first offset of each level should be 0, and the last should be the
+ *     same(the height of underlying tensor) or `tensor_height` if
+ *     tensor_height>0.
+ */
+bool CheckAbsLoD(const LoD& in, int tensor_height = -1);
+
 /*
  * LoDTensor (Level of details Tensor)
  * see https://en.wikipedia.org/wiki/Level_of_details for reference.
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index baad9c6f98ac135c3650fe3113522850328c1298..4d172c43c7cceacb7d0dfaf1c4d3028717350268 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/lod_tensor.h"
 
@@ -22,38 +23,6 @@
 namespace paddle {
 namespace framework {
 
-const int kLodTensorSize = 20 * 128;
-
-class LoDTensorTester : public ::testing::Test {
- public:
-  virtual void SetUp() override {
-    // tensor's batch_size: 30
-    // 3 levels
-    // 0 10 20
-    // 0 5 10 15 20
-    // 0 2 5 7 10 12 15 20
-    LoD lod;
-    lod.push_back(std::vector<size_t>{0, 2, 3});
-    lod.push_back(std::vector<size_t>{0, 2, 5, 8});
-    lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
-
-    ASSERT_EQ(lod.size(), 3UL);
-
-    lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/});
-    // malloc memory
-    float* dst_ptr = lod_tensor_.mutable_data<float>(place);
-    for (int i = 0; i < kLodTensorSize; ++i) {
-      dst_ptr[i] = i;
-    }
-
-    lod_tensor_.set_lod(lod);
-  }
-
- protected:
-  platform::CPUPlace place;
-  LoDTensor lod_tensor_;
-};
-
 TEST(LodExpand, test) {
   LoD lod{{0, 2}};
   LoDTensor tensor;
@@ -131,5 +100,118 @@ TEST(LoD, ToAbsOffset) {
   EXPECT_EQ(abs_lod, expected);
 }
 
+TEST(LoD, SplitLoDTensor) {
+  LoD lod;
+  lod.push_back(std::vector<size_t>({0, 2, 4, 5, 6}));
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 13, 15, 20}));
+
+  platform::CPUPlace place;
+  LoDTensor lod_tensor;
+  lod_tensor.Resize({20, 1});
+  float* dst_ptr = lod_tensor.mutable_data<float>(place);
+  for (int i = 0; i < lod_tensor.numel(); ++i) {
+    dst_ptr[i] = i;
+  }
+  lod_tensor.set_lod(lod);
+
+  std::vector<platform::Place> places{platform::CPUPlace(),
+                                      platform::CPUPlace()};
+  LoD lod0;
+  lod0.push_back(std::vector<size_t>({0, 2, 4}));
+  lod0.push_back(std::vector<size_t>({0, 1, 6, 8, 13}));
+  LoD lod1;
+  lod1.push_back(std::vector<size_t>({0, 1, 2}));
+  lod1.push_back(std::vector<size_t>({0, 2, 7}));
+
+  auto lods = lod_tensor.SplitLoDTensor(places);
+  EXPECT_EQ(lods[0].lod(), lod0);
+  EXPECT_EQ(lods[1].lod(), lod1);
+}
+
+TEST(LoD, MergeLoDTensor) {
+  LoD lod;
+  lod.push_back(std::vector<size_t>({0, 2, 4, 5, 6}));
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 13, 15, 20}));
+
+  platform::CPUPlace place;
+
+  LoDTensor lod_tensor0;
+  LoD lod0;
+  lod0.push_back(std::vector<size_t>({0, 2, 4}));
+  lod0.push_back(std::vector<size_t>({0, 1, 6, 8, 13}));
+  lod_tensor0.set_lod(lod0);
+
+  lod_tensor0.Resize({13, 1});
+  float* dst_ptr = lod_tensor0.mutable_data<float>(place);
+  for (int i = 0; i < lod_tensor0.numel(); ++i) {
+    dst_ptr[i] = i;
+  }
+
+  LoDTensor lod_tensor1;
+  LoD lod1;
+  lod1.push_back(std::vector<size_t>({0, 1, 2}));
+  lod1.push_back(std::vector<size_t>({0, 2, 7}));
+  lod_tensor1.set_lod(lod1);
+  lod_tensor1.Resize({7, 1});
+  dst_ptr = lod_tensor1.mutable_data<float>(place);
+  for (int i = 0; i < lod_tensor1.numel(); ++i) {
+    dst_ptr[i] = i;
+  }
+
+  std::vector<const LoDTensor*> lods{&lod_tensor0, &lod_tensor1};
+
+  LoDTensor lod_tensor;
+  lod_tensor.MergeLoDTensor(lods, place);
+  EXPECT_EQ(lod_tensor.lod(), lod);
+}
+
+TEST(LoD, CheckLoD) {
+  LoD relative_lod;
+  relative_lod.push_back(std::vector<size_t>({0, 2}));
+  relative_lod.push_back(std::vector<size_t>({0, 1, 3}));
+  relative_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
+
+  // check compatible
+  ASSERT_TRUE(CheckLoD(relative_lod));
+  relative_lod[1].back()++;
+  ASSERT_FALSE(CheckLoD(relative_lod));
+  relative_lod[1].back()--;  // recover it
+
+  // check empty
+  LoD empty_lod;
+  ASSERT_TRUE(CheckLoD(empty_lod));
+
+  // check less than 2 offsets in a level
+  LoD some_lod0;
+  some_lod0.push_back(std::vector<size_t>({0}));
+  ASSERT_FALSE(CheckLoD(some_lod0));
+
+  // check with underlying tensor storage.
+  ASSERT_TRUE(CheckLoD(relative_lod, 5));
+  ASSERT_FALSE(CheckLoD(relative_lod, 9));
+}
+
+TEST(LoD, CheckAbsLoD) {
+  LoD relative_lod;
+  relative_lod.push_back(std::vector<size_t>({0, 2}));
+  relative_lod.push_back(std::vector<size_t>({0, 1, 3}));
+  relative_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
+
+  auto abs_lod = ToAbsOffset(relative_lod);
+
+  ASSERT_TRUE(CheckAbsLoD(abs_lod));
+
+  // check less than 2 offsets in a level.
+
+  // check the last item should be compatible with tensor height.
+  abs_lod.back().back()++;
+  ASSERT_FALSE(CheckAbsLoD(abs_lod));
+  abs_lod.back().back()--;  // restore
+
+  // check less than 2 offsets in a lod.
+  LoD abs_lod0;
+  abs_lod0.push_back(std::vector<size_t>({0}));
+  ASSERT_FALSE(CheckAbsLoD(abs_lod0));
+}
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu
index e8508ad2658ae850e4c98aa798b5db6d007e67d0..1e253a2f6f35e827fb2e5db6270da03705b39514 100644
--- a/paddle/framework/lod_tensor_test.cu
+++ b/paddle/framework/lod_tensor_test.cu
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/paddle/framework/op_kernel_type.h b/paddle/framework/op_kernel_type.h
index 053897784c1c4350deadf39e2a009220d38f65f9..44adb94d2a8feb79a5ff93c6e32cdff52333166e 100644
--- a/paddle/framework/op_kernel_type.h
+++ b/paddle/framework/op_kernel_type.h
@@ -85,5 +85,15 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
   return stream.str();
 }
 
+inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) {
+  return l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r;
+}
+
+inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) {
+  return (!platform::places_are_same_class(l.place_, r.place_)) ||
+         (l.data_type_ != r.data_type_) ||
+         NeedTransformLayout(l.data_layout_, r.data_layout_);
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index d75c0233e8e0134ddf4edc50c07490a234b65cd0..5de9ae559c435439f30931c7840e54e0d2bb744c 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -177,16 +177,16 @@ class OpKernelRegistrar : public Registrar {
 /**
  * Macro to register OperatorKernel.
  */
-#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...)        \
-  STATIC_ASSERT_GLOBAL_NAMESPACE(                                         \
-      __reg_op_kernel_##op_type##_##DEVICE_TYPE##__,                      \
-      "REGISTER_OP_KERNEL must be called in global namespace");           \
-  static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \
-      __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type,       \
-                                                          #DEVICE_TYPE);  \
-  int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() {                \
-    __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch();          \
-    return 0;                                                             \
+#define REGISTER_OP_KERNEL(op_type, LIBRARY_TYPE, place_class, ...)        \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                          \
+      __reg_op_kernel_##op_type##_##LIBRARY_TYPE##__,                      \
+      "REGISTER_OP_KERNEL must be called in global namespace");            \
+  static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__>  \
+      __op_kernel_registrar_##op_type##_##LIBRARY_TYPE##__(#op_type,       \
+                                                           #LIBRARY_TYPE); \
+  int TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE() {                \
+    __op_kernel_registrar_##op_type##_##LIBRARY_TYPE##__.Touch();          \
+    return 0;                                                              \
   }
 
 #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
@@ -208,14 +208,14 @@ class OpKernelRegistrar : public Registrar {
   static int use_op_itself_##op_type##_ __attribute__((unused)) = \
       TouchOpRegistrar_##op_type()
 
-#define USE_OP_DEVICE_KERNEL(op_type, DEVICE_TYPE)               \
-  STATIC_ASSERT_GLOBAL_NAMESPACE(                                \
-      __use_op_kernel_##op_type##_##DEVICE_TYPE##__,             \
-      "USE_OP_DEVICE_KERNEL must be in global namespace");       \
-  extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \
-  static int use_op_kernel_##op_type##_##DEVICE_TYPE##_          \
-      __attribute__((unused)) =                                  \
-          TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE()
+#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE)               \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                 \
+      __use_op_kernel_##op_type##_##LIBRARY_TYPE##__,             \
+      "USE_OP_DEVICE_KERNEL must be in global namespace");        \
+  extern int TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE(); \
+  static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_          \
+      __attribute__((unused)) =                                   \
+          TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE()
 
 // TODO(fengjiayi): The following macros
 // seems ugly, do we have better method?
diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc
index 66f07b6757fe1fe613e61ac66057be43ef5aced7..341da8befd45abd1a3fc86581be33319a8791567 100644
--- a/paddle/framework/op_registry_test.cc
+++ b/paddle/framework/op_registry_test.cc
@@ -368,24 +368,6 @@ TEST(OperatorRegistrar, OpWithMultiKernel) {
 
   // TODO(qiao) add priority back
   // use all available kernels
-  paddle::framework::UseALL();
   op->Run(scope, cuda_place);
   EXPECT_EQ(op_test_value, -10);
-
-  // remove cuda kernels
-  paddle::framework::UseCPU();
-  op->Run(scope, cpu_place);
-
-  EXPECT_EQ(op_test_value, -9);
-
-  // add cuda kernels
-  paddle::framework::UseCUDA();
-  op->Run(scope, cuda_place);
-
-  EXPECT_EQ(op_test_value, -10);
-
-  // use cudnn kernel
-  paddle::framework::UseCUDNN();
-  op->Run(scope, cuda_place);
-  EXPECT_EQ(op_test_value, -20);
 }
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index be1373dc2a86b18f780422da9528a376f59a5837..831b1e2a1e10777d9e89364adcd4b1f367e86080 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -29,52 +29,12 @@ DEFINE_bool(op_sync, false,
 namespace paddle {
 namespace framework {
 
-std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
-
-void UseCPU() {
-  kKernelPriority.clear();
-  /*Plain CPU*/
-  auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kPlain);
-  kKernelPriority.insert(kKernelPriority.begin(), pair0);
-}
-
-void UseMKLDNN() {
-  UseCPU();
-#if PADDLE_WITH_MKLML
-  {
-    /*MKLDNN Kernel*/
-    auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN);
-    kKernelPriority.insert(kKernelPriority.begin(), pair0);
-  }
-#endif
-}
-
-void UseCUDA() {
-  UseMKLDNN();
-#if PADDLE_WITH_CUDA
-  /*Plain GPU*/
-  auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain);
-  kKernelPriority.insert(kKernelPriority.begin(), pair0);
-#endif
-}
-
-void UseCUDNN() {
-  UseCUDA();
-#if PADDLE_WITH_CUDA
-  if (platform::dynload::HasCUDNN()) {
-    /*CUDNN Kernel*/
-    auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN);
-    kKernelPriority.insert(kKernelPriority.begin(), pair0);
-  }
-#endif
-}
-
-void UseALL() {
-  UseCPU();
-  UseMKLDNN();
-  UseCUDA();
-  UseCUDNN();
-}
+std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
+    std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
+    std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
+    std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN),
+    std::make_tuple(platform::CPUPlace(), LibraryType::kPlain),
+};
 
 static DDim GetDims(const Scope& scope, const std::string& name) {
   Variable* var = scope.FindVar(name);
@@ -271,36 +231,33 @@ static bool VarIsTensor(const Variable* var) {
   return var->IsType<LoDTensor>() || var->IsType<SelectedRows>();
 }
 
-static const Tensor* GetTensorFromVar(const Variable* var) {
-  const Tensor* t = nullptr;
+static const Tensor* GetTensorFromVar(Variable* var) {
   if (var->IsType<LoDTensor>()) {
-    t = &(var->Get<LoDTensor>());
+    return var->GetMutable<LoDTensor>();
   } else if (var->IsType<SelectedRows>()) {
-    t = &(var->Get<SelectedRows>().value());
+    return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
                  var->Type().name());
   }
-  return t;
 }
 
 static Tensor* GetMutableTensorFromVar(Variable* var) {
-  Tensor* t = nullptr;
   if (var->IsType<LoDTensor>()) {
-    t = var->GetMutable<LoDTensor>();
+    return var->GetMutable<LoDTensor>();
   } else if (var->IsType<SelectedRows>()) {
-    t = var->GetMutable<SelectedRows>()->mutable_value();
+    return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
                  var->Type().name());
   }
-  return t;
 }
 
 template <>
 const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
   auto* var = InputVar(name);
-  return var == nullptr ? nullptr : GetTensorFromVar(var);
+  return var == nullptr ? nullptr
+                        : GetTensorFromVar(const_cast<Variable*>(var));
 }
 
 template <>
@@ -343,6 +300,7 @@ bool OpSupportGPU(const std::string& op_type) {
   auto it = all_kernels.find(op_type);
   if (it == all_kernels.end()) {
     // All control operator must support GPU
+
     return true;
   }
   for (auto& kern_pair : it->second) {
@@ -516,24 +474,26 @@ void OperatorWithKernel::Run(const Scope& scope,
   }
 
   ExecutionContext ctx(*this, scope, *dev_ctx);
-  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
 
   OpKernelMap& kernels = kernels_iter->second;
 
-  for (auto& candidate : kKernelPriority) {
-    auto candidate_key =
-        OpKernelType(expected_kernel_key.data_type_, std::get<0>(candidate),
-                     expected_kernel_key.data_layout_, std::get<1>(candidate));
+  // TODO(dzhwinter) : kernel fallback mechanism will be added when all the
+  // transform functions are ready.
 
-    if ((candidate_key == expected_kernel_key) ||
-        (kernels.count(candidate_key))) {
-      expected_kernel_key = candidate_key;
-      break;
-    }
-  }
+  // for (auto& candidate : kKernelPriority) {
+  //   Do selection
+  // }
 
+  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
   VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
 
+  auto kernel_iter = kernels.find(expected_kernel_key);
+  if (kernel_iter == kernels.end()) {
+    PADDLE_THROW("op %s does not have kernel for %s", type_,
+                 KernelTypeToString(expected_kernel_key));
+  }
+
+  // do data transform
   Scope& new_scope = scope.NewScope();
 
   for (auto& var_name_item : this->Inputs()) {
@@ -544,7 +504,7 @@ void OperatorWithKernel::Run(const Scope& scope,
         if (tensor_in->IsInitialized()) {
           auto kernel_type_for_var = this->GetKernelTypeForVar(
               var_name_item.first, *tensor_in, expected_kernel_key);
-          if (kernel_type_for_var != expected_kernel_key) {
+          if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) {
             auto out_var_names = OutputVars(true);
             if (std::find(out_var_names.begin(), out_var_names.end(),
                           var_name) != out_var_names.end()) {
@@ -553,19 +513,19 @@ void OperatorWithKernel::Run(const Scope& scope,
                   "does not support transform",
                   var_name);
             }
-            VLOG(3) << "need to do transform for var " << var_name;
+            VLOG(3) << "Transform Variable " << var_name << " from "
+                    << kernel_type_for_var << " to " << expected_kernel_key;
             auto* trans_var = new_scope.Var(var_name);
-            auto* out = DataTransform(expected_kernel_key, kernel_type_for_var,
-                                      *tensor_in);
-            CopyVariableWithTensor(*var, *out, *trans_var);
+            std::shared_ptr<Tensor> out(new Tensor);
+            DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in,
+                          out.get());
+            CopyVariableWithTensor(*var, *(out.get()), *trans_var);
           }
         }
       }
     }
   }
 
-  auto kernel_iter = kernels.find(expected_kernel_key);
-
   auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
   kernel_iter->second->Compute(
       ExecutionContext(*this, new_scope, *new_dev_ctx));
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index d5feb598649c97a9517b7c2b1764fd54ff9f8693..c9140f304c89e32a0fa8bd24722cc2e5dbc4e2e1 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD";
 constexpr char kZeroVarSuffix[] = "@ZERO";
 
 // define some kernel priority
+/* Define multiple kernel type fallback order*/
 extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
 
-/**
- * @brief Use cpu kernel only
- */
-void UseCPU();
-
-/**
- * @brief Perfer MKLDNN kernel than Plain CPU kernel
- */
-void UseMKLDNN();
-
-/**
- * @brief Perfer CUDA kernel than Plain CPU kernel
- */
-void UseCUDA();
-
-/**
- * @brief Perfer cudnn kernel than Plain CUDA kernel
- */
-void UseCUDNN();
-
-/**
- * @brief Use all available kernels
- */
-void UseALL();
-
 inline std::string GradVarName(const std::string& var_name) {
   return var_name + kGradVarSuffix;
 }
diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc
index a1b4a03289eca4c8b9d8c23ede4221853cb31f79..9a387526ac2b0982aa4c931a9d92d98b08fb4f98 100644
--- a/paddle/framework/tensor_test.cc
+++ b/paddle/framework/tensor_test.cc
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/tensor.h"
 #include <gtest/gtest.h>
@@ -47,9 +48,6 @@ TEST(Tensor, DataAssert) {
   ASSERT_TRUE(caught);
 }
 
-/* following tests are not available at present
-   because Memory::Alloc() and Memory::Free() have not been ready.
-*/
 TEST(Tensor, MutableData) {
   {
     framework::Tensor src_tensor;
diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h
index 091b63bf0f907a5449f08f0e36abb6577fa5e43e..b49c61449984f51d65963958c87191b0799bcf5b 100644
--- a/paddle/framework/tensor_util.h
+++ b/paddle/framework/tensor_util.h
@@ -315,9 +315,8 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor,
           desc.data_type(),
           DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
       is.read(static_cast<char*>(buf), cpu_tensor.memory_size());
-      auto cpu_place = new platform::CPUPlace();
-      framework::Copy(cpu_tensor, *cpu_place, dev_ctx, tensor);
-      delete cpu_place;
+      auto dst_place = dev_ctx.GetPlace();
+      framework::Copy(cpu_tensor, dst_place, dev_ctx, tensor);
 #else
       PADDLE_THROW("Unexpected branch");
 #endif
diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc
index 3636125f2052200238ff82d4f708b62224322cdf..906b0b5656301eebbb9f61fad2a3cb4e464a83e8 100644
--- a/paddle/framework/tensor_util_test.cc
+++ b/paddle/framework/tensor_util_test.cc
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/tensor_util.h"
 #include <gtest/gtest.h>
diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h
index 36b76fb196cfd4c7b3697dcf0cda9a23ff53deb3..3b7ec0a2a90d8f88bfb7f1629f484b3a8a8078df 100644
--- a/paddle/framework/variable.h
+++ b/paddle/framework/variable.h
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #include <memory>
diff --git a/paddle/framework/variable_test.cc b/paddle/framework/variable_test.cc
index aea03bcf5719dacc01d2d78b52b33e8a0b29b5e5..e4732d9718e2b46a068963d44c4c1e04024f2330 100644
--- a/paddle/framework/variable_test.cc
+++ b/paddle/framework/variable_test.cc
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
index 44bb0883b89c712d70e2d4fdfe16bdfde86f81b7..520ccc1a995e966de73080b61a8c20cbee722267 100644
--- a/paddle/gserver/layers/MKLDNNConcatLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
@@ -43,7 +43,7 @@ void MKLDNNConcatLayer::reshape(
   channels_[0] = ic;
   oc = ic;
   for (size_t i = 1; i < inputLayers_.size(); i++) {
-    int batchsize, height, witdh;
+    int batchsize = 0, height = 0, witdh = 0;
     reshapeInput(batchsize, height, witdh, i);
     CHECK_EQ(bs, batchsize);
     CHECK_EQ(ih, height);
@@ -84,6 +84,7 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   bool has8c = false, has16c = false, hasnc = false;
   for (size_t i = 0; i < inputs.size(); i++) {
     resetInValue(inputs[i], nullptr, i, channels_[i]);
+    inputs[i]->downSpatial();
     CHECK(inputs[i]);
     auto dm = inputs[i]->getDims();
     // inputs format can be different, but ndims must equal
diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp
index 331bc7672ec0d39a7317c39f1d14e8dcadea471a..337b9ba7bc0fc4e4bb80ee7b248d934f111379d5 100644
--- a/paddle/gserver/layers/PriorBox.cpp
+++ b/paddle/gserver/layers/PriorBox.cpp
@@ -65,14 +65,19 @@ bool PriorBoxLayer::init(const LayerMap& layerMap,
   std::copy(pbConf.aspect_ratio().begin(),
             pbConf.aspect_ratio().end(),
             std::back_inserter(tmp));
-  // flip
-  int inputRatioLength = tmp.size();
-  for (int index = 0; index < inputRatioLength; index++) {
-    aspectRatio_.push_back(tmp[index]);
-    aspectRatio_.push_back(1 / tmp[index]);
+
+  if (maxSize_.size() > 0) CHECK_EQ(minSize_.size(), maxSize_.size());
+
+  // flip aspect ratios
+  for (int index = 0; index < tmp.size(); index++) {
+    real ar = tmp[index];
+    if (fabs(ar - 1.) < 1e-6) continue;
+    aspectRatio_.push_back(ar);
+    aspectRatio_.push_back(1. / ar);
   }
-  numPriors_ = aspectRatio_.size();
-  if (maxSize_.size() > 0) numPriors_++;
+
+  numPriors_ = aspectRatio_.size() * minSize_.size() + maxSize_.size();
+
   return true;
 }
 
@@ -99,50 +104,39 @@ void PriorBoxLayer::forward(PassType passType) {
     for (int w = 0; w < layerWidth; ++w) {
       real centerX = (w + 0.5) * stepW;
       real centerY = (h + 0.5) * stepH;
-      real minSize = 0;
       for (size_t s = 0; s < minSize_.size(); s++) {
-        // first prior.
-        minSize = minSize_[s];
+        real minSize = minSize_[s];
         real boxWidth = minSize;
         real boxHeight = minSize;
-        // xmin, ymin, xmax, ymax.
-        tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
-        tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
-        tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
-        tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
-        // set the variance.
-        for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
+
+        // priors with different aspect ratios
+        for (size_t r = 0; r < aspectRatio_.size(); r++) {
+          real ar = aspectRatio_[r];
+          boxWidth = minSize * sqrt(ar);
+          boxHeight = minSize / sqrt(ar);
+          tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
+          tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
+          tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
+          tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
+          // set the variance.
+          for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
+        }
 
         if (maxSize_.size() > 0) {
-          CHECK_EQ(minSize_.size(), maxSize_.size());
-          // second prior.
-          for (size_t s = 0; s < maxSize_.size(); s++) {
-            real maxSize = maxSize_[s];
-            boxWidth = boxHeight = sqrt(minSize * maxSize);
-            tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
-            tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
-            tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
-            tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
-            // set the variance.
-            for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
-          }
+          // square prior with size sqrt(minSize * maxSize)
+          real maxSize = maxSize_[s];
+          boxWidth = boxHeight = sqrt(minSize * maxSize);
+          tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
+          tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
+          tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
+          tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
+          // set the variance.
+          for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
         }
       }
-      // rest of priors.
-      for (size_t r = 0; r < aspectRatio_.size(); r++) {
-        real ar = aspectRatio_[r];
-        if (fabs(ar - 1.) < 1e-6) continue;
-        real boxWidth = minSize * sqrt(ar);
-        real boxHeight = minSize / sqrt(ar);
-        tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
-        tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
-        tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
-        tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
-        // set the variance.
-        for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
-      }
     }
   }
+
   // clip the prior's coordidate such that it is within [0, 1]
   for (int d = 0; d < dim * 2; ++d)
     if ((d % 8) < 4)
diff --git a/paddle/gserver/tests/img_conv_cudnn.py b/paddle/gserver/tests/img_conv_cudnn.py
index 3934607fa41f9b6d401f1c9ff4aec6715786799b..0ea6d6bae66b0a307748bd0d0fa9a53ed5f7927d 100644
--- a/paddle/gserver/tests/img_conv_cudnn.py
+++ b/paddle/gserver/tests/img_conv_cudnn.py
@@ -1,17 +1,16 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/img_conv_exconv.py b/paddle/gserver/tests/img_conv_exconv.py
index ad5a8ba2bde17000ca3d7057c6f399ae28d938b0..c618cdab27c52d70042b0a118f7f6fe935a6b9d7 100644
--- a/paddle/gserver/tests/img_conv_exconv.py
+++ b/paddle/gserver/tests/img_conv_exconv.py
@@ -1,17 +1,16 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/pyDataProvider.py b/paddle/gserver/tests/pyDataProvider.py
index 7235a239439b7544805d1bd06dfb1a72c2e0e937..d2ad5888b5a4c79d8b663ce8c2f313184151beb6 100644
--- a/paddle/gserver/tests/pyDataProvider.py
+++ b/paddle/gserver/tests/pyDataProvider.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy
 import struct
 import traceback
diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/gserver/tests/rnn_data_provider.py
index 913365a5a4037d14fcba1e1546508ba89668e0d6..063a4127e542d23012359a2eac0045bf69a51356 100644
--- a/paddle/gserver/tests/rnn_data_provider.py
+++ b/paddle/gserver/tests/rnn_data_provider.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer.PyDataProvider2 import *
 
 # Note that each config should has an independent provider
diff --git a/paddle/gserver/tests/sequenceGen.py b/paddle/gserver/tests/sequenceGen.py
index fd725727c04677b5ea8918f6721f0c007e80915d..04a1732d61c8618984d16550acf7c94da1bd3578 100644
--- a/paddle/gserver/tests/sequenceGen.py
+++ b/paddle/gserver/tests/sequenceGen.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os
 import sys
 
diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
index 7303d088043d5096a3491d3b3b32b231bde09a0a..aeaaa221f9fab981af88cfd63c30349e1b02a0ee 100644
--- a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
+++ b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
@@ -1,18 +1,16 @@
-# edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 ######################## data source ################################
diff --git a/paddle/gserver/tests/sequence_recurrent.py b/paddle/gserver/tests/sequence_recurrent.py
index 4895df186bfecc5cb5263676a9cd5bac5039d565..8786a5465db82d786d3772357b02ab837073a576 100644
--- a/paddle/gserver/tests/sequence_recurrent.py
+++ b/paddle/gserver/tests/sequence_recurrent.py
@@ -1,17 +1,16 @@
-#!/usr/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py
index a1d54542e3bc4e89f70d31d5e89c0f44953c9f90..8b5a3d49838c9bb49321a9d7514fc0241e6d67cd 100644
--- a/paddle/gserver/tests/sequence_recurrent_group.py
+++ b/paddle/gserver/tests/sequence_recurrent_group.py
@@ -1,18 +1,16 @@
-#!/usr/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from paddle.trainer_config_helpers import *
 
 ######################## data source ################################
diff --git a/paddle/gserver/tests/sequence_rnn_matched_inputs.py b/paddle/gserver/tests/sequence_rnn_matched_inputs.py
index 59e8c91733c42b6f13f723321d21bca98ab78bb7..0c55f2cf9d07b194aa06f88892f831f1a9ce6436 100644
--- a/paddle/gserver/tests/sequence_rnn_matched_inputs.py
+++ b/paddle/gserver/tests/sequence_rnn_matched_inputs.py
@@ -1,17 +1,16 @@
-# edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/sequence_rnn_mixed_inputs.py b/paddle/gserver/tests/sequence_rnn_mixed_inputs.py
index 6fe9dca6e2cb0e14fee346b8307f67b804328471..22b376b91aa4736d16fead698105466d679dd248 100644
--- a/paddle/gserver/tests/sequence_rnn_mixed_inputs.py
+++ b/paddle/gserver/tests/sequence_rnn_mixed_inputs.py
@@ -1,17 +1,16 @@
-# edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
index 786a0c6d780e4e8deadb35e52901e42dae67a281..3ce87490bbd0f30a3c42b947b073adb2a6c5b51c 100644
--- a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
+++ b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
@@ -1,17 +1,16 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/test_PyDataProvider2.py b/paddle/gserver/tests/test_PyDataProvider2.py
index 0d0fe476ff5eac8bf8ad1c9fe09b32c1a8f73ebc..044aede98e684a432c48b3ea5bb82a4a677682d4 100644
--- a/paddle/gserver/tests/test_PyDataProvider2.py
+++ b/paddle/gserver/tests/test_PyDataProvider2.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import random
 
 from paddle.trainer.PyDataProvider2 import *
diff --git a/paddle/inference/CMakeLists.txt b/paddle/inference/CMakeLists.txt
index 8437b2b21942ead544dab8636db1b355b7cf7bd5..02ca8a45a851d262eed6962a9a227b5009ef03a5 100644
--- a/paddle/inference/CMakeLists.txt
+++ b/paddle/inference/CMakeLists.txt
@@ -8,27 +8,6 @@ cc_library(paddle_fluid_api
 # Merge all modules into a simgle static library
 cc_library(paddle_fluid DEPS paddle_fluid_api ${FLUID_CORE_MODULES})
 
-# ptools
-# just for testing, we may need to change the storing format for inference_model
-# and move the dependent of pickle.
-# download from http://www.picklingtools.com/
-# build in the C++ sub-directory, using command
-#     make -f Makefile.Linux libptools.so
-set(PTOOLS_LIB)
-set(PTOOLS_ROOT $ENV{PTOOLS_ROOT} CACHE PATH "Folder contains PicklingTools")
-find_path(PTOOLS_INC_DIR chooseser.h PATHS ${PTOOLS_ROOT}/C++)
-find_library(PTOOLS_SHARED_LIB NAMES ptools PATHS ${PTOOLS_ROOT}/C++)
-if(PTOOLS_INC_DIR AND PTOOLS_SHARED_LIB)
-  add_definitions(-DPADDLE_USE_PTOOLS)
-  set(PTOOLS_LIB ptools)
-  message(STATUS "Found PicklingTools: ${PTOOLS_SHARED_LIB}")
-  add_library(${PTOOLS_LIB} SHARED IMPORTED GLOBAL)
-  set_property(TARGET ${PTOOLS_LIB} PROPERTY IMPORTED_LOCATION ${PTOOLS_SHARED_LIB})
-  include_directories(${PTOOLS_ROOT}/C++)
-  include_directories(${PTOOLS_ROOT}/C++/opencontainers_1_8_5/include)
-  add_definitions(-DOC_NEW_STYLE_INCLUDES) # used in ptools
-endif()
-
 add_executable(example example.cc)
 if(APPLE)
   set(OPTIONAL_LINK_FLAGS)
diff --git a/paddle/inference/example.cc b/paddle/inference/example.cc
index 9711b20e6fb4099a2cc497029468ebd1fd0b3456..0c18b45624dedcb5839d4b771e044b4a7b32af52 100644
--- a/paddle/inference/example.cc
+++ b/paddle/inference/example.cc
@@ -18,33 +18,21 @@ limitations under the License. */
 #include "paddle/inference/inference.h"
 
 DEFINE_string(dirname, "", "Directory of the inference model.");
-DEFINE_string(feed_var_names, "", "Names of feeding variables");
-DEFINE_string(fetch_var_names, "", "Names of fetching variables");
 
 int main(int argc, char** argv) {
   google::ParseCommandLineFlags(&argc, &argv, true);
-  if (FLAGS_dirname.empty() || FLAGS_feed_var_names.empty() ||
-      FLAGS_fetch_var_names.empty()) {
+  if (FLAGS_dirname.empty()) {
     // Example:
     //   ./example --dirname=recognize_digits_mlp.inference.model
-    //             --feed_var_names="x"
-    //             --fetch_var_names="fc_2.tmp_2"
-    std::cout << "Usage: ./example --dirname=path/to/your/model "
-                 "--feed_var_names=x --fetch_var_names=y"
-              << std::endl;
+    std::cout << "Usage: ./example --dirname=path/to/your/model" << std::endl;
     exit(1);
   }
 
   std::cout << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
-  std::cout << "FLAGS_feed_var_names: " << FLAGS_feed_var_names << std::endl;
-  std::cout << "FLAGS_fetch_var_names: " << FLAGS_fetch_var_names << std::endl;
-
   std::string dirname = FLAGS_dirname;
-  std::vector<std::string> feed_var_names = {FLAGS_feed_var_names};
-  std::vector<std::string> fetch_var_names = {FLAGS_fetch_var_names};
 
   paddle::InferenceEngine* engine = new paddle::InferenceEngine();
-  engine->LoadInferenceModel(dirname, feed_var_names, fetch_var_names);
+  engine->LoadInferenceModel(dirname);
 
   paddle::framework::LoDTensor input;
   srand(time(0));
diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc
index 37b8b20ddfcf2566b8410f950308309e5b2b2a7c..49001778808173b82865a4b6632a6b175ef96242 100644
--- a/paddle/inference/inference.cc
+++ b/paddle/inference/inference.cc
@@ -25,19 +25,37 @@ limitations under the License. */
 
 namespace paddle {
 
+void InferenceEngine::LoadInferenceModel(const std::string& dirname) {
+  std::string model_filename = dirname + "/__model__.dat";
+  LOG(INFO) << "loading model from " << model_filename;
+  std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
+  std::string program_desc_str;
+  inputfs.seekg(0, std::ios::end);
+  program_desc_str.resize(inputfs.tellg());
+  inputfs.seekg(0, std::ios::beg);
+  LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
+  inputfs.read(&program_desc_str[0], program_desc_str.size());
+  inputfs.close();
+
+  program_ = new framework::ProgramDesc(program_desc_str);
+  GenerateLoadProgram(dirname);
+
+  framework::BlockDesc* global_block = program_->MutableBlock(0);
+  feed_var_names_.clear();
+  fetch_var_names_.clear();
+  for (auto* op : global_block->AllOps()) {
+    if (op->Type() == "feed") {
+      feed_var_names_.insert(feed_var_names_.begin(), op->Output("Out")[0]);
+    } else if (op->Type() == "fetch") {
+      fetch_var_names_.push_back(op->Input("X")[0]);
+    }
+  }
+}
+
 void InferenceEngine::LoadInferenceModel(
     const std::string& dirname,
     const std::vector<std::string>& feed_var_names,
     const std::vector<std::string>& fetch_var_names) {
-#ifdef PADDLE_USE_PTOOLS
-  std::string model_filename = dirname + "/__model__";
-  LOG(INFO) << "Using PicklingTools, loading model from " << model_filename;
-  Val v;
-  LoadValFromFile(model_filename.c_str(), v, SERIALIZE_P0);
-  std::string program_desc_str = v["program_desc_str"];
-  LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
-// PicklingTools cannot parse the vector of strings correctly.
-#else
   std::string model_filename = dirname + "/__model__.dat";
   LOG(INFO) << "loading model from " << model_filename;
   std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
@@ -48,7 +66,7 @@ void InferenceEngine::LoadInferenceModel(
   LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
   inputfs.read(&program_desc_str[0], program_desc_str.size());
   inputfs.close();
-#endif
+
   program_ = new framework::ProgramDesc(program_desc_str);
   GenerateLoadProgram(dirname);
 
@@ -62,7 +80,7 @@ void InferenceEngine::LoadInferenceModel(
 }
 
 bool InferenceEngine::IsParameter(const framework::VarDesc* var) {
-  if (var->Persistable()) {
+  if (var->Persistable() && var->Name() != "feed" && var->Name() != "fetch") {
     // There are many unreachable variables in the program
     for (size_t i = 0; i < program_->Size(); ++i) {
       const framework::BlockDesc& block = program_->Block(i);
diff --git a/paddle/inference/inference.h b/paddle/inference/inference.h
index a3f3ef4b440036a0b27353cc092eed1bbf96eeb3..7fc09cb9e539a65a8cd3cceb1543bc7d111c22b3 100644
--- a/paddle/inference/inference.h
+++ b/paddle/inference/inference.h
@@ -28,6 +28,7 @@ public:
     delete load_program_;
   }
 
+  void LoadInferenceModel(const std::string& dirname);
   void LoadInferenceModel(const std::string& dirname,
                           const std::vector<std::string>& feed_var_names,
                           const std::vector<std::string>& fetch_var_names);
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index e1b695e8cd3dbf01ebe1ece7c72ed9fd2b60a58e..6745a8da17723d663913a29f28e5ea9eedc0372a 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -137,8 +137,6 @@ op_library(sum_op DEPS selected_rows_functor)
 op_library(sgd_op DEPS selected_rows_functor)
 op_library(print_op DEPS lod_tensor)
 op_library(adagrad_op DEPS selected_rows_functor)
-op_library(conv_op DEPS vol2col)
-op_library(pool_op DEPS pooling)
 op_library(maxout_op DEPS maxouting)
 op_library(unpool_op DEPS unpooling)
 op_library(pool_with_index_op DEPS pooling)
@@ -149,12 +147,27 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
-op_library(conv_transpose_op DEPS vol2col)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
-op_library(warpctc_op DEPS dynload_warpctc sequence_padding math_function)
+op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function)
 op_library(cos_sim_op DEPS cos_sim_functor)
 op_library(parallel_do_op DEPS executor)
+
+# Regist multiple Kernel to pybind
+if (WITH_GPU)
+op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
+op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
+op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
+  conv_transpose_cudnn_op.cu.cc DEPS vol2col)
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d, CUDNN);\n")
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(pool2d, CUDNN);\n")
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);\n")
+else()
+op_library(conv_op SRCS conv_op.cc DEPS vol2col)
+op_library(pool_op SRCS pool_op.cc DEPS pooling)
+op_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS vol2col)
+endif()
+
 # FIXME(typhoonzero): save/load depends lodtensor serialization functions
 op_library(save_op DEPS lod_tensor)
 op_library(load_op DEPS lod_tensor)
@@ -165,14 +178,13 @@ foreach(src ${GENERAL_OPS})
 endforeach()
 file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
 
-
 set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 
-
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
 cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
+cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op)
 cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
 if(WITH_GPU)
     cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
diff --git a/paddle/operators/assign_value_op.cc b/paddle/operators/assign_value_op.cc
index d5671c1183a0f58d2aedb0723bd462684ac5636e..8e3a53048920d9875f1b1a178b367cf02b2c9cf8 100644
--- a/paddle/operators/assign_value_op.cc
+++ b/paddle/operators/assign_value_op.cc
@@ -1,16 +1,16 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/operators/assign_value_op.h"
 
diff --git a/paddle/operators/assign_value_op.h b/paddle/operators/assign_value_op.h
index db2e43077999fa0f9aaada74026dd701ab2bf464..ec98c535132e454958e38c385f7da4df404fab50 100644
--- a/paddle/operators/assign_value_op.h
+++ b/paddle/operators/assign_value_op.h
@@ -1,16 +1,16 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #pragma once
 
diff --git a/paddle/operators/beam_search_op.cc b/paddle/operators/beam_search_op.cc
index ed2e7b738acd81140467ff22ad077155bba2fde1..4c71d66d22899d2cf6418935bf9358a0f73cec27 100644
--- a/paddle/operators/beam_search_op.cc
+++ b/paddle/operators/beam_search_op.cc
@@ -29,7 +29,7 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
   PruneEndidCandidates(pre_ids, &selected_items);
   // calculate the output tensor's height
   size_t num_instances = std::accumulate(
-      std::begin(items), std::end(items), 0,
+      std::begin(selected_items), std::end(selected_items), 0,
       [](size_t a, std::vector<Item> &b) { return a + b.size(); });
   // the output tensor shape should be [num_instances, 1]
   auto dims = framework::make_ddim(
@@ -48,12 +48,20 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
   size_t low_offset = 0;
   for (auto &items : selected_items) {
     low_level.push_back(low_offset);
+    sort(items.begin(), items.end(), [](const Item &a, const Item &b) {
+      if (a.offset < b.offset) {
+        return true;
+      }
+      return a.id < b.id;
+    });
     for (auto &item : items) {
       ids_data[low_offset] = item.id;
       scores_data[low_offset] = item.score;
       low_offset++;
     }
   }
+  low_level.push_back(low_offset);
+
   // fill lod
   auto abs_lod = framework::ToAbsOffset(ids_->lod());
   auto &high_level = abs_lod[lod_level_];
@@ -64,16 +72,21 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
   selected_scores->set_lod(lod);
 }
 
-void BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids,
-                                      std::vector<std::vector<Item>> *items) {
+int BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids,
+                                     std::vector<std::vector<Item>> *items) {
   auto *pre_ids_data = pre_ids.data<int64_t>();
 
+  int res = 0;
   for (size_t offset = 0; offset < items->size(); offset++) {
     auto prefix_id = pre_ids_data[offset];
     if (prefix_id == end_id_) {
       items->at(offset).clear();
+    } else {
+      res++;
     }
   }
+
+  return res;
 }
 
 std::vector<std::vector<BeamSearch::Item>> BeamSearch::ToMap(
@@ -121,11 +134,7 @@ bool BeamSearch::NextItemSet(std::vector<BeamSearch::Item> *items) {
   auto ids = *ids_;
   auto scores = *scores_;
 
-  auto source_abs_two_level_lod = framework::SliceInLevel(
-      ids.lod(), lod_level_, sent_offset_, sent_offset_ + 1);
-  source_abs_two_level_lod = framework::ToAbsOffset(source_abs_two_level_lod);
   auto abs_lod = framework::ToAbsOffset(ids.lod());
-  PADDLE_ENFORCE_GE(source_abs_two_level_lod.size(), 2UL);
 
   auto *ids_data = ids.data<int64_t>();
   auto *scores_data = scores.data<float>();
diff --git a/paddle/operators/beam_search_op.h b/paddle/operators/beam_search_op.h
index 08b551ef9bd63106ed222d3a956a912294f827ec..45d14d68fe8d1c4a84aa826e68e76692444765a8 100644
--- a/paddle/operators/beam_search_op.h
+++ b/paddle/operators/beam_search_op.h
@@ -73,7 +73,15 @@ namespace operators {
  * second level:
  * [0, 2, 4]
  *
- * tensor's data
+ * id tensor's data
+ * [[
+ * 4,
+ * 1,
+ * 3,
+ * 8,
+ * ]]
+ *
+ * score tensor's data
  * [[
  * 0.5,
  * 0.3,
@@ -137,16 +145,21 @@ class BeamSearch {
     Item() {}
     Item(size_t offset, size_t id, float score)
         : offset(offset), id(id), score(score) {}
-    // offset in the lod_level_+1
+    // offset in the higher lod level.
     size_t offset;
+    // // prefix id in the lower lod level.
+    // size_t prefix;
     // the candidate id
     id_t id;
     // the corresponding score
     score_t score;
   };
 
-  void PruneEndidCandidates(const framework::LoDTensor& pre_ids,
-                            std::vector<std::vector<Item>>* items);
+  /*
+   * Delete all the records that follows the end token.
+   */
+  int PruneEndidCandidates(const framework::LoDTensor& pre_ids,
+                           std::vector<std::vector<Item>>* items);
 
   /*
    * Transform the items into a map whose key is offset, value is the items.
diff --git a/paddle/operators/beam_search_op_test.cc b/paddle/operators/beam_search_op_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d4beb64a85a1645b3fed22c3325bd8c0b7cd12b1
--- /dev/null
+++ b/paddle/operators/beam_search_op_test.cc
@@ -0,0 +1,86 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/beam_search_op.h"
+
+#include <gtest/gtest.h>
+#include <vector>
+
+namespace paddle {
+namespace test {
+
+using std::vector;
+using framework::LoDTensor;
+using framework::LoD;
+using operators::BeamSearch;
+using paddle::platform::CPUPlace;
+using std::cout;
+using std::endl;
+
+void CreateInput(LoDTensor* ids, LoDTensor* scores) {
+  LoD lod;
+  vector<size_t> level0({0, 1, 4});
+  vector<size_t> level1({0, 1, 2, 3, 4});
+  lod.push_back(level0);
+  lod.push_back(level1);
+  ids->set_lod(lod);
+  scores->set_lod(lod);
+
+  auto dims = framework::make_ddim(vector<int64_t>({4, 3}));
+  ids->Resize(dims);
+  scores->Resize(dims);
+  CPUPlace place;
+
+  auto* ids_data = ids->mutable_data<int64_t>(place);
+  auto* scores_data = scores->mutable_data<float>(place);
+  vector<int64_t> _ids({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1});
+  vector<float> _scores(
+      {0.5, 0.3, 0.2, 0.6, 0.3, 0.1, 0.9, 0.5, 0.1, 0.7, 0.5, 0.1});
+
+  for (int i = 0; i < 12; i++) {
+    ids_data[i] = _ids[i];
+    scores_data[i] = _scores[i];
+  }
+}
+
+TEST(beam_search_op, run) {
+  CPUPlace place;
+  LoDTensor ids, scores;
+  CreateInput(&ids, &scores);
+
+  LoDTensor pre_ids;
+  pre_ids.Resize(framework::make_ddim(vector<int64_t>(4, 1)));
+  for (int i = 0; i < 4; i++) {
+    pre_ids.mutable_data<int64_t>(place)[i] = i + 1;
+  }
+
+  BeamSearch beamsearch(ids, scores, (int64_t)0, (int64_t)2, 0);
+  LoDTensor sids, sscores;
+  beamsearch(pre_ids, &sids, &sscores);
+
+  LOG(INFO) << "score: " << sscores << endl;
+
+  ASSERT_EQ(sids.lod(), sscores.lod());
+
+  vector<int> tids({2, 4, 3, 8});
+  vector<float> tscores({0.3, 0.5, 0.9, 0.7});
+
+  for (int i = 0; i < 4; i++) {
+    ASSERT_EQ(tids[i], sids.data<int64_t>()[i]);
+    ASSERT_EQ(tscores[i], sscores.data<float>()[i]);
+  }
+}
+
+}  // namespace test
+}  // namespace paddle
diff --git a/paddle/operators/clip_op.cc b/paddle/operators/clip_op.cc
index 573bb9c7dfdac2366c2458dd9f27a035a9f9b813..7adb74eab78dcdd0251b8db60781f6e24e348634 100644
--- a/paddle/operators/clip_op.cc
+++ b/paddle/operators/clip_op.cc
@@ -51,8 +51,8 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 Clip Operator.
 
-The clip operator limits the value of given input within an interval. The interval is
-specified with arguments 'min' and 'max':
+The clip operator limits the value of given input within an interval. The
+interval is specified with arguments 'min' and 'max':
 
 $$
 Out = \min(\max(X, min), max)
diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc
deleted file mode 100644
index 84d9ce1973a4cccadcb8f78feaecbcaa9e7af312..0000000000000000000000000000000000000000
--- a/paddle/operators/conv_cudnn_op.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/conv_op.h"
-
-namespace paddle {
-namespace operators {
-
-class CudnnConv2DOpMaker : public Conv2DOpMaker {
- public:
-  CudnnConv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv2DOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardware. This size should be chosen carefully.")
-        .SetDefault(4096);
-  }
-};
-
-class CudnnConv3DOpMaker : public Conv3DOpMaker {
- public:
-  CudnnConv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv3DOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardware. This size should be chosen carefully.")
-        .SetDefault(4096);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP(conv2d_cudnn, ops::ConvOp, ops::CudnnConv2DOpMaker,
-            conv2d_cudnn_grad, ops::ConvOpGrad);
-
-REGISTER_OP(conv3d_cudnn, ops::ConvOp, ops::CudnnConv3DOpMaker,
-            conv3d_cudnn_grad, ops::ConvOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    conv2d_cudnn,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv2d_cudnn_grad,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
-
-REGISTER_OP_CPU_KERNEL(
-    conv3d_cudnn,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv3d_cudnn_grad,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc
index 0c5ed3e4e80304c6fd174975166804347feb18b1..3a5409a7e3f29a4c46839d6395760fc7fe8c086e 100644
--- a/paddle/operators/conv_cudnn_op.cu.cc
+++ b/paddle/operators/conv_cudnn_op.cu.cc
@@ -32,7 +32,7 @@ static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
     static_cast<size_t>(1024) * 1024 * 1024;
 
 template <typename T>
-class CudnnConvOpKernel : public framework::OpKernel<T> {
+class CUDNNConvOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -147,7 +147,7 @@ class CudnnConvOpKernel : public framework::OpKernel<T> {
 };
 
 template <typename T>
-class CudnnConvGradOpKernel : public framework::OpKernel<T> {
+class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -315,17 +315,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-// TODO(dzhwinter) : below register should be removed
-REGISTER_OP_CUDA_KERNEL(conv2d_cudnn,
-                        paddle::operators::CudnnConvOpKernel<float>,
-                        paddle::operators::CudnnConvOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv2d_cudnn_grad,
-                        paddle::operators::CudnnConvGradOpKernel<float>,
-                        paddle::operators::CudnnConvGradOpKernel<double>);
-
-REGISTER_OP_CUDA_KERNEL(conv3d_cudnn,
-                        paddle::operators::CudnnConvOpKernel<float>,
-                        paddle::operators::CudnnConvOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv3d_cudnn_grad,
-                        paddle::operators::CudnnConvGradOpKernel<float>,
-                        paddle::operators::CudnnConvGradOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvOpKernel<float>,
+                   paddle::operators::CUDNNConvOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvGradOpKernel<float>,
+                   paddle::operators::CUDNNConvGradOpKernel<double>);
+
+REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvOpKernel<float>,
+                   paddle::operators::CUDNNConvOpKernel<double>);
+REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvGradOpKernel<float>,
+                   paddle::operators::CUDNNConvGradOpKernel<double>);
diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc
index 1468e3eb960a2b7c2e7af83ff701338596606922..d6882b275b22b9a2a2b6ff8cfb53a3462bbdbefe 100644
--- a/paddle/operators/conv_op.cc
+++ b/paddle/operators/conv_op.cc
@@ -67,6 +67,30 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->ShareLoD("Input", "Output");
 }
 
+framework::OpKernelType ConvOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
+  }
+#endif
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
@@ -108,6 +132,26 @@ Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
                             "dilations(h_dilation, w_dilation) of "
                             "convolution operator.")
       .SetDefault({1, 1});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Only used in cudnn kernel. Need set use_cudnn to true."
+               "workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardware. This size should be chosen carefully.")
+      .SetDefault(4096);
   AddComment(R"DOC(
 Convolution Operator.
 
@@ -181,6 +225,25 @@ Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
                             "dilations(d_dilation, h_dilation, w_dilation) of "
                             "convolution operator.")
       .SetDefault({1, 1, 1});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardware. This size should be chosen carefully.")
+      .SetDefault(4096);
 
   AddComment(R"DOC(
 Convolution3D Operator.
@@ -224,6 +287,31 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   }
 }
 
+framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
+  }
+#endif
+
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 }  // namespace operators
 }  // namespace paddle
 
diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h
index 83786e2329e7ae3c2908fdfdaeb1f79d19a53f47..5a8933e7915960f9fcbe92ae73c4f37b3b69ecaf 100644
--- a/paddle/operators/conv_op.h
+++ b/paddle/operators/conv_op.h
@@ -62,12 +62,20 @@ class ConvOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class ConvOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc
deleted file mode 100644
index 2e5333a265f2f59f31c651b8bb080599ec6e31a4..0000000000000000000000000000000000000000
--- a/paddle/operators/conv_transpose_cudnn_op.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/conv_transpose_op.h"
-
-namespace paddle {
-namespace operators {
-
-class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
- public:
-  CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv2DTransposeOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardward. This size should be carefully setted.")
-        .SetDefault(4096);
-  }
-};
-
-class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker {
- public:
-  CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv3DTransposeOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardward. This size should be carefully setted.")
-        .SetDefault(4096);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
-            ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad,
-            ops::ConvTransposeOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    conv2d_transpose_cudnn,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv2d_transpose_cudnn_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
-
-REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
-            ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad,
-            ops::ConvTransposeOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    conv3d_transpose_cudnn,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv3d_transpose_cudnn_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc
index fc37776ba1ed35aa6b2523eb593e9713cfcc54eb..23bc97e13c13e5c1d62a406be5e528ab272fa93a 100644
--- a/paddle/operators/conv_transpose_cudnn_op.cu.cc
+++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc
@@ -28,10 +28,10 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
 using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
 using DataLayout = platform::DataLayout;
 
-static constexpr size_t kConvCudnnWorkspaceLimitBytes = 1024 * 1024 * 1024;
+static constexpr size_t kConvCUDNNWorkspaceLimitBytes = 1024 * 1024 * 1024;
 
 template <typename T>
-class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
+class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -77,7 +77,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
     // ------------------- cudnn conv workspace ---------------------
     void* cudnn_workspace = nullptr;
     size_t workspace_size_in_bytes;  // final workspace to allocate.
-    size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes;
+    size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes;
     if (user_workspace_size > 0) {
       workspace_size_limit = user_workspace_size * 1024 * 1024;
     }
@@ -116,7 +116,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
 };
 
 template <typename T>
-class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
+class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -161,7 +161,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
     cudnnConvolutionBwdFilterAlgo_t filter_algo;
     size_t bwd_filter_ws_size, fwd_ws_size;
     size_t workspace_size_in_bytes = 0;
-    size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes;
+    size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes;
     if (user_workspace_size > 0) {
       workspace_size_limit = user_workspace_size * 1024 * 1024;
     }
@@ -236,16 +236,16 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn,
-                        ops::CudnnConvTransposeOpKernel<float>,
-                        ops::CudnnConvTransposeOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn_grad,
-                        ops::CudnnConvTransposeGradOpKernel<float>,
-                        ops::CudnnConvTransposeGradOpKernel<double>);
-
-REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn,
-                        ops::CudnnConvTransposeOpKernel<float>,
-                        ops::CudnnConvTransposeOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn_grad,
-                        ops::CudnnConvTransposeGradOpKernel<float>,
-                        ops::CudnnConvTransposeGradOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeOpKernel<float>,
+                   ops::CUDNNConvTransposeOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeGradOpKernel<float>,
+                   ops::CUDNNConvTransposeGradOpKernel<double>);
+
+REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeOpKernel<float>,
+                   ops::CUDNNConvTransposeOpKernel<double>);
+REGISTER_OP_KERNEL(conv3d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeGradOpKernel<float>,
+                   ops::CUDNNConvTransposeGradOpKernel<double>);
diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc
index 74636d138f1e40474a1cc5453609dafe14fcaaab..a2382a7e42eb9c5c6a8f13265b0e6173e6b05f76 100644
--- a/paddle/operators/conv_transpose_op.cc
+++ b/paddle/operators/conv_transpose_op.cc
@@ -58,6 +58,30 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
 }
 
+framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
+  }
+#endif
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
                                                OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
@@ -94,6 +118,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
       "(vector<int> default:{0, 0}), the paddings(h_pad, w_pad) of convolution "
       "transpose operator.")
       .SetDefault({0, 0});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Used in cudnn kernel only. workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardward. This size should be carefully setted.")
+      .SetDefault(4096);
   AddComment(R"DOC(
 Convolution2D Transpose Operator.
 
@@ -163,6 +206,25 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
                             "(vector<int> default:{0, 0, 0}), paddings(d_pad, "
                             "h_pad, w_pad) of convolution transpose operator.")
       .SetDefault({0, 0, 0});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Used in cudnn kernel only. workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardward. This size should be carefully setted.")
+      .SetDefault(4096);
   AddComment(R"DOC(
 Convolution3D Transpose Operator.
 
@@ -205,6 +267,30 @@ void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   }
 }
 
+framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
+  }
+#endif
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 }  // namespace operators
 }  // namespace paddle
 
diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h
index 4c8f8a80672788e8b2919e500d3627adec1ad035..a42ade41b165d1bfa00d2db0e45d40cf5d7b00bc 100644
--- a/paddle/operators/conv_transpose_op.h
+++ b/paddle/operators/conv_transpose_op.h
@@ -42,12 +42,20 @@ class ConvTransposeOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class ConvTransposeOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/operators/ctc_align_op.cc b/paddle/operators/ctc_align_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..eeecbd32127d2cf9756432817fc5d36673685aa7
--- /dev/null
+++ b/paddle/operators/ctc_align_op.cc
@@ -0,0 +1,93 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/ctc_align_op.h"
+
+namespace paddle {
+namespace operators {
+
+class CTCAlignOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Input"),
+                   "Input of CTCAlignOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Output"),
+                   "Output of CTCAlignOp should not be null.");
+
+    auto input_dims = ctx->GetInputDim("Input");
+
+    // TODO(wanghaoshuang): it is tricky to set the wrong dimension here.
+    ctx->SetOutputDim("Output", input_dims);
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
+        ctx.device_context());
+  }
+};
+
+class CTCAlignOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  CTCAlignOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Input",
+             "(LodTensor, default: LoDTensor<int>), Its shape is "
+             "[Lp, 1], where Lp is the sum of all input sequences' length.");
+    AddOutput("Output", "(Tensor, default: Tensor<int>), The align result.");
+    AddAttr<int>("blank",
+                 "(int, default: 0), the blank label setted in Connectionist "
+                 "Temporal Classification (CTC) op.")
+        .SetDefault(0);
+    AddAttr<bool>("merge_repeated",
+                  "(bool, default: true), whether to "
+                  "merge repeated elements between two blanks. ")
+        .SetDefault(true);
+    AddComment(R"DOC(
+CTCAlign op is used to merge repeated elements between two blanks
+and then delete all blanks in sequence.
+
+Given:
+    Input.data = [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6,
+                  6, 0, 0, 7, 7, 7, 0]
+    Input.dims = {18, 1}
+    Input.LoD = [[0, 11, 18]]
+
+And:
+    blank = 0
+    merge_repeated = True
+
+Then:
+    Output.data = [1, 2, 4, 4, 5, 6,
+                   6, 7]
+    Output.dims = {8, 1}
+    Output.LoD = [[0, 6, 8]]
+
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(ctc_align, ops::CTCAlignOp, ops::CTCAlignOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    ctc_align, ops::CTCAlignKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::CTCAlignKernel<paddle::platform::CPUDeviceContext, int64_t>);
diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..45635f16745346b08f7e31db2f25905bdbc3aeeb
--- /dev/null
+++ b/paddle/operators/ctc_align_op.cu
@@ -0,0 +1,91 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <stdio.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include "paddle/operators/ctc_align_op.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+__global__ void MergeAndDelCudaKernel(const int64_t num_token, const T* tokens,
+                                      const size_t num_seq, size_t* lod0,
+                                      const int blank, const int merge_repeated,
+                                      size_t* out_lod0, T* output) {
+  int ouput_idx = 0;
+  out_lod0[0] = 0;
+
+  for (int i = 0; i < num_seq; ++i) {
+    T pre_token = -1;
+    for (int j = lod0[i]; j < lod0[i + 1]; ++j) {
+      if (tokens[j] != blank && !(merge_repeated && tokens[j] == pre_token)) {
+        output[ouput_idx] = tokens[j];
+        ++ouput_idx;
+      }
+      pre_token = tokens[j];
+    }
+    out_lod0[i + 1] = ouput_idx;
+  }
+}
+
+template <typename T>
+class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                   "It must use CUDAPlace.");
+    const size_t level = 0;
+    auto* input = ctx.Input<LoDTensor>("Input");
+    auto* output = ctx.Output<LoDTensor>("Output");
+    auto input_lod = framework::ToAbsOffset(input->lod());
+
+    const T* tokens = input->data<T>();
+    const int64_t num_tokens = input->dims()[0];
+    const size_t num_seq = input_lod[level].size() - 1;
+
+    const int blank = ctx.Attr<int>("blank");
+    const int merge_repeated =
+        static_cast<int>(ctx.Attr<bool>("merge_repeated"));
+
+    // prepare a lod to record lod information while merging elements
+    thrust::device_vector<size_t> dev_out_lod0(input_lod[level].size());
+    size_t* dev_out_lod0_ptr = thrust::raw_pointer_cast(dev_out_lod0.data());
+
+    // merge elements and delete blank
+    T* output_data = output->mutable_data<T>({num_tokens, 1}, ctx.GetPlace());
+
+    auto stream = ctx.cuda_device_context().stream();
+    MergeAndDelCudaKernel<T><<<1, 1, 0, stream>>>(
+        num_tokens, tokens, num_seq, input_lod[level].data(), blank,
+        merge_repeated, dev_out_lod0_ptr, output_data);
+
+    // set output lod
+    thrust::host_vector<size_t> host_out_lod0(dev_out_lod0.begin(),
+                                              dev_out_lod0.end());
+    framework::LoD out_lod;
+    out_lod.push_back(host_out_lod0);
+    output->set_lod(out_lod);
+
+    // resize output dims
+    output->Resize({static_cast<int64_t>(host_out_lod0.back()), 1});
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OP_CUDA_KERNEL(ctc_align, paddle::operators::CTCAlignOpCUDAKernel<int>,
+                        paddle::operators::CTCAlignOpCUDAKernel<int64_t>);
diff --git a/paddle/operators/ctc_align_op.h b/paddle/operators/ctc_align_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..589413feb3dcbb7fea1f0a878b35d4bf714b5318
--- /dev/null
+++ b/paddle/operators/ctc_align_op.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string.h>
+#include "paddle/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+
+template <typename DeviceContext, typename T>
+class CTCAlignKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<LoDTensor>("Input");
+    auto* output = ctx.Output<LoDTensor>("Output");
+    const size_t level = 0;
+    auto input_lod = framework::ToAbsOffset(input->lod());
+
+    // check input dims and lod
+    auto input_dims = input->dims();
+    PADDLE_ENFORCE_EQ(input_dims[0],
+                      static_cast<int64_t>(input_lod[level].back()),
+                      "The first dimension of Input(Input) should be equal to "
+                      "the sum of all sequences' lengths.");
+
+    const size_t num_sequences = input_lod[level].size() - 1;
+    size_t blank = static_cast<size_t>(ctx.Attr<int>("blank"));
+    bool merge_repeated = ctx.Attr<bool>("merge_repeated");
+
+    // merge repeated tokens and delete blank
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    size_t output_idx = 0;
+    std::vector<size_t> output_lod0(1, 0);
+    const T* input_data = input->data<T>();
+    for (size_t seq_idx = 0; seq_idx < num_sequences; ++seq_idx) {
+      T prev_token = -1;
+      for (size_t i = input_lod[level][seq_idx];
+           i < input_lod[level][seq_idx + 1]; ++i) {
+        if (input_data[i] != blank &&
+            !(merge_repeated && input_data[i] == prev_token)) {
+          output_data[output_idx] = input_data[i];
+          ++output_idx;
+        }
+        prev_token = input_data[i];
+      }
+      output_lod0.push_back(output_idx);
+    }
+
+    // set output lod
+    framework::LoD output_lod;
+    output_lod.push_back(output_lod0);
+    output->set_lod(output_lod);
+
+    // resize output dims
+    output->Resize({static_cast<int64_t>(output_lod0.back()), 1});
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/detail/grpc_client.cc b/paddle/operators/detail/grpc_client.cc
index 5a4db2d7e686ce84abef620f890be8f3aa82cb73..1e41587c418fb0ce4e452d5c6735c54e2d42f798 100644
--- a/paddle/operators/detail/grpc_client.cc
+++ b/paddle/operators/detail/grpc_client.cc
@@ -63,9 +63,6 @@ bool RPCClient::AsyncGetVariable(const std::string& ep,
   sendrecv::VariableMessage req;
   req.set_varname(var_name);
 
-  auto* var = scope.FindVar(var_name);
-  SerializeToMessage(var_name, var, ctx, &req);
-
   // varhandle
   VarHandle var_h;
   var_h.ep = ep;
@@ -87,7 +84,7 @@ bool RPCClient::AsyncGetVariable(const std::string& ep,
   return true;
 }
 
-bool RPCClient::wait() {
+bool RPCClient::Wait() {
   bool ok = true;
 
   while (true) {
@@ -96,7 +93,6 @@ bool RPCClient::wait() {
     }
 
     if (!Proceed()) {
-      LOG(ERROR) << "Get meets CompletionQueue error";
       return false;
     }
   }
@@ -110,9 +106,9 @@ bool RPCClient::Proceed() {
 
   // request counts.
   if (!cq_.Next(&tag, &ok)) {
+    LOG(ERROR) << "Get meets CompletionQueue error";
     return false;
   }
-  req_count_--;
 
   GPR_ASSERT(ok);
   PADDLE_ENFORCE(tag);
@@ -120,12 +116,15 @@ bool RPCClient::Proceed() {
   // TODO(gongwb): add more retries.
   ClientBase* c = static_cast<ClientBase*>(tag);
   if (!c->status_.ok()) {
+    LOG(ERROR) << "proc param error:" << c->var_h_.String()
+               << " grpc error:" << c->status_.error_message();
     delete c;
-    return true;
+    return false;
   }
 
   c->Process();
   delete c;
+  req_count_--;
   return true;
 }
 
@@ -135,8 +134,12 @@ std::shared_ptr<grpc::Channel> RPCClient::GetChannel(const std::string& ep) {
     return it->second;
   }
 
+  grpc::ChannelArguments args;
+  args.SetMaxSendMessageSize(std::numeric_limits<int>::max());
+  args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
+
   auto ch = std::shared_ptr<grpc::Channel>(
-      grpc::CreateChannel(ep, grpc::InsecureChannelCredentials()));
+      grpc::CreateCustomChannel(ep, grpc::InsecureChannelCredentials(), args));
 
   channels_[ep] = ch;
   return ch;
diff --git a/paddle/operators/detail/grpc_client.h b/paddle/operators/detail/grpc_client.h
index d27b5ced9ece67f9b9da3b7f87ec231477603580..a62e70a2533ae52d84d010504b19fed5aeb15dc0 100644
--- a/paddle/operators/detail/grpc_client.h
+++ b/paddle/operators/detail/grpc_client.h
@@ -130,7 +130,7 @@ class RPCClient {
                         const framework::Scope& scope,
                         const std::string& var_name,
                         int64_t time_out = 600 * 1000);
-  bool wait();
+  bool Wait();
 
  private:
   bool Proceed();
diff --git a/paddle/operators/detail/grpc_server.cc b/paddle/operators/detail/grpc_server.cc
index e8d561a57ff59e9221400241f881cb26fb6c6f06..3ddcd839bdd23547216465dfaf44a3cd8285fe6d 100644
--- a/paddle/operators/detail/grpc_server.cc
+++ b/paddle/operators/detail/grpc_server.cc
@@ -28,12 +28,18 @@ class RequestBase {
  public:
   explicit RequestBase(sendrecv::SendRecvService::AsyncService* service,
                        grpc::ServerCompletionQueue* cq)
-      : service_(service), cq_(cq), status_(PROCESS) {}
+      : service_(service), cq_(cq), status_(PROCESS) {
+    PADDLE_ENFORCE(cq_);
+  }
   virtual ~RequestBase() {}
   virtual void Process() { assert(false); }
 
   CallStatus Status() { return status_; }
   void SetStatus(CallStatus status) { status_ = status; }
+  virtual std::string GetReqName() {
+    assert(false);
+    return "";
+  }
 
  protected:
   grpc::ServerContext ctx_;
@@ -56,12 +62,14 @@ class RequestSend final : public RequestBase {
 
   virtual ~RequestSend() {}
 
+  virtual std::string GetReqName() { return request_.varname(); }
+
   virtual void Process() {
     MessageWithName msg_with_name =
         std::make_pair(request_.varname(), std::move(request_));
     queue_->Push(std::move(msg_with_name));
-    // TODO(gongwb): check var's info.
     responder_.Finish(reply_, grpc::Status::OK, this);
+    status_ = FINISH;
   }
 
  protected:
@@ -74,20 +82,30 @@ class RequestSend final : public RequestBase {
 class RequestGet final : public RequestBase {
  public:
   explicit RequestGet(sendrecv::SendRecvService::AsyncService* service,
-                      grpc::ServerCompletionQueue* cq, framework::Scope* scope)
-      : RequestBase(service, cq), responder_(&ctx_), scope_(scope) {
+                      grpc::ServerCompletionQueue* cq, framework::Scope* scope,
+                      const platform::DeviceContext* dev_ctx,
+                      SimpleBlockQueue<char>* queue)
+      : RequestBase(service, cq),
+        responder_(&ctx_),
+        scope_(scope),
+        dev_ctx_(dev_ctx),
+        queue_(queue) {
     service_->RequestGetVariable(&ctx_, &request_, &responder_, cq_, cq_, this);
   }
 
   virtual ~RequestGet() {}
 
+  virtual std::string GetReqName() { return request_.varname(); }
+
   virtual void Process() {
     // proc request.
     std::string var_name = request_.varname();
     auto* var = scope_->FindVar(var_name);
-    SerializeToMessage(var_name, var, platform::CPUDeviceContext(), &reply_);
+    SerializeToMessage(var_name, var, *dev_ctx_, &reply_);
     // TODO(gongwb): check var's info.
     responder_.Finish(reply_, grpc::Status::OK, this);
+    status_ = FINISH;
+    queue_->Push('c');
   }
 
  protected:
@@ -95,11 +113,21 @@ class RequestGet final : public RequestBase {
   sendrecv::VariableMessage reply_;
   ServerAsyncResponseWriter<sendrecv::VariableMessage> responder_;
   framework::Scope* scope_;
+  const platform::DeviceContext* dev_ctx_;
+  SimpleBlockQueue<char>* queue_;
 };
 
+void AsyncGRPCServer::WaitClientGet(int count) {
+  for (int i = 0; i < count; ++i) {
+    var_get_queue_.Pop();
+  }
+}
+
 void AsyncGRPCServer::RunSyncUpdate() {
   grpc::ServerBuilder builder;
   builder.AddListeningPort(address_, grpc::InsecureServerCredentials());
+  builder.SetMaxSendMessageSize(std::numeric_limits<int>::max());
+  builder.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
   builder.RegisterService(&service_);
 
   cq_send_ = builder.AddCompletionQueue();
@@ -134,7 +162,6 @@ void AsyncGRPCServer::ShutdownQueue() {
 }
 
 // This URL explains why shutdown is complicate:
-// https://stackoverflow.com/questions/35708348/grpc-what-is-the-recommended-way-to-shut-down-an-asynchronous-server-in-c
 void AsyncGRPCServer::ShutDown() {
   server_->Shutdown();
   ShutdownQueue();
@@ -155,22 +182,12 @@ void AsyncGRPCServer::TryToRegisterNewGetOne() {
   if (is_shut_down_) {
     return;
   }
-  RequestGet* get = new RequestGet(&service_, cq_get_.get(), scope_);
+  RequestGet* get = new RequestGet(&service_, cq_get_.get(), scope_, dev_ctx_,
+                                   &var_get_queue_);
   VLOG(4) << "create Requestget status:" << get->Status();
 }
 
-void AsyncGRPCServer::SetFinishOrDelete(RequestBase*& last) {
-  std::unique_lock<std::mutex> lock(cq_mutex_);
-  if (is_shut_down_) {
-    delete last;
-    last = NULL;
-    return;
-  }
-
-  last->SetStatus(FINISH);
-  return;
-}
-
+// FIXME(typhoonzero): remove wait argument and change cq_name to enum.
 void AsyncGRPCServer::HandleRequest(bool wait, grpc::ServerCompletionQueue* cq,
                                     std::string cq_name,
                                     std::function<void()> TryToRegisterNewOne) {
@@ -184,13 +201,19 @@ void AsyncGRPCServer::HandleRequest(bool wait, grpc::ServerCompletionQueue* cq,
       break;
     }
 
-    if (wait && !done_) {
-      Wait();
-    }
+    PADDLE_ENFORCE(tag);
+    // FIXME(typhoonzero): de-couple the barriers with recv_op
+    if (cq_name == "cq_get") WaitCond(1);
+    if (cq_name == "cq_send") WaitCond(0);
 
     RequestBase* base = (RequestBase*)tag;
+    // reference:
+    // https://github.com/tensorflow/tensorflow/issues/5596
+    // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM
+    // https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I
     if (!ok) {
-      VLOG(4) << cq_name << " recv no regular event";
+      LOG(WARNING) << cq_name << " recv no regular event:argument name"
+                   << base->GetReqName();
       TryToRegisterNewOne();
       delete base;
       continue;
@@ -201,7 +224,6 @@ void AsyncGRPCServer::HandleRequest(bool wait, grpc::ServerCompletionQueue* cq,
         VLOG(4) << cq_name << " status:" << base->Status();
         TryToRegisterNewOne();
         base->Process();
-        SetFinishOrDelete(base);
         break;
       }
       case FINISH: {
@@ -214,22 +236,18 @@ void AsyncGRPCServer::HandleRequest(bool wait, grpc::ServerCompletionQueue* cq,
   }
 }
 
-void AsyncGRPCServer::Wait() {
-  std::unique_lock<std::mutex> lock(this->mutex_);
-  condition_.wait(lock, [=] { return this->done_ == true; });
-}
-
-void AsyncGRPCServer::Reset() {
-  std::lock_guard<std::mutex> lock(this->mutex_);
-  done_ = false;
+void AsyncGRPCServer::WaitCond(int cond) {
+  std::unique_lock<std::mutex> lock(this->barrier_mutex_);
+  barrier_condition_.wait(lock,
+                          [=] { return this->barrier_cond_step_ == cond; });
 }
 
-void AsyncGRPCServer::Done() {
+void AsyncGRPCServer::SetCond(int cond) {
   {
-    std::lock_guard<std::mutex> lock(this->mutex_);
-    done_ = true;
+    std::lock_guard<std::mutex> lock(this->barrier_mutex_);
+    barrier_cond_step_ = cond;
   }
-  condition_.notify_all();
+  barrier_condition_.notify_all();
 }
 
 }  // namespace detail
diff --git a/paddle/operators/detail/grpc_server.h b/paddle/operators/detail/grpc_server.h
index 041fe05b2e9c37e8a91669b8f523c47b56e14cba..1ca9086c744c558fd05fb4fc1a7280729afbec28 100644
--- a/paddle/operators/detail/grpc_server.h
+++ b/paddle/operators/detail/grpc_server.h
@@ -37,16 +37,19 @@ class RequestBase;
 
 class AsyncGRPCServer final : public sendrecv::SendRecvService::Service {
  public:
-  explicit AsyncGRPCServer(std::string address) { address_ = address; }
+  explicit AsyncGRPCServer(const std::string &address) : address_(address) {}
 
   void RunSyncUpdate();
 
-  void Reset();
-
-  void Done();
+  // functions to sync server barrier status.
+  void WaitCond(int cond);
+  void SetCond(int cond);
+  void WaitClientGet(int count);
 
   void SetScope(framework::Scope *scope) { scope_ = scope; }
 
+  void SetDevCtx(const platform::DeviceContext *dev_ctx) { dev_ctx_ = dev_ctx; }
+
   const MessageWithName Get() { return this->var_recv_queue_.Pop(); }
 
   void Push(const MessageWithName &msg) { this->var_recv_queue_.Push(msg); }
@@ -54,13 +57,11 @@ class AsyncGRPCServer final : public sendrecv::SendRecvService::Service {
   void ShutDown();
 
  protected:
-  void Wait();
   void HandleRequest(bool wait, grpc::ServerCompletionQueue *cq,
                      std::string cq_name,
                      std::function<void()> TryToRegisterNewOne);
   void TryToRegisterNewSendOne();
   void TryToRegisterNewGetOne();
-  void SetFinishOrDelete(RequestBase *&last);
   void ShutdownQueue();
 
  private:
@@ -74,13 +75,15 @@ class AsyncGRPCServer final : public sendrecv::SendRecvService::Service {
 
   std::string address_;
   framework::Scope *scope_;
+  const platform::DeviceContext *dev_ctx_;
   // received variable from RPC, operators fetch variable from this queue.
   SimpleBlockQueue<MessageWithName> var_recv_queue_;
+  SimpleBlockQueue<char> var_get_queue_;
 
   // condition of the sub program
-  std::mutex mutex_;
-  volatile mutable bool done_;
-  std::condition_variable condition_;
+  std::mutex barrier_mutex_;
+  mutable int barrier_cond_step_;
+  std::condition_variable barrier_condition_;
 
   std::unique_ptr<std::thread> t_send_;
   std::unique_ptr<std::thread> t_get_;
diff --git a/paddle/operators/edit_distance_op.cc b/paddle/operators/edit_distance_op.cc
index e383f07fa9b53a3def10f6405a0d36f48f52ff08..62a1fcebe7b7222ffceafc3ca2bc74e3998225f6 100644
--- a/paddle/operators/edit_distance_op.cc
+++ b/paddle/operators/edit_distance_op.cc
@@ -49,10 +49,10 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
   EditDistanceOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Hyps",
-             "(2-D LoDTensor<int>, 2nd dim. equal to 1) "
+             "(2-D LoDTensor<int64_t>, 2nd dim. equal to 1) "
              "The indices for hypothesis strings.");
     AddInput("Refs",
-             "(2-D LoDTensor<int>, 2nd dim. equal to 1) "
+             "(2-D LoDTensor<int64_t>, 2nd dim. equal to 1) "
              "The indices for reference strings.");
     AddAttr<bool>("normalized",
                   "(bool, default false) Indicated whether to normalize "
@@ -66,22 +66,22 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
 EditDistance operator computes the edit distances between a batch of hypothesis
 strings and their references.
 
-Edit distance, also called Levenshtein distance, measures how dissimilar two strings 
-are by counting the minimum number of operations to transform one string into anthor. 
-Here the operations include insertion, deletion, and substitution. For example, 
-given hypothesis string A = "kitten" and reference B = "sitting", the edit distance 
-is 3 for A will be transformed into B at least after two substitutions and one 
+Edit distance, also called Levenshtein distance, measures how dissimilar two strings
+are by counting the minimum number of operations to transform one string into anthor.
+Here the operations include insertion, deletion, and substitution. For example,
+given hypothesis string A = "kitten" and reference B = "sitting", the edit distance
+is 3 for A will be transformed into B at least after two substitutions and one
 insertion:
-  
+
    "kitten" -> "sitten" -> "sittin" -> "sitting"
 
-Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total 
-number denoted by `batch_size`, and the separation is specified by the LoD information. 
-And the `batch_size` reference strings are arranged in order in the same way in the 
+Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total
+number denoted by `batch_size`, and the separation is specified by the LoD information.
+And the `batch_size` reference strings are arranged in order in the same way in the
 LoDTensor Input(Refs).
 
-Output(Out) contains the `batch_size` results and each stands for the edit stance 
-for a pair of strings respectively. If Attr(normalized) is true, the edit distance 
+Output(Out) contains the `batch_size` results and each stands for the edit stance
+for a pair of strings respectively. If Attr(normalized) is true, the edit distance
 will be divided by the length of reference string.
 )DOC");
   }
diff --git a/paddle/operators/edit_distance_op.cu b/paddle/operators/edit_distance_op.cu
index cf5ebc5c38fd006d10de790e45e9bff3409bd20c..338fd79bcc125b86c7764645c2fd8953d4477d2a 100644
--- a/paddle/operators/edit_distance_op.cu
+++ b/paddle/operators/edit_distance_op.cu
@@ -39,8 +39,8 @@ __global__ void FillFirstColumn(T* dist, const int M, const int N) {
 }
 
 template <typename T>
-__global__ void Levenshtein(T* dist, const int* x1, const int* x2, const int M,
-                            const int N, const int start) {
+__global__ void Levenshtein(T* dist, const int64_t* x1, const int64_t* x2,
+                            const int M, const int N, const int start) {
   int idx = blockDim.x * blockIdx.x + threadIdx.x;
   int offset = N;
   int index = start + idx * offset;
@@ -113,8 +113,8 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
         dist_t.Resize({m + 1, n + 1});
         dist_t.mutable_data<T>(ctx.GetPlace());
         auto dist = dist_t.data<T>();
-        auto x1 = x1_t->data<int>() + hyp_lod[num];
-        auto x2 = x2_t->data<int>() + ref_lod[num];
+        auto x1 = x1_t->data<int64_t>() + hyp_lod[num];
+        auto x2 = x2_t->data<int64_t>() + ref_lod[num];
 
         FillFirstColumn<T><<<1 + m / PADDLE_CUDA_NUM_THREADS,
                              PADDLE_CUDA_NUM_THREADS, 0, stream>>>(dist, m, n);
diff --git a/paddle/operators/edit_distance_op.h b/paddle/operators/edit_distance_op.h
index 537e70281a5a750db480468a8f8e3c0465de6c5a..4c5a29813ce39e42111c0ee5f3c16d5cefac4651 100644
--- a/paddle/operators/edit_distance_op.h
+++ b/paddle/operators/edit_distance_op.h
@@ -60,8 +60,8 @@ class EditDistanceKernel : public framework::OpKernel<T> {
         dist_t.Resize({m + 1, n + 1});
         dist_t.mutable_data<T>(ctx.GetPlace());
         auto dist = dist_t.data<T>();
-        auto x1 = x1_t->data<int>() + hyp_lod[num];
-        auto x2 = x2_t->data<int>() + ref_lod[num];
+        auto x1 = x1_t->data<int64_t>() + hyp_lod[num];
+        auto x2 = x2_t->data<int64_t>() + ref_lod[num];
         for (int64_t i = 0; i < m + 1; ++i) {
           dist[i * (n + 1)] = i;
         }
diff --git a/paddle/operators/elementwise_add_op.h b/paddle/operators/elementwise_add_op.h
index 59abbb57d1dcfbef6ead70e4afa9a3816d60d9b5..a8389429f26c17ceab1db22175c90888546ead6f 100644
--- a/paddle/operators/elementwise_add_op.h
+++ b/paddle/operators/elementwise_add_op.h
@@ -28,39 +28,7 @@ template <typename DeviceContext, typename T>
 class ElementwiseAddKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    using Tensor = framework::Tensor;
-
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* z = ctx.Output<Tensor>("Out");
-    z->mutable_data<T>(ctx.GetPlace());
-    TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
-        x, y, z, ctx.template device_context<DeviceContext>(), AddFunctor<T>());
-
-    auto x_dims = x->dims();
-    auto y_dims = y->dims();
-    PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
-                      "Rank of first input must >= rank of second input.");
-
-    if (x_dims == y_dims) {
-      functor.Run();
-      return;
-    }
-
-    int axis = ctx.Attr<int>("axis");
-    axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
-    PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
-                   "Axis should be in range [0, x_dims)");
-
-    int pre, n, post;
-    get_mid_dims(x_dims, y_dims, axis, pre, n, post);
-    if (post == 1) {
-      functor.RunRowWise(n, pre);
-      return;
-    } else {
-      functor.RunMidWise(n, pre, post);
-      return;
-    }
+    ElementwiseComputeEx<AddFunctor<T>, DeviceContext, T>(ctx);
   }
 };
 
@@ -81,23 +49,6 @@ struct ElementwiseAddGradFunctor {
   }
 };
 
-template <typename T>
-struct ElementwiseAddOneGradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = dz_e.sum();
-    }
-  }
-};
-
 template <typename T>
 struct ElementwiseAddBroadCastGradFunctor {
   template <typename Device, typename X, typename Y, typename Z, typename dX,
@@ -142,7 +93,6 @@ class ElementwiseAddGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseAddGradFunctor<T>,
-                           ElementwiseAddOneGradFunctor<T>,
                            ElementwiseAddBroadCastGradFunctor<T>,
                            ElementwiseAddBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/elementwise_div_op.h b/paddle/operators/elementwise_div_op.h
index 875abd313ffc8fdf910d461922ff41f65ef276e7..ef26cb6c914f50ded07cc9d0d8de3f49f2151129 100644
--- a/paddle/operators/elementwise_div_op.h
+++ b/paddle/operators/elementwise_div_op.h
@@ -19,11 +19,16 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+template <typename T>
+struct DivFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a / b; }
+};
+
 template <typename DeviceContext, typename T>
 class ElementwiseDivKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    ElementwiseCompute<EigenDivFunctor, DeviceContext, T>(ctx);
+    ElementwiseComputeEx<DivFunctor<T>, DeviceContext, T>(ctx);
   }
 };
 
@@ -107,7 +112,6 @@ class ElementwiseDivGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseDivGradFunctor<T>,
-                           ElementwiseDivGradFunctor<T>,
                            ElementwiseDivBroadCastGradFunctor<T>,
                            ElementwiseDivBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/elementwise_max_op.cc b/paddle/operators/elementwise_max_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..53c27ae5be4cbfe85ce61aa27196594ae152eea4
--- /dev/null
+++ b/paddle/operators/elementwise_max_op.cc
@@ -0,0 +1,45 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/elementwise_max_op.h"
+#include "paddle/operators/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+class ElementwiseMaxOpMaker : public ElementwiseOpMaker {
+ public:
+  ElementwiseMaxOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : ElementwiseOpMaker(proto, op_checker) {
+    SetComment("Max", "Out = max(X, Y)");
+    AddComment(comment_);
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(elementwise_max, ops::ElementwiseOp, ops::ElementwiseMaxOpMaker,
+            elementwise_max_grad, ops::ElementwiseOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_max,
+    ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_max_grad,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
diff --git a/paddle/operators/elementwise_max_op.cu b/paddle/operators/elementwise_max_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..5ff4af17477cbd35b765cc00d46c95fda620e2df
--- /dev/null
+++ b/paddle/operators/elementwise_max_op.cu
@@ -0,0 +1,32 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/elementwise_max_op.h"
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_max,
+    ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ElementwiseMaxKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_max_grad,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ElementwiseMaxGradKernel<paddle::platform::CUDADeviceContext,
+                                  int64_t>);
diff --git a/paddle/operators/elementwise_max_op.h b/paddle/operators/elementwise_max_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..255728e8e620665a7de225b228c19d6c510da1c8
--- /dev/null
+++ b/paddle/operators/elementwise_max_op.h
@@ -0,0 +1,120 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/operators/elementwise_op_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+struct MaxFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a > b ? a : b; }
+};
+
+template <typename DeviceContext, typename T>
+class ElementwiseMaxKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    ElementwiseComputeEx<MaxFunctor<T>, DeviceContext, T>(ctx);
+  }
+};
+
+template <typename T>
+struct ElementwiseMaxGradFunctor {
+  template <typename Device, typename X, typename Y, typename Z, typename dX,
+            typename dY, typename dZ>
+  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
+    auto x_e = framework::EigenVector<T>::Flatten(*x);
+    auto y_e = framework::EigenVector<T>::Flatten(*y);
+    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
+
+    if (dx) {
+      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
+      dx_e.device(d) = (x_e > y_e).template cast<T>() * dz_e;
+    }
+    if (dy) {
+      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
+      dy_e.device(d) = (x_e <= y_e).template cast<T>() * dz_e;
+    }
+  }
+};
+
+template <typename T>
+struct ElementwiseMaxBroadCastGradFunctor {
+  template <typename Device, typename X, typename Y, typename Z, typename dX,
+            typename dY, typename dZ, typename Pre, typename N>
+  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
+    auto x_e = framework::EigenVector<T>::Flatten(*x);
+    auto y_e = framework::EigenVector<T>::Flatten(*y);
+    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
+
+    auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n))
+                         .broadcast(Eigen::DSizes<int, 2>(pre, 1))
+                         .reshape(Eigen::DSizes<int, 1>(x_e.size()));
+
+    if (dx) {
+      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
+      dx_e.device(d) = (x_e > y_e_bcast).template cast<T>() * dz_e;
+    }
+
+    if (dy) {
+      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
+      dy_e.device(d) = ((x_e <= y_e_bcast).template cast<T>() * dz_e)
+                           .reshape(Eigen::DSizes<int, 2>(pre, n))
+                           .sum(Eigen::array<int, 1>{{0}});
+    }
+  }
+};
+
+template <typename T>
+struct ElementwiseMaxBroadCast2GradFunctor {
+  template <typename Device, typename X, typename Y, typename Z, typename dX,
+            typename dY, typename dZ, typename Pre, typename N, typename Post>
+  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
+                  Post post) {
+    auto x_e = framework::EigenVector<T>::Flatten(*x);
+    auto y_e = framework::EigenVector<T>::Flatten(*y);
+    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
+
+    auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1))
+                         .broadcast(Eigen::DSizes<int, 3>(pre, 1, post))
+                         .reshape(Eigen::DSizes<int, 1>(x_e.size()));
+    if (dx) {
+      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
+      dx_e.device(d) = (x_e > y_e_bcast).template cast<T>() * dz_e;
+    }
+
+    if (dy) {
+      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
+      dy_e.device(d) = ((x_e <= y_e_bcast).template cast<T>() * dz_e)
+                           .reshape(Eigen::DSizes<int, 3>(pre, n, post))
+                           .sum(Eigen::array<int, 2>{{0, 2}});
+    }
+  }
+};
+
+template <typename DeviceContext, typename T>
+class ElementwiseMaxGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    ElementwiseGradCompute<DeviceContext, T, ElementwiseMaxGradFunctor<T>,
+                           ElementwiseMaxBroadCastGradFunctor<T>,
+                           ElementwiseMaxBroadCast2GradFunctor<T>>(ctx);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/elementwise_min_op.cc b/paddle/operators/elementwise_min_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..99482e1bf60c88062087c5fe0105e90aa0a8677c
--- /dev/null
+++ b/paddle/operators/elementwise_min_op.cc
@@ -0,0 +1,45 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/elementwise_min_op.h"
+#include "paddle/operators/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+class ElementwiseMinOpMaker : public ElementwiseOpMaker {
+ public:
+  ElementwiseMinOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : ElementwiseOpMaker(proto, op_checker) {
+    SetComment("Max", "Out = min(X, Y)");
+    AddComment(comment_);
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(elementwise_min, ops::ElementwiseOp, ops::ElementwiseMinOpMaker,
+            elementwise_min_grad, ops::ElementwiseOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_min,
+    ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_min_grad,
+    ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ElementwiseMinGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
diff --git a/paddle/operators/elementwise_min_op.cu b/paddle/operators/elementwise_min_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3547e6ccb77177002b1ecbee4e4604b602f72209
--- /dev/null
+++ b/paddle/operators/elementwise_min_op.cu
@@ -0,0 +1,32 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/elementwise_min_op.h"
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_min,
+    ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ElementwiseMinKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_min_grad,
+    ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ElementwiseMinGradKernel<paddle::platform::CUDADeviceContext,
+                                  int64_t>);
diff --git a/paddle/operators/elementwise_min_op.h b/paddle/operators/elementwise_min_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..e6627a0f1bb468c8e4661b83489cb964b72dddb0
--- /dev/null
+++ b/paddle/operators/elementwise_min_op.h
@@ -0,0 +1,120 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/operators/elementwise_op_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+struct MinFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a < b ? a : b; }
+};
+
+template <typename DeviceContext, typename T>
+class ElementwiseMinKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    ElementwiseComputeEx<MinFunctor<T>, DeviceContext, T>(ctx);
+  }
+};
+
+template <typename T>
+struct ElementwiseMinGradFunctor {
+  template <typename Device, typename X, typename Y, typename Z, typename dX,
+            typename dY, typename dZ>
+  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
+    auto x_e = framework::EigenVector<T>::Flatten(*x);
+    auto y_e = framework::EigenVector<T>::Flatten(*y);
+    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
+
+    if (dx) {
+      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
+      dx_e.device(d) = (x_e < y_e).template cast<T>() * dz_e;
+    }
+    if (dy) {
+      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
+      dy_e.device(d) = (x_e >= y_e).template cast<T>() * dz_e;
+    }
+  }
+};
+
+template <typename T>
+struct ElementwiseMinBroadCastGradFunctor {
+  template <typename Device, typename X, typename Y, typename Z, typename dX,
+            typename dY, typename dZ, typename Pre, typename N>
+  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
+    auto x_e = framework::EigenVector<T>::Flatten(*x);
+    auto y_e = framework::EigenVector<T>::Flatten(*y);
+    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
+
+    auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n))
+                         .broadcast(Eigen::DSizes<int, 2>(pre, 1))
+                         .reshape(Eigen::DSizes<int, 1>(x_e.size()));
+
+    if (dx) {
+      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
+      dx_e.device(d) = (x_e < y_e_bcast).template cast<T>() * dz_e;
+    }
+
+    if (dy) {
+      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
+      dy_e.device(d) = ((x_e >= y_e_bcast).template cast<T>() * dz_e)
+                           .reshape(Eigen::DSizes<int, 2>(pre, n))
+                           .sum(Eigen::array<int, 1>{{0}});
+    }
+  }
+};
+
+template <typename T>
+struct ElementwiseMinBroadCast2GradFunctor {
+  template <typename Device, typename X, typename Y, typename Z, typename dX,
+            typename dY, typename dZ, typename Pre, typename N, typename Post>
+  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
+                  Post post) {
+    auto x_e = framework::EigenVector<T>::Flatten(*x);
+    auto y_e = framework::EigenVector<T>::Flatten(*y);
+    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
+
+    auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1))
+                         .broadcast(Eigen::DSizes<int, 3>(pre, 1, post))
+                         .reshape(Eigen::DSizes<int, 1>(x_e.size()));
+    if (dx) {
+      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
+      dx_e.device(d) = (x_e < y_e_bcast).template cast<T>() * dz_e;
+    }
+
+    if (dy) {
+      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
+      dy_e.device(d) = ((x_e >= y_e_bcast).template cast<T>() * dz_e)
+                           .reshape(Eigen::DSizes<int, 3>(pre, n, post))
+                           .sum(Eigen::array<int, 2>{{0, 2}});
+    }
+  }
+};
+
+template <typename DeviceContext, typename T>
+class ElementwiseMinGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    ElementwiseGradCompute<DeviceContext, T, ElementwiseMinGradFunctor<T>,
+                           ElementwiseMinBroadCastGradFunctor<T>,
+                           ElementwiseMinBroadCast2GradFunctor<T>>(ctx);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/elementwise_mul_op.h b/paddle/operators/elementwise_mul_op.h
index 3ee50207c07fa2b7ccf2c002903a4f055dbfb352..4b86b00b5a095ae898f9ce0c17cde2cc91060ba9 100644
--- a/paddle/operators/elementwise_mul_op.h
+++ b/paddle/operators/elementwise_mul_op.h
@@ -18,11 +18,16 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+template <typename T>
+struct MulFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a * b; }
+};
+
 template <typename DeviceContext, typename T>
 class ElementwiseMulKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    ElementwiseCompute<EigenMulFunctor, DeviceContext, T>(ctx);
+    ElementwiseComputeEx<MulFunctor<T>, DeviceContext, T>(ctx);
   }
 };
 
@@ -106,7 +111,6 @@ class ElementwiseMulGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseMulGradFunctor<T>,
-                           ElementwiseMulGradFunctor<T>,
                            ElementwiseMulBroadCastGradFunctor<T>,
                            ElementwiseMulBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/elementwise_op.h b/paddle/operators/elementwise_op.h
index a342595b546bfca1a344cf8a549597df6a29adec..1a0131d8b943da3deebd0c461f78cb02b34e6dc2 100644
--- a/paddle/operators/elementwise_op.h
+++ b/paddle/operators/elementwise_op.h
@@ -26,9 +26,9 @@ class ElementwiseOp : public framework::OperatorWithKernel {
   using Tensor = framework::Tensor;
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"),
-                   "Input(X) of elementwise op should not be null");
+                   "Input(X) of elementwise op should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Y"),
-                   "Input(Y) of elementwise op should not be null");
+                   "Input(Y) of elementwise op should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of elementwise op should not be null.");
 
@@ -45,12 +45,12 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ElementwiseOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "(Tensor) The first input tensor of elementwise op");
-    AddInput("Y", "(Tensor) The second input tensor of elementwise op");
-    AddOutput("Out", "The output of elementwise op");
+    AddInput("X", "(Tensor), The first input tensor of elementwise op.");
+    AddInput("Y", "(Tensor), The second input tensor of elementwise op.");
+    AddOutput("Out", "The output of elementwise op.");
     AddAttr<int>("axis",
-                 "(int, default -1) The starting dimension index "
-                 "for broadcasting Y onto X")
+                 "(int, default -1). The start dimension index "
+                 "for broadcasting Y onto X.")
         .SetDefault(-1)
         .EqualGreaterThan(-1);
     comment_ = R"DOC(
@@ -58,19 +58,18 @@ Limited Elementwise {name} Operator.
 
 The equation is:
 
-.. math::
-  {equation}
+$${equation}$$
 
-X is a tensor of any dimension and the dimensions of tensor Y must be smaller than
-or equal to the dimensions of X. 
+$X$ is a tensor of any dimension and the dimensions of tensor $Y$ must be
+smaller than or equal to the dimensions of $X$.
 
 There are two cases for this operator:
-1. The shape of Y is same with X;
-2. The shape of Y is a subset of X.
+1. The shape of $Y$ is same with $X$;
+2. The shape of $Y$ is a subset of $X$.
 
 For case 2:
-Y will be broadcasted to match the shape of X and axis should be 
-the starting dimension index for broadcasting Y onto X.
+$Y$ will be broadcasted to match the shape of $X$ and axis should be
+set to index of the start dimension to broadcast $Y$ onto $X$.
 
 For example
   .. code-block:: python
@@ -81,7 +80,8 @@ For example
     shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
     shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
 
-Either of the inputs X and Y or none can carry the LoD (Level of Details) information. However, the output only shares the LoD information with input X.
+Either of the inputs $X$ and $Y$ or none can carry the LoD (Level of Details)
+information. However, the output only shares the LoD information with input $X$.
 
 )DOC";
     AddComment(comment_);
diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h
index 560247cb108dce5432bfe66556b9e675a3accc27..db5d30c1af286913f8decd7ab74058fd732ead65 100644
--- a/paddle/operators/elementwise_op_function.h
+++ b/paddle/operators/elementwise_op_function.h
@@ -311,8 +311,7 @@ EIGEN_FUNCTOR(Mul, EIGEN_MUL);
 EIGEN_FUNCTOR(Div, EIGEN_DIV);
 
 template <typename DeviceContext, typename T, typename functor,
-          typename functor1, typename broadcastfunctor,
-          typename broadcast2functor>
+          typename broadcastfunctor, typename broadcast2functor>
 void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
   using Tensor = framework::Tensor;
 
@@ -341,6 +340,13 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
     return;
   }
 
+  if (y_dims.size() == 1 && y_dims[0] == 1) {
+    // y is a scalar
+    auto extended_dims = framework::vectorize(x_dims);
+    extended_dims.push_back(1);
+    x_dims = framework::make_ddim(extended_dims);
+  }
+
   int axis = ctx.Attr<int>("axis");
   axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
 
@@ -357,5 +363,50 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
     return;
   }
 }
+
+template <typename Functor, typename DeviceContext, typename T>
+void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
+  using Tensor = framework::Tensor;
+
+  auto* x = ctx.Input<Tensor>("X");
+  auto* y = ctx.Input<Tensor>("Y");
+  auto* z = ctx.Output<Tensor>("Out");
+  z->mutable_data<T>(ctx.GetPlace());
+  TransformFunctor<Functor, T, DeviceContext> functor(
+      x, y, z, ctx.template device_context<DeviceContext>(), Functor());
+
+  auto x_dims = x->dims();
+  auto y_dims = y->dims();
+  PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
+                    "Rank of first input must >= rank of second input.");
+
+  if (x_dims == y_dims) {
+    functor.Run();
+    return;
+  }
+
+  if (y_dims.size() == 1 && y_dims[0] == 1) {
+    // y is a scalar
+    auto extended_dims = framework::vectorize(x_dims);
+    extended_dims.push_back(1);
+    x_dims = framework::make_ddim(extended_dims);
+  }
+
+  int axis = ctx.Attr<int>("axis");
+  axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
+  PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
+                 "Axis should be in range [0, x_dims)");
+
+  int pre, n, post;
+  get_mid_dims(x_dims, y_dims, axis, pre, n, post);
+  if (post == 1) {
+    functor.RunRowWise(n, pre);
+    return;
+  } else {
+    functor.RunMidWise(n, pre, post);
+    return;
+  }
+}
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/operators/elementwise_sub_op.h b/paddle/operators/elementwise_sub_op.h
index 66edf8672d13086f883f0a2ad7ef5802317cc79a..a2aca793026189ec87e00b52d7c351689f870400 100644
--- a/paddle/operators/elementwise_sub_op.h
+++ b/paddle/operators/elementwise_sub_op.h
@@ -18,11 +18,16 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+template <typename T>
+struct SubFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a - b; }
+};
+
 template <typename DeviceContext, typename T>
 class ElementwiseSubKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    ElementwiseCompute<EigenSubFunctor, DeviceContext, T>(ctx);
+    ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(ctx);
   }
 };
 
@@ -43,23 +48,6 @@ struct ElementwiseSubGradFunctor {
   }
 };
 
-template <typename T>
-struct ElementwiseSubOneGradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = (-1.0) * dz_e.sum();
-    }
-  }
-};
-
 template <typename T>
 struct ElementwiseSubBroadCastGradFunctor {
   template <typename Device, typename X, typename Y, typename Z, typename dX,
@@ -106,7 +94,6 @@ class ElementwiseSubGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseSubGradFunctor<T>,
-                           ElementwiseSubOneGradFunctor<T>,
                            ElementwiseSubBroadCastGradFunctor<T>,
                            ElementwiseSubBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/expand_op.cc b/paddle/operators/expand_op.cc
index 08fa91ed72aa41ed2f513c090b9085410bb5cc47..043c93654d33f7c105c89960e18ec72d3557237d 100644
--- a/paddle/operators/expand_op.cc
+++ b/paddle/operators/expand_op.cc
@@ -58,21 +58,21 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker {
   ExpandOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
-             "(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
-             "X is the input tensor to be expanded.");
+             "(Tensor, default Tensor<float>). A tensor with rank in [1, 6]."
+             "X is the input to be expanded.");
     AddOutput("Out",
-              "(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
-              "The rank of Output(Out) is same as Input(X) except that each "
-              "dimension size of Output(Out) is equal to corresponding "
-              "dimension size of Input(X) multiplying corresponding value of "
-              "Attr(expand_times).");
+              "(Tensor, default Tensor<float>). A tensor with rank in [1, 6]."
+              "The rank of Output(Out) have the same with Input(X). "
+              "After expanding, size of each dimension of Output(Out) is equal "
+              "to size of the corresponding dimension of Input(X) multiplying "
+              "the corresponding value given by Attr(expand_times).");
     AddAttr<std::vector<int>>("expand_times",
                               "Expand times number for each dimension.");
     AddComment(R"DOC(
 Expand operator tiles the input by given times number. You should set times
 number for each dimension by providing attribute 'expand_times'. The rank of X
-should be in [1, 6]. Please notice that size of 'expand_times' must be same with
-X's rank. Following is a using case:
+should be in [1, 6]. Please note that size of 'expand_times' must be the same
+with X's rank. Following is a using case:
 
 Input(X) is a 3-D tensor with shape [2, 3, 1]:
 
diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc
index 975e394c78db037a125adeb2c86e3c74dc0eb6f8..e24bf622b7f11e61198ab5238f47ba7edff2f4da 100644
--- a/paddle/operators/linear_chain_crf_op.cc
+++ b/paddle/operators/linear_chain_crf_op.cc
@@ -187,7 +187,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type()),
-        ctx.device_context());
+        platform::CPUPlace());
   }
 };
 
@@ -248,7 +248,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
         framework::ToDataType(
             ctx.Input<LoDTensor>(framework::GradVarName("LogLikelihood"))
                 ->type()),
-        ctx.device_context());
+        platform::CPUPlace());
   }
 };
 
diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h
index f502ebefde1fbd4b366f76d2915d94a23a124e5f..afc197a1c38091df5bf7d11ef07a4193ad6417cd 100644
--- a/paddle/operators/linear_chain_crf_op.h
+++ b/paddle/operators/linear_chain_crf_op.h
@@ -65,57 +65,14 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     const size_t level = 0;
     const size_t seq_num = in_lod[level].size() - 1;
 
-    // These local variables hold the inputs and outputs, garanteeing them on
-    // CPU memory, to provide a consistent reference.
-    // TODO(caoying) Fix this by moving all these local variables into the
-    // class's data members once we can profile the whole training process.
-    LoDTensor* emission_weights = nullptr;
-    LoDTensor emission_weight_tensor;
-    Tensor* transition_weights = nullptr;
-    Tensor transition_weight_tensor;
-    LoDTensor* label = nullptr;
-    LoDTensor label_tensor;
-
-    Tensor* emission_exps = nullptr;
-    Tensor emission_exps_tensor;
-    Tensor* transition_exps = nullptr;
-    Tensor transition_exps_tensor;
-    Tensor* alpha = nullptr;
-    Tensor alpha_tensor;
-    Tensor* ll = nullptr;
-    Tensor ll_tensor;
-
-    if (platform::is_gpu_place(ctx.GetPlace())) {
-      emission_weights = &emission_weight_tensor;
-      transition_weights = &transition_weight_tensor;
-      label = &label_tensor;
-
-      CopyInputsToCpuMemory(
-          ctx.device_context(), *ctx.Input<LoDTensor>("Emission"),
-          *ctx.Input<Tensor>("Transition"), *ctx.Input<LoDTensor>("Label"),
-          emission_weights, transition_weights, label);
-
-      emission_exps = &emission_exps_tensor;
-      emission_exps->Resize(emission_weights->dims());
-
-      transition_exps = &transition_exps_tensor;
-      transition_exps->Resize(transition_weights->dims());
-
-      alpha = &alpha_tensor;
-      alpha->Resize(ctx.Output<Tensor>("Alpha")->dims());
-
-      ll = &ll_tensor;
-    } else {
-      emission_weights =
-          const_cast<LoDTensor*>(ctx.Input<LoDTensor>("Emission"));
-      transition_weights = const_cast<Tensor*>(ctx.Input<Tensor>("Transition"));
-      label = const_cast<LoDTensor*>(ctx.Input<LoDTensor>("Label"));
-
-      emission_exps = ctx.Output<Tensor>("EmissionExps");
-      transition_exps = ctx.Output<Tensor>("TransitionExps");
-      alpha = ctx.Output<Tensor>("Alpha");
-      ll = ctx.Output<Tensor>("LogLikelihood");
-    }
+    const LoDTensor* emission_weights = ctx.Input<LoDTensor>("Emission");
+    const Tensor* transition_weights = ctx.Input<Tensor>("Transition");
+    const LoDTensor* label = ctx.Input<LoDTensor>("Label");
+
+    Tensor* emission_exps = ctx.Output<Tensor>("EmissionExps");
+    Tensor* transition_exps = ctx.Output<Tensor>("TransitionExps");
+    Tensor* alpha = ctx.Output<Tensor>("Alpha");
+    Tensor* ll = ctx.Output<Tensor>("LogLikelihood");
 
     // Because the computation codes only runs on CPU, here the memory for all
     // the outputs is FIXED to be allocated on the CPU memory.
@@ -173,61 +130,9 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
           one_seq, one_seq_row_max, one_seq_exps, *transition_weights,
           *transition_exps, one_seq_label, &one_seq_alpha);
     }
-
-    if (platform::is_gpu_place(ctx.GetPlace())) {
-      CopyOutputsToGpuMemory(
-          ctx.device_context(), *emission_exps, *transition_exps, *alpha, *ll,
-          ctx.Output<Tensor>("EmissionExps"),
-          ctx.Output<Tensor>("TransitionExps"), ctx.Output<Tensor>("Alpha"),
-          ctx.Output<Tensor>("LogLikelihood"));
-    }
   };
 
  private:
-  void CopyInputsToCpuMemory(const platform::DeviceContext& ctx,
-                             const LoDTensor& emission_weights_src,
-                             const Tensor& transition_weights_src,
-                             const LoDTensor& label_src,
-                             LoDTensor* emission_weights_dst,
-                             Tensor* transition_weights_dst,
-                             LoDTensor* label_dst) const {
-    // Copy the inputs from GPU memory to CPU memory if this operators runs on
-    // GPU device.
-    auto copyLoDTensor = [](const platform::DeviceContext& ctx,
-                            const LoDTensor& src, LoDTensor* dst) {
-      dst->mutable_data<T>(src.dims(), platform::CPUPlace());
-      framework::Copy(src, platform::CPUPlace(), ctx, dst);
-    };
-
-    copyLoDTensor(ctx, emission_weights_src, emission_weights_dst);
-    copyLoDTensor(ctx, label_src, label_dst);
-
-    transition_weights_dst->mutable_data<T>(transition_weights_src.dims(),
-                                            platform::CPUPlace());
-    framework::Copy(transition_weights_src, platform::CPUPlace(), ctx,
-                    transition_weights_dst);
-  }
-
-  void CopyOutputsToGpuMemory(const platform::DeviceContext& ctx,
-                              const Tensor& emission_exps_src,
-                              const Tensor& transition_exps_src,
-                              const Tensor& alpha_src, const Tensor& ll_src,
-                              Tensor* emission_exps_dst,
-                              Tensor* transition_exps_dst, Tensor* alpha_dst,
-                              Tensor* ll_dst) const {
-    // Copy the forward results from CPU memory to GPU memory if this
-    // operators runs on GPU device.
-    auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src,
-                         Tensor* dst) {
-      dst->mutable_data<T>(platform::CUDAPlace());
-      framework::Copy(src, platform::CUDAPlace(), ctx, dst);
-    };
-    copyTensor(ctx, emission_exps_src, emission_exps_dst);
-    copyTensor(ctx, transition_exps_src, transition_exps_dst);
-    copyTensor(ctx, alpha_src, alpha_dst);
-    copyTensor(ctx, ll_src, ll_dst);
-  }
-
   T ForwardOneSequence(const Tensor& emission, const Tensor& emission_row_max,
                        const Tensor& emission_exps, const Tensor& trans_weights,
                        const Tensor& trans_weight_exps, const Tensor& label,
@@ -296,63 +201,17 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
     auto lod = ctx.Input<LoDTensor>("Label")->lod();
     PADDLE_ENFORCE(lod.size(), "Input(Label) must be a sequence.");
 
-    // These local variables hold the inputs and outputs, garanteeing them on
-    // CPU memory, to provide a consistent reference.
-    // TODO(caoying) Fix this by moving all these local variables into the
-    // class's data members once we can profile the training process, or
-    // implementing a real GPU kernel for CRF.
-    Tensor* label = nullptr;
-    Tensor label_tensor;
-    Tensor* emission_exps = nullptr;
-    Tensor emission_exps_tensor;
-    Tensor* transition_exps = nullptr;
-    Tensor transition_exps_tensor;
-    Tensor* alpha = nullptr;
-    Tensor alpha_tensor;
-    Tensor ll_grad_tensor;
-    T* ll_grad = nullptr;
-
-    Tensor* emission_grad = nullptr;
-    Tensor emission_grad_tensor;
-    Tensor* transition_grad = nullptr;
-    Tensor transition_grad_tensor;
-
-    if (platform::is_gpu_place(ctx.GetPlace())) {
-      label = &label_tensor;
-      emission_exps = &emission_exps_tensor;
-      transition_exps = &transition_exps_tensor;
-      alpha = &alpha_tensor;
-      CopyInputsToCpuMemory(
-          ctx.device_context(), *ctx.Input<LoDTensor>("Label"),
-          *ctx.Input<Tensor>("EmissionExps"),
-          *ctx.Input<Tensor>("TransitionExps"), *ctx.Input<Tensor>("Alpha"),
-          *ctx.Input<Tensor>(framework::GradVarName("LogLikelihood")), label,
-          emission_exps, transition_exps, alpha, &ll_grad_tensor);
-      ll_grad = ll_grad_tensor.data<T>();
-
-      if (ctx.Output<Tensor>(framework::GradVarName("Emission"))) {
-        emission_grad = &emission_grad_tensor;
-        emission_grad->Resize(emission_exps->dims());
-      }
+    const Tensor* label = ctx.Input<LoDTensor>("Label");
+    const Tensor* emission_exps = ctx.Input<Tensor>("EmissionExps");
+    const Tensor* transition_exps = ctx.Input<Tensor>("TransitionExps");
+    const Tensor* alpha = ctx.Input<Tensor>("Alpha");
+    const T* ll_grad =
+        ctx.Input<Tensor>(framework::GradVarName("LogLikelihood"))->data<T>();
 
-      if (ctx.Output<Tensor>(framework::GradVarName("Transition"))) {
-        transition_grad = &transition_grad_tensor;
-        transition_grad->Resize(transition_exps->dims());
-      }
-    } else {
-      label = const_cast<LoDTensor*>(ctx.Input<LoDTensor>("Label"));
-      emission_exps = const_cast<Tensor*>(ctx.Input<Tensor>("EmissionExps"));
-      transition_exps =
-          const_cast<Tensor*>(ctx.Input<Tensor>("TransitionExps"));
-      alpha = const_cast<Tensor*>(ctx.Input<Tensor>("Alpha"));
-      ll_grad = const_cast<Tensor*>(
-                    ctx.Input<Tensor>(framework::GradVarName("LogLikelihood")))
-                    ->data<T>();
-
-      emission_grad = ctx.Output<Tensor>(framework::GradVarName("Emission"));
-      transition_grad =
-          ctx.Output<Tensor>(framework::GradVarName("Transition"));
-    }
+    Tensor* emission_grad =
+        ctx.Output<Tensor>(framework::GradVarName("Emission"));
+    Tensor* transition_grad =
+        ctx.Output<Tensor>(framework::GradVarName("Transition"));
 
     // TODO(caoying) Fix this constraint. When the Input(Emission) is from the
     // data reader operator, it can have no gradients.
@@ -389,58 +248,9 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
           one_seq_emission_exps, *transition_exps, one_seq_alpha, one_seq_label,
           &one_seq_beta, transition_grad, &one_seq_emission_grad);
     }
-
-    if (platform::is_gpu_place(ctx.GetPlace())) {
-      CopyOutputsToGpuMemory(
-          ctx.device_context(), emission_grad, transition_grad,
-          ctx.Output<Tensor>(framework::GradVarName("Emission")),
-          ctx.Output<Tensor>(framework::GradVarName("Transition")));
-    }
   };
 
  private:
-  void CopyInputsToCpuMemory(const platform::DeviceContext& ctx,
-                             const LoDTensor& label_src,
-                             const Tensor& emission_exps_src,
-                             const Tensor& transition_exps_src,
-                             const Tensor& alpha_src, const Tensor& ll_grad_src,
-                             Tensor* label_dst, Tensor* emission_exps_dst,
-                             Tensor* transition_exps_dst, Tensor* alpha_dst,
-                             Tensor* ll_grad_dst) const {
-    // Copy the inputs from GPU memory to CPU memory when this operators runs on
-    // GPU device.
-    label_dst->mutable_data<T>(label_src.dims(), platform::CPUPlace());
-    framework::Copy(label_src, platform::CPUPlace(), ctx, label_dst);
-
-    auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src,
-                         Tensor* dst) {
-      dst->mutable_data<T>(src.dims(), platform::CPUPlace());
-      framework::Copy(src, platform::CPUPlace(), ctx, dst);
-    };
-    copyTensor(ctx, emission_exps_src, emission_exps_dst);
-    copyTensor(ctx, transition_exps_src, transition_exps_dst);
-    copyTensor(ctx, alpha_src, alpha_dst);
-    copyTensor(ctx, ll_grad_src, ll_grad_dst);
-  }
-
-  void CopyOutputsToGpuMemory(const platform::DeviceContext& ctx,
-                              const Tensor* emission_grad_src,
-                              const Tensor* transition_grad_src,
-                              Tensor* emission_grad_dst,
-                              Tensor* transition_grad_dst) const {
-    // Copy the backward results from CPU memory to GPU
-    // memory if this operators runs on GPU device.
-    auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor* src,
-                         Tensor* dst) {
-      if (src && dst) {
-        dst->mutable_data<T>(platform::CUDAPlace());
-        framework::Copy(*src, platform::CUDAPlace(), ctx, dst);
-      }
-    };
-    copyTensor(ctx, emission_grad_src, emission_grad_dst);
-    copyTensor(ctx, transition_grad_src, transition_grad_dst);
-  }
-
   void BackwardOneSequence(const platform::CPUDeviceContext& ctx,
                            const T ll_grad, const Tensor& emission_exps,
                            const Tensor& transition_exps, const Tensor& alpha,
diff --git a/paddle/operators/lstm_op.cc b/paddle/operators/lstm_op.cc
index 3b90b64b4effacf7240fb1bee8c0aa44251ad727..afb095a04e73c2f09b828c01630ef2347ff49613 100644
--- a/paddle/operators/lstm_op.cc
+++ b/paddle/operators/lstm_op.cc
@@ -117,7 +117,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("C0",
              "(Tensor, optional) the initial cell state is an optional "
              "input. This is a tensor with shape (N x D), where N is the "
-             "batch size. `H0` and `C0` can be NULL but only at the same time")
+             "batch size. `H0` and `C0` can be NULL but only at the same time.")
         .AsDispensable();
     AddInput("Weight",
              "(Tensor) the learnable hidden-hidden weights."
diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index fd59eef7d650b48feae68c89be54ec4e48cbcc7e..c607704efac86982c8c22e462381aaab488a9b69 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -13,6 +13,7 @@ if(WITH_GPU)
     nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function)
     nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor)
     nv_library(sequence_padding SRCS sequence_padding.cc sequence_padding.cu DEPS lod_tensor device_context)
+    nv_library(sequence_scale SRCS sequence_scale.cc sequence_scale.cu DEPS lod_tensor device_context)
     nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
     nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context)
     nv_library(unpooling SRCS unpooling.cc unpooling.cu DEPS device_context)
@@ -29,6 +30,7 @@ else()
     cc_library(context_project SRCS context_project.cc DEPS device_context math_function)
     cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor)
     cc_library(sequence_padding SRCS sequence_padding.cc DEPS lod_tensor device_context)
+    cc_library(sequence_scale SRCS sequence_scale.cc DEPS lod_tensor device_context)
     cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
     cc_library(maxouting SRCS maxouting.cc DEPS device_context)
     cc_library(unpooling SRCS unpooling.cc DEPS device_context)
diff --git a/paddle/operators/math/math_function_test.cc b/paddle/operators/math/math_function_test.cc
index 7c6f098ca9065ded1644420a3ab47911bf7bc3b3..c9f322b92e5476d889b57bcc91a0ce1d9e5339d5 100644
--- a/paddle/operators/math/math_function_test.cc
+++ b/paddle/operators/math/math_function_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/operators/math/math_function.h"
 #include "gtest/gtest.h"
 
diff --git a/paddle/operators/math/math_function_test.cu b/paddle/operators/math/math_function_test.cu
index d1139ac988c0077fd3e107c6ffee0fd84c5b7041..6f16d6679248a88425e1de487988e50ee8a469bf 100644
--- a/paddle/operators/math/math_function_test.cu
+++ b/paddle/operators/math/math_function_test.cu
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "gtest/gtest.h"
 #include "paddle/operators/math/math_function.h"
 
diff --git a/paddle/operators/math/matmul.h b/paddle/operators/math/matmul.h
index 7048e11e6f27a075892c28681a3c4913a27b3f3e..ae7f1fe9be5066a0ac0ac522849d481fc66a19be 100644
--- a/paddle/operators/math/matmul.h
+++ b/paddle/operators/math/matmul.h
@@ -41,10 +41,24 @@ class MatMulFunctor {
                       "Input tensor a must be at least 1-dimensional.");
     PADDLE_ENFORCE_GE(dim_b.size(), 1,
                       "Input tensor b must be at least 1-dimensional.");
-    PADDLE_ENFORCE_LE(dim_a.size(), 3,
-                      "Input tensor a must be at most 3-dimensional.");
-    PADDLE_ENFORCE_LE(dim_b.size(), 3,
-                      "Input tensor b must be at most 3-dimensional.");
+
+    std::vector<int64_t> out_dim;
+    int64_t batch_count = 1;
+    if (dim_a.size() > 3) {
+      PADDLE_ENFORCE(dim_b.size() == dim_a.size(),
+                     "The dimensions of X and Y must be the same, and both of "
+                     "them should be %d-dimensional.",
+                     dim_b.size());
+      // The first rank-2 dimensions are accumulated on the batch_count, and the
+      // last two dimensions are used for matrix multiplication.
+      for (int j = 0; j < dim_a.size() - 2; ++j) {
+        PADDLE_ENFORCE_EQ(dim_b[j], dim_a[j],
+                          "The %d-th dimension of X and Y must be the same.",
+                          j);
+        out_dim.push_back(dim_a[j]);
+        batch_count *= dim_a[j];
+      }
+    }
 
     int M = 0, N = 0, kA = 0, kB = 0, batchCountA = 0, batchCountB = 0,
         strideA = 0, strideB = 0;
@@ -67,7 +81,11 @@ class MatMulFunctor {
         strideA = M * kA;
         break;
       default:
-        assert(false);
+        batchCountA = batch_count;
+        size_t mat_s = dim_a.size() - 2;
+        M = trans_a ? dim_a[mat_s + 1] : dim_a[mat_s];
+        kA = trans_a ? dim_a[mat_s] : dim_a[mat_s + 1];
+        strideA = M * kA;
     }
 
     switch (dim_b.size()) {
@@ -88,7 +106,11 @@ class MatMulFunctor {
         strideB = kB * N;
         break;
       default:
-        assert(false);
+        batchCountB = batch_count;
+        size_t mat_s = dim_b.size() - 2;
+        kB = trans_b ? dim_b[mat_s + 1] : dim_b[mat_s];
+        N = trans_b ? dim_b[mat_s] : dim_b[mat_s + 1];
+        strideB = kB * N;
     }
 
     PADDLE_ENFORCE_EQ(
diff --git a/paddle/operators/math/sampler.cc b/paddle/operators/math/sampler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4f1cbfe31ac68499a51eda600b38b879f7ca055f
--- /dev/null
+++ b/paddle/operators/math/sampler.cc
@@ -0,0 +1,70 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "sampler.h"
+
+namespace paddle {
+namespace random {
+
+Sampler::~Sampler() {}
+
+UniformSampler::UniformSampler(int64 range)
+    : Sampler(range), inv_range_(1.0 / range) {
+  random_engine_ = std::make_shared<std::mt19937>(seed_);
+  dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
+}
+
+UniformSampler::UniformSampler(int64 range, unsigned int seed)
+    : Sampler(range, seed), inv_range_(1.0 / range) {
+  random_engine_ = std::make_shared<std::mt19937>(seed_);
+  dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
+}
+
+int64 UniformSampler::Sample() const { return (*dist_)(*random_engine_); }
+
+float UniformSampler::Probability(int64 value) const { return inv_range_; }
+
+LogUniformSampler::LogUniformSampler(int64 range)
+    : Sampler(range), log_range_(log(range + 1)) {
+  random_engine_ = std::make_shared<std::mt19937>(seed_);
+  dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
+}
+
+LogUniformSampler::LogUniformSampler(int64 range, unsigned int seed)
+    : Sampler(range, seed), log_range_(log(range + 1)) {
+  random_engine_ = std::make_shared<std::mt19937>(seed_);
+  dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
+}
+int64 LogUniformSampler::Sample() const {
+  // Got Log Uniform distribution from uniform distribution by
+  // inverse_transform_sampling method
+  // More details:
+  // https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler/
+  const int64 value =
+      static_cast<int64>(exp((*dist_)(*random_engine_) * log_range_)) - 1;
+  // Mathematically, value should be <= range_, but might not be due to some
+  // floating point roundoff, so we mod by range_.
+  return value % range_;
+}
+
+float LogUniformSampler::Probability(int64 value) const {
+  // Given f(x) = 1/[(x+1) * log_range_]
+  // The value's  probability  is integral of f(x) from value to (value + 1)
+  // More details:
+  // https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler
+  return (log((value + 2.0) / (value + 1.0))) / log_range_;
+}
+
+}  // namespace random
+}  // namespace paddle
diff --git a/paddle/operators/math/sampler.h b/paddle/operators/math/sampler.h
new file mode 100644
index 0000000000000000000000000000000000000000..8f82089e7bd9e0ae6282459b650c225d6765faee
--- /dev/null
+++ b/paddle/operators/math/sampler.h
@@ -0,0 +1,100 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <memory>
+#include <random>
+typedef long int64;
+namespace paddle {
+namespace operators {
+namespace math {
+
+// TODO(wanghaoshuang): Support for GPU
+
+/**
+* Sample integers from [0, range).
+*/
+class Sampler {
+ public:
+  explicit Sampler(int64 range) : range_(range) {
+    PADDLE_ENFORCE_GT(range, 0);
+    std::random_device r;
+    seed_ = r();
+  }
+  explicit Sampler(int64 range, unsigned int seed)
+      : range_(range), seed_(seed) {
+    PADDLE_ENFORCE_GT(range, 0);
+  }
+  virtual ~Sampler();
+  // Sample a single value
+  virtual int64 Sample() const = 0;
+  // The probability that a single call to Sample() returns the given value.
+  virtual float Probability(int64 value) const = 0;
+
+  int64 range() { return range_; };
+
+ protected:
+  const int64 range_;
+  unsigned int seed_;
+};
+
+/**
+ * Sample integers from [0, range).
+ * And the distribution function is:
+ * P(x) = 1 / range
+ */
+class UniformSampler : public Sampler {
+ public:
+  explicit UniformSampler(int64 range);
+
+  explicit UniformSampler(int64 range, unsigned int seed);
+
+  ~UniformSampler() override {}
+
+  int64 Sample() const override;
+
+  float Probability(int64 value) const override;
+
+ private:
+  const float inv_range_;
+  std::shared_ptr<std::mt19937_64> random_engine_;
+  std::shared_ptr<std::uniform_int_distribution<>> dist_;
+};
+
+/**
+ * Sample integers from [0, range).
+ * And the distribution function is:
+ * P(x) = (1/ln(range+1)) * ln(1 + 1/(x + 1))
+ */
+class LogUniformSampler : public Sampler {
+ public:
+  explicit LogUniformSampler(int64 range);
+
+  explicit LogUniformSampler(int64 range, unsigned int seed);
+
+  ~LogUniformSampler() override {}
+
+  int64 Sample() const override;
+
+  float Probability(int64 value) const override;
+
+ private:
+  const float log_range_;
+  std::shared_ptr<std::mt19937_64> random_engine_;
+  std::shared_ptr<std::uniform_real_distribution<>> dist_;
+};
+
+}  // math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/math/sequence2batch.cc b/paddle/operators/math/sequence2batch.cc
index 88977be1f8c030741c3a3a8f07a4feeb1d8bb4d9..e459a42ca251a9fc79f745f48a118ce898a0f77e 100644
--- a/paddle/operators/math/sequence2batch.cc
+++ b/paddle/operators/math/sequence2batch.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/operators/math/sequence2batch.h"
+#include "paddle/operators/math/math_function.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/operators/math/sequence_padding.cc b/paddle/operators/math/sequence_padding.cc
index fd66455eaef60209b9ca334480951a9f7687729b..2e69aa47eb8a060a6cac6588b2f37960898aba92 100644
--- a/paddle/operators/math/sequence_padding.cc
+++ b/paddle/operators/math/sequence_padding.cc
@@ -32,7 +32,8 @@ class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
 
     auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
+    PADDLE_ENFORCE_EQ(seq_dims[0],
+                      static_cast<int64_t>(abs_offset_lod[level].back()),
                       "The first dimension of LoDTensor seq should be "
                       "equal to the sum of all sequences's length.");
 
@@ -41,32 +42,32 @@ class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
                       "The input padding should be a 3-D Tensor of shape "
                       "[max_sequence_length, num_sequences, sequence_width].");
 
-    const size_t max_sequence_length = MaximumSequenceLength(lod, level);
+    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
     PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                       "The first dimension of Tensor padding should be the "
                       "maximum length of all sequences in LoDTensor seq.");
 
-    const size_t num_sequences = abs_offset_lod[level].size() - 1;
+    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
     PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                       "The second dimension of Tensor padding should be the "
                       "number of sequences in LoDTensor seq.");
 
-    const size_t sequence_width = seq.numel() / seq_dims[0];
+    const int64_t sequence_width = seq.numel() / seq_dims[0];
     PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                       "The third dimension of Tensor padding should be the "
                       "width of sequence in LoDTensor seq.");
 
     const T* seq_data = seq.data<T>();
     T* padding_data = padding.data<T>();
-    for (size_t i = 0; i < max_sequence_length; ++i) {
-      for (size_t j = 0; j < num_sequences; ++j) {
-        size_t start_pos = abs_offset_lod[level][j];
-        size_t sequence_length = abs_offset_lod[level][j + 1] - start_pos;
+    for (int64_t i = 0; i < max_sequence_length; ++i) {
+      for (int64_t j = 0; j < num_sequences; ++j) {
+        int64_t start_pos = abs_offset_lod[level][j];
+        int64_t sequence_length = abs_offset_lod[level][j + 1] - start_pos;
         if (i < sequence_length) {
           // i > 0 => sequence_length > 0
           T scale =
               norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
-          for (size_t k = 0; k < sequence_width; ++k) {
+          for (int64_t k = 0; k < sequence_width; ++k) {
             padding_data[(i * num_sequences + j) * sequence_width + k] =
                 seq_data[(start_pos + i) * sequence_width + k] * scale;
           }
@@ -93,7 +94,8 @@ class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
 
     auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
+    PADDLE_ENFORCE_EQ(seq_dims[0],
+                      static_cast<int64_t>(abs_offset_lod[level].back()),
                       "The first dimension of LoDTensor seq should be "
                       "equal to the sum of all sequences's length.");
 
@@ -102,31 +104,31 @@ class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
                       "The input padding should be a 3-D Tensor of shape "
                       "[max_sequnece_length, num_sequences, sequence_width].");
 
-    const size_t max_sequence_length = MaximumSequenceLength(lod, level);
+    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
     PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                       "The first dimension of Tensor padding should be "
                       "the maximum length of all sequences in LoDTensor seq.");
 
-    const size_t num_sequences = abs_offset_lod[level].size() - 1;
+    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
     PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                       "The second dimension of Tensor padding should be "
                       "the number of sequences in LoDTensor seq.");
 
-    const size_t sequence_width = seq.numel() / seq_dims[0];
+    const int64_t sequence_width = seq.numel() / seq_dims[0];
     PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                       "The third dimension of Tensor padding should be the "
                       "width of sequence in LoDTensor seq.");
 
     const T* padding_data = padding.data<T>();
     T* seq_data = seq.data<T>();
-    for (size_t i = 0; i < num_sequences; ++i) {
-      size_t start_pos = abs_offset_lod[level][i];
-      size_t sequence_length = abs_offset_lod[level][i + 1] - start_pos;
-      for (size_t j = 0; j < sequence_length; ++j) {
+    for (int64_t i = 0; i < num_sequences; ++i) {
+      int64_t start_pos = abs_offset_lod[level][i];
+      int64_t sequence_length = abs_offset_lod[level][i + 1] - start_pos;
+      for (int64_t j = 0; j < sequence_length; ++j) {
         // sequence_width > j > 0
         T scale =
             norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
-        for (size_t k = 0; k < sequence_width; ++k) {
+        for (int64_t k = 0; k < sequence_width; ++k) {
           seq_data[(start_pos + j) * sequence_width + k] =
               padding_data[(j * num_sequences + i) * sequence_width + k] *
               scale;
diff --git a/paddle/operators/math/sequence_padding.cu b/paddle/operators/math/sequence_padding.cu
index e4be178f81581dea2e84cf488b01d5f7f4cc0030..a38df26f59569c4fd54a1ba5691b2cd5f3245344 100644
--- a/paddle/operators/math/sequence_padding.cu
+++ b/paddle/operators/math/sequence_padding.cu
@@ -71,7 +71,8 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
 
     auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
+    PADDLE_ENFORCE_EQ(seq_dims[0],
+                      static_cast<int64_t>(abs_offset_lod[level].back()),
                       "The first dimension of LoDTensor seq should be "
                       "equal to the sum of all sequences's length.");
 
@@ -80,17 +81,17 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
                       "The input padding should be a 3-D Tensor of shape "
                       "[max_sequence_length, num_sequences, sequence_width].");
 
-    size_t max_sequence_length = MaximumSequenceLength(lod, level);
+    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
     PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                       "The first dimension of Tensor padding should be the "
                       "maximum length of all sequences in LoDTensor seq.");
 
-    const size_t num_sequences = abs_offset_lod[level].size() - 1;
+    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
     PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                       "The second dimension of Tensor padding should be the "
                       "number of sequences in LoDTensor seq.");
 
-    const size_t sequence_width = seq.numel() / seq_dims[0];
+    const int64_t sequence_width = seq.numel() / seq_dims[0];
     PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                       "The third dimension of Tensor padding should be the "
                       "width of sequence in LoDTensor seq.");
@@ -101,7 +102,7 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
       return;
     }
 
-    const size_t kBlockSize = 512;
+    const int64_t kBlockSize = 512;
 
     /* At least use 32 threads to copy sequence_width elements,
      * and at least 8 elements for each thread.
@@ -143,7 +144,8 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
 
     auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
+    PADDLE_ENFORCE_EQ(seq_dims[0],
+                      static_cast<int64_t>(abs_offset_lod[level].back()),
                       "The first dimension of LoDTensor seq should be "
                       "equal to the sum of all sequences's length.");
 
@@ -152,17 +154,17 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
                       "The input padding should be a 3-D Tensor of shape "
                       "[max_sequnece_length, num_sequences, sequence_width].");
 
-    size_t max_sequence_length = MaximumSequenceLength(lod, level);
+    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
     PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                       "The first dimension of Tensor padding should be "
                       "the maximum length of all sequences in LoDTensor seq.");
 
-    const size_t num_sequences = abs_offset_lod[level].size() - 1;
+    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
     PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                       "The second dimension of Tensor padding should be "
                       "the number of sequences in LoDTensor seq.");
 
-    const size_t sequence_width = seq.numel() / seq_dims[0];
+    const int64_t sequence_width = seq.numel() / seq_dims[0];
     PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                       "The third dimension of Tensor padding should be the "
                       "width of sequence in LoDTensor seq.");
@@ -173,7 +175,7 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
       return;
     }
 
-    const size_t kBlockSize = 512;
+    const int64_t kBlockSize = 512;
 
     /* At least use 32 threads to copy sequence_width elements,
      * and at least 8 elements for each thread.
diff --git a/paddle/operators/math/sequence_padding_test.cc b/paddle/operators/math/sequence_padding_test.cc
index 9799bcd65dc65d5741813374c68a2640eaf4556c..3e504f4a15c2cb4e2380f5ff8a39d83626dae062 100644
--- a/paddle/operators/math/sequence_padding_test.cc
+++ b/paddle/operators/math/sequence_padding_test.cc
@@ -31,7 +31,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
 
   cpu_seq.set_lod(lod);
   cpu_seq.mutable_data<T>(seq_dims, paddle::platform::CPUPlace());
-  for (size_t i = 0; i < cpu_seq.numel(); ++i) {
+  for (int64_t i = 0; i < cpu_seq.numel(); ++i) {
     cpu_seq.data<T>()[i] = static_cast<T>(i);
   }
 
@@ -69,7 +69,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
 
   EXPECT_EQ(cpu_seq.numel(), cpu_seq_back.numel());
   EXPECT_EQ(cpu_seq.dims(), cpu_seq_back.dims());
-  for (size_t i = 0; i < cpu_seq.numel(); ++i) {
+  for (int64_t i = 0; i < cpu_seq.numel(); ++i) {
     EXPECT_EQ(cpu_seq.data<T>()[i], cpu_seq_back.data<T>()[i]);
   }
 
diff --git a/paddle/operators/math/sequence_scale.cc b/paddle/operators/math/sequence_scale.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7e439e9a2cebaa5d494b185fd878e293a6895e45
--- /dev/null
+++ b/paddle/operators/math/sequence_scale.cc
@@ -0,0 +1,46 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/math/sequence_scale.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+template <typename T>
+class ScaleLoDTensorFunctor<platform::CPUDeviceContext, T> {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  framework::LoDTensor& seq, const T* scales) {
+    const size_t level = 0;
+    auto lod = seq.lod();
+    const size_t num_seq = lod[level].size() - 1;
+    size_t seq_width = seq.dims()[1];
+    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
+
+    T* seq_data = seq.mutable_data<T>(context.GetPlace());
+    for (size_t i = 0; i < num_seq; ++i) {
+      for (size_t j = lod[level][i] * seq_width;
+           j < lod[level][i + 1] * seq_width; ++j) {
+        seq_data[j] *= scales[i];
+      }
+    }
+  }
+};
+
+template class ScaleLoDTensorFunctor<platform::CPUDeviceContext, float>;
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/math/sequence_scale.cu b/paddle/operators/math/sequence_scale.cu
new file mode 100644
index 0000000000000000000000000000000000000000..ceaabd8e0fd81c927fbd4333c0aa7954b8da8513
--- /dev/null
+++ b/paddle/operators/math/sequence_scale.cu
@@ -0,0 +1,57 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/math/sequence_scale.h"
+#include "paddle/platform/cuda_helper.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+using platform::PADDLE_CUDA_NUM_THREADS;
+
+template <typename T, int BlockSize>
+__global__ void SequenceScaleKernel(T* seq, size_t* lod, const T* scales,
+                                    const size_t seq_width) {
+  for (int i = threadIdx.x;
+       i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * seq_width;
+       i += BlockSize) {
+    int idx = lod[blockIdx.x] * seq_width + i;
+    seq[idx] *= scales[blockIdx.x];
+  }
+}
+
+template <typename T>
+class ScaleLoDTensorFunctor<platform::CUDADeviceContext, T> {
+ public:
+  void operator()(const platform::CUDADeviceContext& context,
+                  framework::LoDTensor& seq, const T* scales) {
+    const size_t level = 0;
+    auto lod = seq.lod();
+    const size_t num_seq = lod[level].size() - 1;
+    const size_t seq_width = seq.numel() / seq.dims()[0];
+    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
+    T* seq_data = seq.mutable_data<T>(context.GetPlace());
+
+    SequenceScaleKernel<T, PADDLE_CUDA_NUM_THREADS><<<
+        num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>(
+        seq_data, abs_offset_lod[level].data(), scales, seq_width);
+  }
+};
+
+template class ScaleLoDTensorFunctor<platform::CUDADeviceContext, float>;
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/math/sequence_scale.h b/paddle/operators/math/sequence_scale.h
new file mode 100644
index 0000000000000000000000000000000000000000..ecd9a57c3f4d8d91bfb8933a0fd38355c227744d
--- /dev/null
+++ b/paddle/operators/math/sequence_scale.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/platform/device_context.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+/*
+ * \brief   Scale a sequence.
+ *
+ *  All sequences will be padded to the same length and stored in a transposed
+ * shape.
+ *  Example:
+ *    Given:
+ *      seq = (s0, s0, s0, s0; s1, s1; s2, s2, s2; s3)
+ *      scales = (2, 3, 4, 5)
+ *    then:
+ *      result = (2*s0, 2*s0, 2*s0, 2*s0; 3*s1, 3*s1; 4*s2, 4*s2, 4*s2; 5*s3)
+
+ *
+ * \param context       Device context of this functor.
+ * \param seq           LoDTensor which is stored in sequence format, the shape
+ *                      is [total_sequence_length, sequence_width] where
+ *                      total_sequence_length is the sum of all sequences'
+ *                      length.
+ * \param scales        Array<T>. The i-th sequence will be scaled by scales[i].
+ * \param num_seq       Number of sequence
+ *
+ */
+template <typename DeviceContext, typename T>
+class ScaleLoDTensorFunctor {
+ public:
+  void operator()(const DeviceContext& context, framework::LoDTensor& seq,
+                  const T* scales);
+};
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/matmul_op.cc b/paddle/operators/matmul_op.cc
index fd65d894d5749c97f860d614de354e89f6d9441d..3336978c8d8d94c69b986970274661a01ad2161d 100644
--- a/paddle/operators/matmul_op.cc
+++ b/paddle/operators/matmul_op.cc
@@ -41,10 +41,26 @@ class MatMulOp : public framework::OperatorWithKernel {
                       "Input tensor X must be at least 1-dimensional.");
     PADDLE_ENFORCE_GE(dim_y.size(), 1,
                       "Input tensor Y must be at least 1-dimensional.");
-    PADDLE_ENFORCE_LE(dim_x.size(), 3,
-                      "Input tensor X must be at most 3-dimensional.");
-    PADDLE_ENFORCE_LE(dim_y.size(), 3,
-                      "Input tensor Y must be at most 3-dimensional.");
+
+    std::vector<int64_t> out_dim;
+    int64_t batch_count = 1;
+    if (dim_x.size() > 3) {
+      PADDLE_ENFORCE_EQ(
+          dim_y.size(), dim_x.size(),
+          "The dimensions of X and Y must be the same, and both of "
+          "them should be %d-dimensional.",
+          dim_x.size());
+
+      // The first rank-2 dimensions are accumulated on the batch_count, and the
+      // last two dimensions are used for matrix multiplication.
+      for (int j = 0; j < dim_x.size() - 2; ++j) {
+        PADDLE_ENFORCE_EQ(dim_y[j], dim_x[j],
+                          "The %d-th dimension of X and Y must be the same.",
+                          j);
+        out_dim.push_back(dim_x[j]);
+        batch_count *= dim_x[j];
+      }
+    }
 
     int M = 0, N = 0, KX = 0, KY = 0, batchCountX = 0, batchCountY = 0;
     bool remove_initial_dim = false, remove_final_dim = false;
@@ -70,7 +86,11 @@ class MatMulOp : public framework::OperatorWithKernel {
         KX = transpose_x ? dim_x[1] : dim_x[2];
         break;
       default:
-        assert(false);
+        batchCountX = batch_count;
+        size_t mat_s = dim_x.size() - 2;
+        M = transpose_x ? dim_x[mat_s + 1] : dim_x[mat_s];
+        KX = transpose_x ? dim_x[mat_s] : dim_x[mat_s + 1];
+        break;
     }
 
     switch (dim_y.size()) {
@@ -94,7 +114,10 @@ class MatMulOp : public framework::OperatorWithKernel {
         N = transpose_y ? dim_y[1] : dim_y[2];
         break;
       default:
-        assert(false);
+        batchCountY = batch_count;
+        size_t mat_s = dim_y.size() - 2;
+        KY = transpose_y ? dim_y[mat_s + 1] : dim_y[mat_s];
+        N = transpose_y ? dim_y[mat_s] : dim_y[mat_s + 1];
     }
 
     PADDLE_ENFORCE_EQ(
@@ -110,7 +133,11 @@ class MatMulOp : public framework::OperatorWithKernel {
 
     std::vector<int64_t> dim_out;
     if (batchCount) {
-      dim_out.push_back(batchCount);
+      if (dim_x.size() > 3) {
+        dim_out.insert(dim_out.begin(), out_dim.begin(), out_dim.end());
+      } else {
+        dim_out.push_back(batchCount);
+      }
     }
     if (!remove_initial_dim) {
       dim_out.push_back(M);
@@ -162,10 +189,14 @@ Examples without transpose:
 - X: [B, M, K], Y: [K] => Out: [B, M]
 - X: [M, K], Y: [B, K, N] => Out: [B, M, N]
 - X: [B, M, K], Y: [B, K, N] => Out: [B, M, N]
+- X: [B, ..., M, K], Y: [B, ..., K, N] => Out: [B, ..., M, N]
 
 The behavior is designed to be similar to the `numpy.matmul` function.
 The differences are:
-- Currently only rank 1 to rank 3 input tensors are supported.
+- When the rank of the input data is less than or equal to 3, it
+  is similar to the `numpy.matmul` function.
+- When the rank of the input is greater than 3, the rank of X and
+  Y must be equal, and the first `rank - 2` dimensions must be equal.
 - We add `transpose_X` and `transpose_Y` flags.
 
 Both the input `X` and `Y` can carry the LoD (Level of Details) information,
diff --git a/paddle/operators/matmul_op.h b/paddle/operators/matmul_op.h
index 78adc64f76f45afce64c49bcf734647e0db2d6b3..fe6a97465f8992281c909d4600bcfd8121d6a64a 100644
--- a/paddle/operators/matmul_op.h
+++ b/paddle/operators/matmul_op.h
@@ -137,6 +137,13 @@ class MatMulGradKernel : public framework::OpKernel<T> {
       y_dims.push_back(1);
     }
 
+    int batch_count = 0;
+    // The first rank-2 dimensions are accumulated on the batch_count, and the
+    // last two dimensions are used for matrix multiplication.
+    if (x_dims.size() > 3) {
+      batch_count = accumulate(x_dims.begin(), x_dims.end() - 2, 1,
+                               std::multiplies<int>());
+    }
     // Fix the dOut dimensions.
     int M = 0, N = 0, batchCountX = 0, batchCountY = 0;
 
@@ -149,7 +156,9 @@ class MatMulGradKernel : public framework::OpKernel<T> {
         M = transpose_x ? x_dims[2] : x_dims[1];
         break;
       default:
-        assert(false);
+        batchCountX = batch_count;
+        size_t mat_s = x_dims.size() - 2;
+        M = transpose_x ? x_dims[mat_s + 1] : x_dims[mat_s];
     }
 
     switch (y_dims.size()) {
@@ -161,7 +170,9 @@ class MatMulGradKernel : public framework::OpKernel<T> {
         N = transpose_y ? y_dims[1] : y_dims[2];
         break;
       default:
-        assert(false);
+        batchCountY = batch_count;
+        size_t mat_s = y_dims.size() - 2;
+        N = transpose_y ? y_dims[mat_s] : y_dims[mat_s + 1];
     }
     if (batchCountX && batchCountY) {
       PADDLE_ENFORCE_EQ(
@@ -172,7 +183,11 @@ class MatMulGradKernel : public framework::OpKernel<T> {
     int batchCount = std::max(batchCountX, batchCountY);
     std::vector<int64_t> dout_dims = {M, N};
     if (batchCount) {
-      dout_dims.insert(dout_dims.begin(), batchCount);
+      if (x_dims.size() > 3) {
+        dout_dims.insert(dout_dims.begin(), x_dims.begin(), x_dims.end() - 2);
+      } else {
+        dout_dims.insert(dout_dims.begin(), batchCount);
+      }
     }
     Tensor X = Reshape<T>(x, make_ddim(x_dims));
     Tensor Y = Reshape<T>(y, make_ddim(y_dims));
diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc
index 03302f5cbf5674dca1d22a84137579090b4d5eac..000e029840ceb842941f9cbad5758209b6fd4dd5 100644
--- a/paddle/operators/net_op.cc
+++ b/paddle/operators/net_op.cc
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/operators/net_op.h"
 #include <set>
diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc
index dfd86546e83a6276aedd198eaeb6fad2c50944df..9358f29f62fc21801f8036400d2baebdfd663a3a 100644
--- a/paddle/operators/net_op_test.cc
+++ b/paddle/operators/net_op_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/operators/net_op.h"
 
 #include <gtest/gtest.h>
diff --git a/paddle/operators/op_documentation/batch_norm_op.md b/paddle/operators/op_documentation/batch_norm_op.md
index 80948adf2b9047a9685dbdd90b2296b5a955f9c1..d1392619c42d9206bf4bddcd33ad11b033e6cbdb 100644
--- a/paddle/operators/op_documentation/batch_norm_op.md
+++ b/paddle/operators/op_documentation/batch_norm_op.md
@@ -66,7 +66,7 @@ As most C++ operators do, `batch_norm_op` is defined by inputs, outputs, attribu
 
 The following graph showes the training computational process of `batch_norm_op`:
 
-<img src="./images/batch_norm_op_kernel.png" width="800"/>
+<img src="../images/batch_norm_op_kernel.png" width="800"/>
 
 cudnn provides APIs to finish the whole series of computation, we can use them in our GPU kernel.
 
@@ -124,7 +124,7 @@ for pass_id in range(PASS_NUM):
 `is_infer` is an attribute. Once an operator is created, its attributes can not be changed. It suggests us that we shall maintain two `batch_norm_op` in the model, one's `is_infer` is `True`(we call it `infer_batch_norm_op`) and the other one's is `False`(we call it `train_batch_norm_op`). They share all parameters and variables, but be placed in two different branches. That is to say, if a network contains a `batch_norm_op`, it will fork into two branches, one go through `train_batch_norm_op` and the other one go through `infer_batch_norm_op`:
 
 <div align=center>
-<img src="./images/batch_norm_fork.png" width="500"/>
+<img src="../images/batch_norm_fork.png" width="500"/>
 </div>
 
 Just like what is shown in the above graph, the net forks before `batch_norm_op` and will never merge again. All the operators after `batch_norm_op` will duplicate. 
diff --git a/paddle/operators/parallel_do_op.cc b/paddle/operators/parallel_do_op.cc
index e1bec0421e76143bef669a4f6fa373cdf01226b2..a00458ea068dd703d2c7f362511ed08bc212d2a8 100644
--- a/paddle/operators/parallel_do_op.cc
+++ b/paddle/operators/parallel_do_op.cc
@@ -30,16 +30,13 @@ static constexpr char kParallelScopes[] = "parallel_scopes";
 
 static constexpr char kParallelBlock[] = "sub_block";
 
-// using ParallelScopeVar = std::vector<framework::Scope *>;
 using LoDTensor = framework::LoDTensor;
-using OperatorBase = framework::OperatorBase;
 
-void SplitTensorAndMoveTensorToScopes(
-    const framework::Scope &scope,
-    const std::vector<framework::Scope *> &sub_scopes,
+static void SplitTensorAndMoveTensorToScopes(
+    const framework::Scope &scope, std::vector<framework::Scope *> *sub_scopes,
     const std::vector<platform::Place> &places,
     const std::vector<std::string> &names) {
-  PADDLE_ENFORCE_EQ(sub_scopes.size(), places.size());
+  size_t num_sub_scopes = 0;
   for (auto &argu : names) {
     auto *var = scope.FindVar(argu);
     const auto &tensor = var->Get<LoDTensor>();
@@ -48,13 +45,31 @@ void SplitTensorAndMoveTensorToScopes(
     for (auto &lod : lod_tensors) {
       VLOG(3) << lod.dims();
     }
+    if (num_sub_scopes == 0) {
+      num_sub_scopes = lod_tensors.size();
+    } else {
+      PADDLE_ENFORCE_EQ(num_sub_scopes, lod_tensors.size());
+    }
+    PADDLE_ENFORCE_NE(num_sub_scopes, 0);
+    if (sub_scopes->size() == 0) {
+      sub_scopes->reserve(num_sub_scopes);
+      for (size_t i = 0; i < num_sub_scopes; ++i) {
+        sub_scopes->emplace_back(&scope.NewScope());
+      }
+    }
 
-    for (size_t i = 0; i < sub_scopes.size(); ++i) {
-      *sub_scopes[i]->Var(argu)->GetMutable<LoDTensor>() = lod_tensors[i];
+    for (size_t i = 0; i < lod_tensors.size(); ++i) {
+      *(*sub_scopes)[i]->Var(argu)->GetMutable<LoDTensor>() = lod_tensors[i];
     }
   }
 }
 
+void WaitOnPlace(const platform::Place place) {
+  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+  auto &dev_ctx = *pool.Get(place);
+  dev_ctx.Wait();
+}
+
 void WaitOnPlaces(const std::vector<platform::Place> places) {
   platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
 
@@ -70,7 +85,7 @@ class ParallelDoOp : public framework::OperatorBase {
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
+      : framework::OperatorBase(type, inputs, outputs, attrs) {}
 
   void Run(const framework::Scope &scope,
            const platform::Place &place) const override {
@@ -85,19 +100,17 @@ class ParallelDoOp : public framework::OperatorBase {
 
     auto &sub_scopes = *scope.FindVar(Output(kParallelScopes))
                             ->GetMutable<std::vector<framework::Scope *>>();
-    for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-      sub_scopes.push_back(&scope.NewScope());
-    }
 
     // split input
-    SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places,
+    SplitTensorAndMoveTensorToScopes(scope, &sub_scopes, places,
                                      Inputs(kInputs));
+
     // copy parameter
     for (auto &param : Inputs(kParameters)) {
       PADDLE_ENFORCE(scope.FindVar(param)->IsType<LoDTensor>(),
                      "Only support parameter type as LoDTensor");
       auto &src = scope.FindVar(param)->Get<LoDTensor>();
-      for (size_t i = 0; i < places.size(); ++i) {
+      for (size_t i = 0; i < sub_scopes.size(); ++i) {
         auto &place = places[i];
         auto *sub_scope = sub_scopes[i];
         auto *dst = sub_scope->Var(param)->GetMutable<LoDTensor>();
@@ -108,9 +121,7 @@ class ParallelDoOp : public framework::OperatorBase {
 
     std::vector<std::future<void>> workers;
     workers.reserve(places.size());
-    for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-      VLOG(3) << "Run " << place_idx;
-
+    for (size_t place_idx = 0; place_idx < sub_scopes.size(); ++place_idx) {
       auto &place = places[place_idx];
       auto *cur_scope = sub_scopes[place_idx];
 
@@ -157,21 +168,16 @@ ParallelDo Operator.
   }
 };
 
-class ParallelDoGradOp : public OperatorBase {
+class ParallelDoGradOp : public framework::OperatorBase {
  public:
   ParallelDoGradOp(const std::string &type,
                    const framework::VariableNameMap &inputs,
                    const framework::VariableNameMap &outputs,
                    const framework::AttributeMap &attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
+      : framework::OperatorBase(type, inputs, outputs, attrs) {}
 
   void Run(const framework::Scope &scope,
            const platform::Place &place) const override {
-    // // get device context from pool
-    // platform::DeviceContextPool &pool =
-    //        platform::DeviceContextPool::Instance();
-    // auto &dev_ctx = *pool.Get(place);
-
     auto *block = Attr<framework::BlockDesc *>(kParallelBlock);
     auto *program = block->Program();
 
@@ -181,26 +187,16 @@ class ParallelDoGradOp : public OperatorBase {
     auto &places = scope.FindVar(Input(kPlaces))->Get<platform::PlaceList>();
 
     // feed output@grad
-    SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places,
-                                     Inputs(framework::GradVarName(kOutputs)));
+    SplitTensorAndMoveTensorToScopes(
+        scope, const_cast<std::vector<framework::Scope *> *>(&sub_scopes),
+        places, Inputs(framework::GradVarName(kOutputs)));
     WaitOnPlaces(places);
 
-    // for debugging
-    for (auto &s : Inputs(framework::GradVarName(kOutputs))) {
-      VLOG(3) << s;
-      VLOG(3) << scope.FindVar(s)->Get<LoDTensor>();
-      for (auto *sub_scope : sub_scopes) {
-        VLOG(3) << sub_scope->FindVar(s)->Get<LoDTensor>();
-      }
-    }
-
     // exe run
     std::vector<std::future<void>> workers;
-    for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-      VLOG(3) << "Run " << place_idx;
-
-      auto &place = places[place_idx];
-      auto *cur_scope = sub_scopes[place_idx];
+    for (size_t i = 0; i < sub_scopes.size(); ++i) {
+      auto &place = places[i];
+      auto *cur_scope = sub_scopes[i];
 
       // execute
       workers.emplace_back(framework::Async([program, cur_scope, place, block] {
@@ -216,33 +212,40 @@ class ParallelDoGradOp : public OperatorBase {
 
     // merge grad
     for (auto &s : Outputs(framework::GradVarName(kParameters))) {
-      VLOG(3) << "merge grad " << s;
-
-      auto &t = sub_scopes[0]->FindVar(s)->Get<LoDTensor>();
-      VLOG(3) << t;
-
-      std::string s_buf = s + "@BUF";
-      auto *t_buf = sub_scopes[0]->Var(s_buf)->GetMutable<LoDTensor>();
-
-      for (size_t place_idx = 1; place_idx < places.size(); ++place_idx) {
-        auto &tt = sub_scopes[place_idx]->FindVar(s)->Get<LoDTensor>();
-        VLOG(3) << place_idx;
-        VLOG(3) << tt;
-        framework::Copy(tt, places[0], t_buf);
+      auto &result = sub_scopes[0]->FindVar(s)->Get<LoDTensor>();
+      std::string tmp_name;
+      auto *tmp = sub_scopes[0]->Var(&tmp_name)->GetMutable<LoDTensor>();
+
+      for (size_t i = 1; i < sub_scopes.size(); ++i) {
+        auto &tensor_to_merge = sub_scopes[i]->FindVar(s)->Get<LoDTensor>();
+        if (!(places[i] == places[0])) {
+          framework::Copy(tensor_to_merge, places[0], tmp);
+          WaitOnPlace(places[0]);
+        } else {
+          tmp->ShareDataWith(tensor_to_merge);
+        }
 
         auto sum_op = framework::OpRegistry::CreateOp(
-            "sum", {{"X", {s, s_buf}}}, {{"Out", {s}}},
+            "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
             framework::AttributeMap{});
         sum_op->Run(*sub_scopes[0], places[0]);
-        WaitOnPlaces(places);
+        WaitOnPlace(places[0]);
       }
 
-      VLOG(3) << t;
-      framework::Copy(t, place, scope.FindVar(s)->GetMutable<LoDTensor>());
+      VLOG(3) << result;
+      framework::Copy(result, place, scope.FindVar(s)->GetMutable<LoDTensor>());
     }
+    WaitOnPlaces(places);
   }
 };
 
+std::ostream &operator<<(std::ostream &sout,
+                         const std::vector<std::string> &strs) {
+  std::copy(strs.begin(), strs.end(),
+            std::ostream_iterator<std::string>(sout, ","));
+  return sout;
+}
+
 class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
  public:
   using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
@@ -283,18 +286,30 @@ class ParallelDoGradOpShapeInference : public framework::InferShapeBase {
   void operator()(framework::InferShapeContext *ctx) const override {
     std::vector<std::string> input{kParameters, kInputs};
     std::vector<std::string> output{kOutputs};
-    for (auto &s : input) {
-      PADDLE_ENFORCE(ctx->HasInputs(s));
-      PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s)),
-                     "Cannot find the gradient variable %s",
-                     framework::GradVarName(s));
-    }
+
+    PADDLE_ENFORCE(ctx->HasInputs(kParameters));
+    PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters)));
+    PADDLE_ENFORCE(ctx->HasInput(kInputs));
+
     for (auto &s : output) {
       PADDLE_ENFORCE(ctx->HasInputs(s));
     }
-    for (auto &s : input) {
-      ctx->SetOutputsDim(framework::GradVarName(s), ctx->GetInputsDim(s));
+
+    ctx->SetOutputsDim(framework::GradVarName(kParameters),
+                       ctx->GetInputsDim(kParameters));
+
+    auto i_dims = ctx->GetInputsDim(kInputs);
+    auto ig_names = ctx->Outputs(framework::GradVarName(kInputs));
+
+    for (size_t i = 0; i < ig_names.size(); ++i) {
+      auto &ig_name = ig_names[i];
+      if (ig_name == framework::kEmptyVarName) {
+        continue;
+      }
+
+      ctx->SetDims({ig_name}, {i_dims[i]});
     }
+
     if (ctx->HasInputs(kParameters)) {
       PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters)));
       ctx->SetOutputsDim(framework::GradVarName(kParameters),
diff --git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc
deleted file mode 100644
index 77407f5cdf7e4ef7b76c38ef8992517b4bd1c5fe..0000000000000000000000000000000000000000
--- a/paddle/operators/pool_cudnn_op.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/pool_cudnn_op.h"
-
-namespace ops = paddle::operators;
-
-REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
-            ops::PoolOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    pool2d_cudnn, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    pool2d_cudnn_grad,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
-
-REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad,
-            ops::PoolOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    pool3d_cudnn, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    pool3d_cudnn_grad,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc
index 2d0001ba1184c99d9fc642f60c97ba89cec97ccd..446fb0819d98e0eb3d81bb202a67c55a23fc06b6 100644
--- a/paddle/operators/pool_cudnn_op.cu.cc
+++ b/paddle/operators/pool_cudnn_op.cu.cc
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/operators/pool_cudnn_op.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/pool_op.h"
 #include "paddle/platform/cudnn_helper.h"
 
 namespace paddle {
@@ -25,7 +26,7 @@ using DataLayout = platform::DataLayout;
 using PoolingMode = platform::PoolingMode;
 
 template <typename T>
-class PoolCudnnOpKernel : public framework::OpKernel<T> {
+class PoolCUDNNOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -86,7 +87,7 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
 };
 
 template <typename T>
-class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
+class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -162,12 +163,16 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_CUDA_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>,
-                        ops::PoolCudnnOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
-                        ops::PoolCudnnGradOpKernel<double>);
-
-REGISTER_OP_CUDA_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel<float>,
-                        ops::PoolCudnnOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
-                        ops::PoolCudnnGradOpKernel<double>);
+REGISTER_OP_KERNEL(pool2d, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNOpKernel<float>,
+                   ops::PoolCUDNNOpKernel<double>);
+REGISTER_OP_KERNEL(pool2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNGradOpKernel<float>,
+                   ops::PoolCUDNNGradOpKernel<double>);
+
+REGISTER_OP_KERNEL(pool3d, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNOpKernel<float>,
+                   ops::PoolCUDNNOpKernel<double>);
+REGISTER_OP_KERNEL(pool3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNGradOpKernel<float>,
+                   ops::PoolCUDNNGradOpKernel<double>);
diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc
index d3cf5fa638c53dfdfacec153211f447a1e2fa3bf..b97333bb1a13a0170c325520b86ac73e68282f91 100644
--- a/paddle/operators/pool_op.cc
+++ b/paddle/operators/pool_op.cc
@@ -61,6 +61,30 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
   ctx->ShareLoD("X", "Out");
 }
 
+framework::OpKernelType PoolOp::GetExpectedKernelType(
+    const framework::ExecutionContext &ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
+  }
+#endif
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
   PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
   PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
@@ -68,6 +92,30 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
   ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
 }
 
+framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
+    const framework::ExecutionContext &ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  use_cudnn &= platform::is_gpu_place(ctx.GetPlace());
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
+  }
+#endif
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
@@ -101,15 +149,27 @@ Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
   AddAttr<std::vector<int>>("strides",
                             "(vector<int>, default {1, 1}), strides(height, "
                             "width) of pooling operator.")
-      .SetDefault({1, 1});  // TODO(Chengduo): Add checker. (Currently,
+      .SetDefault({1, 1});
+  // TODO(Chengduo): Add checker. (Currently,
   // TypedAttrChecker don't support vector type.)
   AddAttr<std::vector<int>>(
       "paddings",
       "(vector<int>, default {0,0}), paddings(height, width) of pooling "
       "operator."
       "If global_pooling = true, paddings and ksize will be ignored.")
-      .SetDefault({0, 0});  // TODO(Chengduo): Add checker. (Currently,
-  // TypedAttrChecker don't support vector type.)
+      .SetDefault({0, 0});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
 
   AddComment(R"DOC(
 Pool2d Operator.
@@ -182,6 +242,19 @@ Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       .SetDefault({0, 0, 0});  // TODO(Chengduo): Add checker. (Currently,
                                // TypedAttrChecker don't support vector type.)
 
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+
   AddComment(R"DOC(
 Pool3d Operator.
 
diff --git a/paddle/operators/pool_op.h b/paddle/operators/pool_op.h
index 3860e295f4b4dbeb2d60cfb304847de39083f1e1..c3d82ecbdeb412f0234fcddc27361d79b58c7122 100644
--- a/paddle/operators/pool_op.h
+++ b/paddle/operators/pool_op.h
@@ -29,6 +29,10 @@ class PoolOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class PoolOpGrad : public framework::OperatorWithKernel {
@@ -36,6 +40,10 @@ class PoolOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker {
diff --git a/paddle/operators/print_op.cc b/paddle/operators/print_op.cc
index 89e41d806c7661a3e61e0a944a2a980704297dd9..8b233d64c904a8870212af33c5839cfc555b5dc8 100644
--- a/paddle/operators/print_op.cc
+++ b/paddle/operators/print_op.cc
@@ -16,12 +16,17 @@
 #include <ctime>
 
 #include "paddle/framework/op_registry.h"
+#include "paddle/framework/variable.h"
 
 namespace paddle {
 namespace operators {
 
 #define CLOG std::cout
 
+const std::string kForward = "FORWARD";
+const std::string kBackward = "BACKWARD";
+const std::string kBoth = "BOTH";
+
 struct Formater {
   std::string message;
   std::string name;
@@ -122,40 +127,77 @@ class TensorPrintOp : public framework::OperatorBase {
   TensorPrintOp(const TensorPrintOp& o)
       : framework::OperatorBase(
             static_cast<const framework::OperatorBase&>(o)) {
-    PADDLE_THROW("Not implemented");
+    PADDLE_THROW("Not implemented.");
   }
 
   void Run(const framework::Scope& scope,
            const platform::Place& place) const override {
-    // Only run the `first_n` times.
+    const framework::Variable* in_var_ptr = nullptr;
+    std::string phase = kForward;
+    std::string printed_var_name = "";
+
+    auto& inputs = Inputs();
+    if (inputs.find("In") != inputs.end() && !Inputs("In").empty()) {
+      in_var_ptr = scope.FindVar(Input("In"));
+      printed_var_name = Inputs("In").front();
+    } else if (inputs.find("In@GRAD") != inputs.end() &&
+               !Inputs("In@GRAD").empty()) {
+      in_var_ptr = scope.FindVar(Input("In@GRAD"));
+      printed_var_name = Inputs("In@GRAD").front();
+      phase = kBackward;
+    } else {
+      PADDLE_THROW("Unknown phase, should be forward or backward.");
+    }
+
+    PADDLE_ENFORCE_NOT_NULL(in_var_ptr);
+
+    auto& in_tensor = in_var_ptr->Get<framework::LoDTensor>();
+    auto* out_var_ptr = scope.FindVar(Output("Out"));
+    auto& out_tensor = *out_var_ptr->GetMutable<framework::LoDTensor>();
+
+    // Just copy data from input tensor to output tensor
+    // output tensor share same memory with input tensor
+    out_tensor.ShareDataWith(in_tensor);
+    out_tensor.set_lod(in_tensor.lod());
+
+    std::string print_phase = Attr<std::string>("print_phase");
+    if (print_phase != phase && print_phase != kBoth) {
+      return;
+    }
+
     int first_n = Attr<int>("first_n");
     if (first_n > 0 && ++times_ > first_n) return;
 
-    PADDLE_ENFORCE(!Inputs("input").empty(), "input should be set");
-    auto* input_var = scope.FindVar(Input("input"));
-    PADDLE_ENFORCE_NOT_NULL(input_var);
-    auto& tensor = input_var->Get<framework::LoDTensor>();
+    framework::LoDTensor printed_tensor;
+    printed_tensor.set_lod(in_tensor.lod());
+    printed_tensor.Resize(in_tensor.dims());
 
-    // TODO(ChunweiYan) support GPU
-    PADDLE_ENFORCE(platform::is_cpu_place(tensor.place()));
+    if (platform::is_cpu_place(in_tensor.place())) {
+      printed_tensor.ShareDataWith(in_tensor);
+    } else {
+      // copy data to cpu to print
+      platform::CPUPlace place;
+      framework::Copy(in_tensor, place, &printed_tensor);
+    }
 
     Formater formater;
     if (Attr<bool>("print_tensor_name")) {
-      formater.name = Inputs("input").front();
+      formater.name = printed_var_name;
     }
     if (Attr<bool>("print_tensor_type")) {
-      formater.dtype = tensor.type();
+      formater.dtype = printed_tensor.type();
     }
     if (Attr<bool>("print_tensor_shape")) {
-      formater.dims.assign(tensor.dims()[0],
-                           tensor.dims()[tensor.dims().size() - 1]);
+      auto& dims = printed_tensor.dims();
+      formater.dims.resize(dims.size());
+      for (int i = 0; i < dims.size(); ++i) formater.dims[i] = dims[i];
     }
     if (Attr<bool>("print_tensor_lod")) {
-      formater.lod = tensor.lod();
+      formater.lod = printed_tensor.lod();
     }
     formater.summarize = Attr<int>("summarize");
-    formater.data = (void*)tensor.data<void>();
-    formater(tensor.numel());
+    formater.data = (void*)printed_tensor.data<void>();
+    formater(printed_tensor.numel());
   }
 
  private:
@@ -166,27 +208,46 @@ class PrintOpProtoAndCheckMaker : public framework::OpProtoAndCheckerMaker {
  public:
   PrintOpProtoAndCheckMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("input", "the tensor that will be displayed.");
+    AddInput("In", "Input tensor to be displayed.");
     AddAttr<int>("first_n", "Only log `first_n` number of times.");
     AddAttr<std::string>("message", "A string message to print as a prefix.");
-    AddAttr<int>("summarize", "Print this number of elements in the tensor.");
+    AddAttr<int>("summarize", "Number of elements printed.");
     AddAttr<bool>("print_tensor_name", "Whether to print the tensor name.");
     AddAttr<bool>("print_tensor_type", "Whether to print the tensor's dtype.");
     AddAttr<bool>("print_tensor_shape", "Whether to print the tensor's shape.");
     AddAttr<bool>("print_tensor_lod", "Whether to print the tensor's lod.");
+    AddAttr<std::string>(
+        "print_phase",
+        "(string, default 'BOTH') Which phase to display including 'FORWARD' "
+        "'BACKWARD' and 'BOTH'.")
+        .SetDefault(kBoth)
+        .InEnum({kForward, kBackward, kBoth});
+    AddOutput("Out", "Output tensor with same data as input tensor.");
     AddComment(R"DOC(
-    Creates a print op that will print when a tensor is accessed.
+Creates a print op that will print when a tensor is accessed.
 
-    Wraps the tensor passed in so that whenever that a tensor is accessed,
-    the message `message` is printed, along with the current value of the
-    tensor `t`.)DOC");
+Wraps the tensor passed in so that whenever that a tensor is accessed,
+the message `message` is printed, along with the current value of the
+tensor `t`.)DOC");
   }
 };
 
-class InferShape : public framework::InferShapeBase {
+class InferShapeForward : public framework::InferShapeBase {
  public:
   void operator()(framework::InferShapeContext* context) const override {
-    PADDLE_ENFORCE(context->HasInput("input"), "input should be set");
+    PADDLE_ENFORCE(context->HasInput("In"), "Input(In) should not be null.");
+    context->ShareLoD("In", /*->*/ "Out");
+    context->SetOutputDim("Out", context->GetInputDim("In"));
+  }
+};
+
+class InferShapeBackward : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext* context) const override {
+    PADDLE_ENFORCE(context->HasInput("In@GRAD"),
+                   "Input(In@GRAD) should not be null.");
+    context->ShareLoD("In@GRAD", /*->*/ "Out");
+    context->SetOutputDim("Out", context->GetInputDim("In@GRAD"));
   }
 };
 
@@ -196,11 +257,27 @@ class InferVarType : public framework::VarTypeInference {
                   framework::BlockDesc* block) const override {}
 };
 
+class PrintOpProtoAndCheckGradOpMaker
+    : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op_desc_ptr = new framework::OpDesc();
+    op_desc_ptr->SetType("print_grad");
+    op_desc_ptr->SetInput("In@GRAD", OutputGrad("Out"));
+    op_desc_ptr->SetOutput("Out", InputGrad("In"));
+    op_desc_ptr->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(op_desc_ptr);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OPERATOR(print, paddle::operators::TensorPrintOp,
-                  paddle::operators::PrintOpProtoAndCheckMaker,
-                  paddle::operators::InferShape,
-                  paddle::operators::InferVarType,
-                  paddle::framework::EmptyGradOpMaker);
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(print, ops::TensorPrintOp, ops::PrintOpProtoAndCheckMaker,
+                  ops::PrintOpProtoAndCheckGradOpMaker, ops::InferShapeForward,
+                  ops::InferVarType);
+REGISTER_OPERATOR(print_grad, ops::TensorPrintOp, ops::InferShapeBackward);
diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc
index 55b33343af43802e1b6b95a32603bfee806c9764..60360807351f1cf41c19aa06e30640e5cf473e07 100644
--- a/paddle/operators/recv_op.cc
+++ b/paddle/operators/recv_op.cc
@@ -27,12 +27,15 @@ limitations under the License. */
 #include "paddle/operators/detail/grpc_server.h"
 #include "paddle/operators/detail/sendrecvop_utils.h"
 #include "paddle/operators/detail/simple_block_queue.h"
+#include "paddle/string/printf.h"
 
 #define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
 
 namespace paddle {
 namespace operators {
 
+constexpr char kOptimizeBlock[] = "OptimizeBlock";
+
 void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service) {
   service->RunSyncUpdate();
   VLOG(4) << "RunServer thread end";
@@ -77,35 +80,39 @@ class RecvOp : public framework::OperatorBase {
     if (grads_counter_.find(varname) == grads_counter_.end()) {
       grads_counter_[varname] = 0;
     }
-    char ret[256];
-    snprintf(ret, sizeof(ret), "%s.trainer_%d", varname.c_str(),
-             grads_counter_[varname]++);
-    return std::string(ret);
+    return string::Sprintf("%s.trainer_%d", varname, grads_counter_[varname]++);
   }
 
   void Run(const framework::Scope &scope,
            const platform::Place &dev_place) const override {
-    // FIXME(typhoonzero): no new scopes for every run.
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(dev_place);
     framework::Scope &recv_scope = scope.NewScope();
+
+    // FIXME(Yancey1989): initialize rpc server with laze mode.
     rpc_service_->SetScope(&recv_scope);
+    rpc_service_->SetDevCtx(&dev_ctx);
     auto param_list = Attr<std::vector<std::string>>("ParamList");
     auto grad_list = Attr<std::vector<std::string>>("GradList");
-    auto trainer_count = Attr<int>("Trainers");
+    auto fan_in = Attr<int>("Fanin");
     size_t param_count = param_list.size();
 
-    rpc_service_->Reset();
+    auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock);
+    auto *program = block->Program();
+    framework::Executor executor(dev_place);
+
     // TODO(typhoonzero): change this to a while_op for every cluster-batch.
     bool exit_flag = false;
+    size_t barrier_size = param_count * fan_in;
     while (!exit_flag) {
-      // TODO(gognwb): simply this loop.
-      // Get from multiple trainers, we don't care about order in which
-      // the gradient arrives, just add suffix 0~n then average the gradient.
-      for (size_t i = 0; i < param_count * trainer_count; ++i) {
-        // blocking get one var from client.
+      // Get from multiple trainers, we don't care about the order in which
+      // the gradients arrives, just add suffix 0~n and merge the gradient.
+      rpc_service_->SetCond(0);
+      for (size_t i = 0; i < barrier_size; ++i) {
         const detail::MessageWithName &v = rpc_service_->Get();
         auto grad_var_name = v.first;
         if (grad_var_name == LISTEN_TERMINATE_MESSAGE) {
-          VLOG(4) << "received LISTEN_TERMINATE_MESSAGE and RunOp.Run() exit";
+          LOG(INFO) << "received terminate message and exit";
           exit_flag = true;
           break;
         }
@@ -114,52 +121,32 @@ class RecvOp : public framework::OperatorBase {
         if (it != grad_list.end()) {
           param_var_name = param_list[it - grad_list.begin()];
         } else {
-          LOG(ERROR) << "grad have no paired param found!\"" << grad_var_name
-                     << "\"";
+          LOG(ERROR) << "grad have no paired param:" << grad_var_name;
         }
         VLOG(3) << "recved grad: " << grad_var_name
                 << " updating param: " << param_var_name;
-
-        auto *merged_grad = recv_scope.FindVar(grad_var_name);
-        if (merged_grad == nullptr) {
-          auto *ptr = recv_scope.Var(grad_var_name);
-          CreateTensorFromMessageType(ptr, v.second.type());
-          VLOG(3) << "Create Variable " << grad_var_name
-                  << " on recv scope, which pointer is " << ptr << " type is "
-                  << v.second.type();
-        }
-
-        if (trainer_count > 1) {
+        if (fan_in > 1) {
           grad_var_name = this->GetGradVarNameForTrainer(grad_var_name);
         }
-
-        auto *var = recv_scope.Var(grad_var_name);
-        platform::DeviceContextPool &pool =
-            platform::DeviceContextPool::Instance();
-        auto &dev_ctx = *pool.Get(dev_place);
+        auto *var = recv_scope.FindVar(grad_var_name);
+        if (var == nullptr) {
+          LOG(ERROR) << "can not find server side var: " << grad_var_name;
+          PADDLE_THROW("can not find server side var");
+        }
         detail::DeserializeFromMessage(v.second, dev_ctx, var);
       }
-
       if (exit_flag) {
         break;
       }
 
-      rpc_service_->Reset();
-
-      std::string program_str = Attr<std::string>("OptimizeProgram");
-      framework::proto::ProgramDesc program_desc;
-      program_desc.ParseFromString(program_str);
-      framework::ProgramDesc program(program_desc);
-      framework::Executor executor(dev_place);
-      // Run sub graph to get optimized tensor
       try {
-        executor.Run(program, &recv_scope, 0, /*global_block*/
+        executor.Run(*program, &recv_scope, block->ID(), /*global_block*/
                      false /*create_local_scope*/, false /*create_vars*/);
       } catch (std::exception &e) {
         LOG(ERROR) << "run sub program error " << e.what();
       }
-
-      rpc_service_->Done();
+      rpc_service_->SetCond(1);
+      rpc_service_->WaitClientGet(barrier_size);
       grads_counter_.clear();
     }  // while(true)
   }
@@ -185,8 +172,8 @@ This operator will recv tensor from send_op
                          "IP address to listen on.")
         .SetDefault("127.0.0.1:6164")
         .AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
-    AddAttr<std::string>("OptimizeProgram", "type string",
-                         "Serialized ProgramDesc string for recv to run.");
+    AddAttr<framework::BlockDesc *>(
+        kOptimizeBlock, "Serialized ProgramDesc string for recv to run.");
     AddAttr<std::vector<std::string>>(
         "ParamList", "type list of string",
         "grad->param name mapping to find which param to optimize.")
@@ -195,7 +182,7 @@ This operator will recv tensor from send_op
         "GradList", "type list of string",
         "grad->param name mapping to find which param to optimize.")
         .SetDefault({});
-    AddAttr<int>("Trainers", "type int",
+    AddAttr<int>("Fanin", "type int",
                  "Number of trainers in the current cluster job")
         .SetDefault(1);
   }
diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc
index 172d28bb3b647901d4de7bc03c9de21e3468a364..09b7091358e65221374a604122b742d763cfbafc 100644
--- a/paddle/operators/reduce_op.cc
+++ b/paddle/operators/reduce_op.cc
@@ -129,7 +129,7 @@ If reduce_all is true, just reduce along all dimensions and output a scalar.
   }
 
   void SetComment(std::string name, std::string op) {
-    Replace(comment_, "{ReduceOP}", name);
+    Replace(comment_, "{ReduceOp}", name);
     Replace(comment_, "{reduce}", op);
   }
 };
diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc
index f634ebe9a2a4648bd08f00af635ef22e8d86a8de..c0e614743a894dece2cdc395d0b28df7e86e921d 100644
--- a/paddle/operators/scale_op.cc
+++ b/paddle/operators/scale_op.cc
@@ -48,7 +48,7 @@ Scale operator
 $$Out = scale*X$$
 )DOC");
     AddAttr<AttrType>("scale",
-                      "(float, default 0)"
+                      "(float, default 1.0)"
                       "The scaling factor of the scale operator.")
         .SetDefault(1.0);
   }
diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc
index 4d145250bdc73607c8817e20fdb753f4c96e2391..807533a6c6129af073c5efa7817bd5ad637fec23 100644
--- a/paddle/operators/send_op.cc
+++ b/paddle/operators/send_op.cc
@@ -33,22 +33,25 @@ class SendOp : public framework::OperatorBase {
       : OperatorBase(type, inputs, outputs, attrs) {}
 
   void Run(const framework::Scope& scope,
-           const platform::Place& dev_place) const override {
+           const platform::Place& place) const override {
     auto ins = Inputs("X");
     auto outs = Outputs("Out");
     std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap");
 
-    // FIXME(gongwb): DeviceContext?
-    auto ctx = platform::CPUDeviceContext();
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto& ctx = *pool.Get(place);
     for (size_t i = 0; i < ins.size(); i++) {
+      VLOG(3) << "sending " << ins[i];
       client_.AsyncSendVariable(epmap[i], ctx, scope, ins[i]);
     }
+    PADDLE_ENFORCE(client_.Wait());
 
     for (size_t i = 0; i < outs.size(); i++) {
+      VLOG(3) << "getting " << outs[i];
       client_.AsyncGetVariable(epmap[i], ctx, scope, outs[i]);
     }
 
-    client_.wait();
+    PADDLE_ENFORCE(client_.Wait());
   }
 
  private:
@@ -63,7 +66,7 @@ class SendOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "(Tensor) Output tensor to get from server")
         .AsDuplicable();
     AddComment(R"DOC(
-Recv operator
+Send operator
 
 This operator will send tensor to recv_op.
 )DOC");
diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc
index ea091694798475dfd9631910a750405be950c20c..045a0f5434f339bab345d14881ed05450ce6588d 100644
--- a/paddle/operators/send_recv_op_test.cc
+++ b/paddle/operators/send_recv_op_test.cc
@@ -130,10 +130,7 @@ void StartServerNet(bool is_sparse) {
   attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
   attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
   attrs.insert({"GradList", std::vector<std::string>({"x1"})});
-  std::string program_proto;
-  PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto));
-
-  attrs.insert({"OptimizeProgram", program_proto});
+  attrs.insert({"OptimizeBlock", block});
   recv_op = f::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, {}, attrs);
   recv_op->Run(scope, place);
 }
diff --git a/paddle/operators/sequence_erase_op.cc b/paddle/operators/sequence_erase_op.cc
index d17b2686238b2d2f872331edfdbb095fb8693b87..aa0c00aa6f7854ee5e34aef78970971b78df6514 100644
--- a/paddle/operators/sequence_erase_op.cc
+++ b/paddle/operators/sequence_erase_op.cc
@@ -86,4 +86,5 @@ REGISTER_OP_WITHOUT_GRADIENT(sequence_erase, ops::SequenceEraseOp,
                              ops::SequenceEraseOpMaker);
 REGISTER_OP_CPU_KERNEL(
     sequence_erase,
-    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int32_t>);
+    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int32_t>,
+    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int64_t>);
diff --git a/paddle/operators/sequence_erase_op.cu b/paddle/operators/sequence_erase_op.cu
index 5da8eba3e1ac1fb85dfc65c2fd801574599e02d9..f1e3b96acd0259de2b3ca1348834bd17e1e174a2 100644
--- a/paddle/operators/sequence_erase_op.cu
+++ b/paddle/operators/sequence_erase_op.cu
@@ -23,27 +23,22 @@ using platform::PADDLE_CUDA_NUM_THREADS;
 using LoDTensor = framework::LoDTensor;
 
 template <typename T>
-__global__ void LabelErasedIdx(const T* in_dat, const int in_len,
-                               const T* tokens, const int tokens_len,
-                               int* num_erased) {
+__global__ void LabelErasedIdx(const T* in_dat, const int64_t in_len,
+                               const int* tokens, const size_t tokens_len,
+                               size_t* num_erased) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < in_len) {
-    int erased = 0;
-    for (int i = 0; i < tokens_len; ++i) {
+    for (size_t i = 0; i < tokens_len; ++i) {
       if (in_dat[index] == tokens[i]) {
-        erased = 1;
+        num_erased[index + 1] = 1;
+        break;
       }
     }
-    num_erased[index + 1] = erased;
-    if (index == 0) {
-      num_erased[0] = 0;
-    }
   }
 }
 
-template <typename T>
-__global__ void GetOutLod(const T* num_erased, const int* in_lod,
-                          const int lod_len, int* out_lod0) {
+__global__ void GetOutLod(const size_t* num_erased, const size_t* in_lod,
+                          const size_t lod_len, size_t* out_lod0) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < lod_len) {
     out_lod0[index] = in_lod[index] - num_erased[in_lod[index]];
@@ -51,11 +46,11 @@ __global__ void GetOutLod(const T* num_erased, const int* in_lod,
 }
 
 template <typename T>
-__global__ void SetOutput(const T* in_dat, const int in_len,
-                          const int* num_erased, T* out_dat) {
+__global__ void SetOutput(const T* in_dat, const int64_t in_len,
+                          const size_t* num_erased, T* out_dat) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < in_len) {
-    if (in_dat[index] != in_dat[index + 1]) {
+    if (num_erased[index] == num_erased[index + 1]) {
       out_dat[index - num_erased[index]] = in_dat[index];
     }
   }
@@ -72,53 +67,44 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
     PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(),
                       "The actual size mismatches with the LoD information.");
-    auto tokens = ctx.Attr<std::vector<T>>("tokens");
-    auto tokens_len = tokens.size();
+    auto tokens = ctx.Attr<std::vector<int>>("tokens");
     auto in_len = in->numel();
     auto in_dat = in->data<T>();
-    auto lod0 = lod[0];
-
-    thrust::host_vector<T> host_tokens(tokens_len);
-    for (size_t i = 0; i < tokens.size(); ++i) {
-      host_tokens[i] = tokens[i];
-    }
-    thrust::device_vector<T> dev_tokens = host_tokens;
-    thrust::device_vector<int> num_erased(in_len + 1);
-
-    T* dev_tokens_ptr = thrust::raw_pointer_cast(dev_tokens.data());
-    int* num_erased_ptr = thrust::raw_pointer_cast(num_erased.data());
+    // Copy tokens to GPU
+    thrust::device_vector<int> dev_tokens(tokens.begin(), tokens.end());
+    int* dev_tokens_ptr = thrust::raw_pointer_cast(dev_tokens.data());
 
+    // Count number of elements to be erased
+    thrust::device_vector<size_t> num_erased(in_len + 1, 0);
+    size_t* num_erased_ptr = thrust::raw_pointer_cast(num_erased.data());
     auto stream = ctx.cuda_device_context().stream();
     LabelErasedIdx<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
                      PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
-        in_dat, in_len, dev_tokens_ptr, tokens_len, num_erased_ptr);
+        in_dat, in_len, dev_tokens_ptr, tokens.size(), num_erased_ptr);
     thrust::inclusive_scan(num_erased.begin() + 1, num_erased.end(),
                            num_erased.begin() + 1);
 
-    // Calc LoD
+    // Copy LoD to GPU
+    auto lod0 = lod[0];
     auto lod_len = lod0.size();
-    thrust::host_vector<int> host_lod(lod_len);
-    for (size_t i = 0; i < lod_len; ++i) {
-      host_lod[i] = lod0[i];
-    }
-    thrust::device_vector<int> dev_in_lod = host_lod;
-    thrust::device_vector<int> dev_out_lod(lod_len);
-    int* dev_in_lod_ptr = thrust::raw_pointer_cast(dev_in_lod.data());
-    int* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data());
+    thrust::device_vector<size_t> dev_in_lod = lod0;
+    size_t* dev_in_lod_ptr = thrust::raw_pointer_cast(dev_in_lod.data());
+
+    // Calc output LoD
+    thrust::device_vector<size_t> dev_out_lod(lod_len);
+    size_t* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data());
     GetOutLod<<<(lod_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
                 PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
         num_erased_ptr, dev_in_lod_ptr, lod_len, dev_out_lod_ptr);
-    thrust::host_vector<int> host_out_lod = dev_out_lod;
-    std::vector<int> out_lod0(lod_len, 0);
-    for (size_t i = 0; i < lod_len; i++) {
-      out_lod0[i] = host_out_lod[i];
-    }
+
+    // Set LoD for output
+    thrust::host_vector<size_t> out_lod0 = dev_out_lod;
     framework::LoD out_lod;
     out_lod.push_back(out_lod0);
     out->set_lod(out_lod);
 
     // Set output
-    out->Resize({out_lod0.back(), 1});
+    out->Resize({static_cast<int64_t>(out_lod0.back()), 1});
     auto out_dat = out->mutable_data<T>(ctx.GetPlace());
     SetOutput<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
                 PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_dat, in_len,
@@ -130,4 +116,5 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 REGISTER_OP_CUDA_KERNEL(sequence_erase,
-                        paddle::operators::SequenceEraseOpCUDAKernel<int32_t>);
+                        paddle::operators::SequenceEraseOpCUDAKernel<int32_t>,
+                        paddle::operators::SequenceEraseOpCUDAKernel<int64_t>);
diff --git a/paddle/operators/sequence_expand_op.cc b/paddle/operators/sequence_expand_op.cc
index b40ec617e42110e0ab5168a8ac675adaf760fb3c..d34dbd35b6df2dac275fbe2c41f99b8549217d5b 100644
--- a/paddle/operators/sequence_expand_op.cc
+++ b/paddle/operators/sequence_expand_op.cc
@@ -58,7 +58,7 @@ This operator expands input(X) according to LOD of input(Y).
 Following are cases to better explain how this works:
 Case 1:
 
-Given 2-level a LoDTensor input(X)
+Given a 2-level LoDTensor input(X)
     X.lod = [[0,       2, 3],
              [0, 1,    3, 4]]
     X.data = [a, b, c, d]
@@ -75,9 +75,8 @@ then we get 2-level LoDTensor
 
 Case 2:
 
-Given a 0-level LoDTensor input(X)
+Given a common Tensor input(X)
     X.data = [a, b, c]
-    X.lod = NULL
     X.dims = [3, 1]
 and input(Y)
     Y.lod = [[0, 2, 3, 6]]
@@ -89,9 +88,8 @@ then we get 1-level LoDTensor
 
 Case 3:
 
-Given a 0-level LoDTensor input(X)
+Given a common Tensor input(X)
     X.data = [[a, b], [c, d], [e, f]]
-    X.lod = NULL
     X.dims = [3, 2]
 and input(Y)
     Y.lod = [[0, 2, 3, 6]]
diff --git a/paddle/operators/sequence_reshape_op.cc b/paddle/operators/sequence_reshape_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..57cca13105537d88fe942b850cae10650d3096e2
--- /dev/null
+++ b/paddle/operators/sequence_reshape_op.cc
@@ -0,0 +1,130 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/operators/sequence_reshape_op.h"
+#include "paddle/framework/ddim.h"
+
+namespace paddle {
+namespace operators {
+
+class SequenceReshapeOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SequenceReshapeOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SequenceReshapeOp should not be null.");
+    auto x_dims = ctx->GetInputDim("X");
+    auto x_numel = product(x_dims);
+    PADDLE_ENFORCE_EQ(x_dims.size(), 2U, "Rank of Input(X) should be 2.");
+    int new_dim = ctx->Attrs().Get<int>("new_dim");
+    ctx->SetOutputDim("Out",
+                      {x_numel / new_dim, static_cast<int64_t>(new_dim)});
+  }
+};
+
+class SequenceReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SequenceReshapeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "(LoDTensor, default LoDTensor<float>) A 2-D LoDTensor with shape "
+             "being [N, M].");
+    AddOutput("Out",
+              "(LoDTensor, default LoDTensor<float>) A 2-D LoDTensor with "
+              "shape [T, new_dim] where T is calculated based on X.lod, M and "
+              "new_dim.");
+    AddAttr<int>("new_dim", "Sequence dimension of the output LoDTensor.");
+    AddComment(R"DOC(
+Sequence Reshape Operator.
+
+This operator will rearrange the input sequences. The new dimension is set by
+attribute and length of each sequence may change longer or shorter which is
+decided by original length, original dimension and new dimension. The following
+example will help to illustrate the function of this operator:
+
+x is a LoDTensor:
+    x.lod  = [[0, 2, 6]]
+    x.data = [[1, 2], [3, 4],
+              [5, 6], [7, 8], [9, 10], [11, 12]]
+    x.dims = [6, 2]
+
+set new_dim = 4
+
+then out is a LoDTensor:
+    out.lod  = [[0, 1, 3]]
+    out.data = [[1, 2, 3, 4],
+                [5, 6, 7, 8], [9, 10, 11, 12]]
+    out.dims = [3, 4]
+
+Currently, only 1-level LoDTensor is supported and please make sure (original
+length * original dimension) can be divided by new_dim with no remainder for
+each sequence.
+
+)DOC");
+  }
+};
+
+class SequenceReshapeGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(
+        ctx->HasInput(framework::GradVarName("Out")),
+        "Input(Out@GRAD) of SequenceReshapeGradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SequenceReshapeGradOp should  not be null.");
+
+    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
+    ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
+  }
+};
+
+class SequenceReshapeGradOpMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op_desc_ptr = new framework::OpDesc();
+    op_desc_ptr->SetType("sequence_reshape_grad");
+    op_desc_ptr->SetInput("X", Input("X"));
+    op_desc_ptr->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op_desc_ptr->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op_desc_ptr->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(op_desc_ptr);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(sequence_reshape, ops::SequenceReshapeOp,
+                  ops::SequenceReshapeOpMaker, ops::SequenceReshapeGradOpMaker);
+REGISTER_OPERATOR(sequence_reshape_grad, ops::SequenceReshapeGradOp);
+REGISTER_OP_CPU_KERNEL(
+    sequence_reshape,
+    ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::SequenceReshapeKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CPU_KERNEL(
+    sequence_reshape_grad,
+    ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
+    ops::SequenceReshapeGradKernel<paddle::platform::CPUDeviceContext, int>);
diff --git a/paddle/operators/sequence_reshape_op.cu b/paddle/operators/sequence_reshape_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d9c2f7e9a4149371867cf2a8b81d58566999bfba
--- /dev/null
+++ b/paddle/operators/sequence_reshape_op.cu
@@ -0,0 +1,30 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/sequence_reshape_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    sequence_reshape,
+    ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::SequenceReshapeKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    sequence_reshape_grad,
+    ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext,
+                                   int64_t>,
+    ops::SequenceReshapeGradKernel<paddle::platform::CUDADeviceContext, int>);
diff --git a/paddle/operators/sequence_reshape_op.h b/paddle/operators/sequence_reshape_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..c6f528ab8a73294bb8ee91425f34e44c66f1932c
--- /dev/null
+++ b/paddle/operators/sequence_reshape_op.h
@@ -0,0 +1,86 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+using LoDTensor = framework::LoDTensor;
+template <typename DeviceContext, typename T>
+class SequenceReshapeKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* in = context.Input<LoDTensor>("X");
+    auto* out = context.Output<LoDTensor>("Out");
+    int out_width = context.Attr<int>("new_dim");
+
+    auto in_dims = in->dims();
+    int64_t in_width = in_dims[1];
+    auto& in_lod = in->lod();
+
+    PADDLE_ENFORCE_EQ(in_lod.size(), 1UL,
+                      "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(
+        in_dims[0], in_lod[0].back(),
+        "Inconsistent size between X.shape[0] and X.lod()[0].back().");
+
+    auto in_lod_l0 = in_lod[0];
+    int seq_num = in_lod_l0.size() - 1;
+
+    if (in_width == out_width) {
+      out->set_lod(in->lod());
+    } else {
+      auto& out_lod = *out->mutable_lod();
+      out_lod.resize(1);
+      out_lod[0].resize(seq_num + 1);
+      out_lod[0][0] = 0;
+      for (int i = 0; i < seq_num; ++i) {
+        size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
+        size_t offset = 0;
+        offset = (seq_len * in_width) / out_width;
+        PADDLE_ENFORCE_EQ(offset * out_width, seq_len * in_width,
+                          "Please make sure (sequence_length * dimension) can "
+                          "be divided by new_dim with no remainder for each "
+                          "sequence. The %dth sequence is invalid.",
+                          i + 1);
+        out_lod[0][i + 1] = out_lod[0][i] + offset;
+      }
+    }
+
+    framework::Copy(*in, context.GetPlace(), out);
+    out->Resize({static_cast<int64_t>(out->lod()[0].back()), out_width});
+  }
+};
+
+template <typename DeviceContext, typename T>
+class SequenceReshapeGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* x_tensor_ptr = context.Input<LoDTensor>("X");
+    auto* outg_tensor_ptr =
+        context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* xg_tensor_ptr =
+        context.Output<LoDTensor>(framework::GradVarName("X"));
+
+    xg_tensor_ptr->mutable_data<T>(context.GetPlace());
+    framework::Copy(*outg_tensor_ptr, context.GetPlace(), xg_tensor_ptr);
+    xg_tensor_ptr->Resize(x_tensor_ptr->dims());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc
index ade94b40bed91c64d3074036c067de34323bdaa7..bf870115a4d7b6f4d578df7707826973d4363ba6 100644
--- a/paddle/operators/shrink_rnn_memory_op.cc
+++ b/paddle/operators/shrink_rnn_memory_op.cc
@@ -138,6 +138,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
         math::set_constant(dev_ctx, &rest_tensor, 0.0f);
       }
     }
+    dx_tensor.set_lod(x_tensor.lod());
   }
 };
 
diff --git a/paddle/operators/split_op.cc b/paddle/operators/split_op.cc
index 4dfae043cb1091c9491d89aec4d1415d4741e013..8d55ae5dd7b0e76acb9f21cb10b79cb7aca18a8d 100644
--- a/paddle/operators/split_op.cc
+++ b/paddle/operators/split_op.cc
@@ -60,6 +60,12 @@ class SplitOp : public framework::OperatorWithKernel {
       }
     }
     ctx->SetOutputsDim("Out", outs_dims);
+    if (axis != 0) {
+      // Only pass LoD when not spliting along the first dim.
+      for (size_t i = 0; i < outs_number; ++i) {
+        ctx->ShareLoD("X", "Out", 0, i);
+      }
+    }
   }
 };
 
diff --git a/paddle/operators/split_selected_rows_op.cc b/paddle/operators/split_selected_rows_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0515ea13aadbd6e12e2586c937f3d1ed0a298d69
--- /dev/null
+++ b/paddle/operators/split_selected_rows_op.cc
@@ -0,0 +1,107 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/split_selected_rows_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SplitSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SplitSelectedRowsOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The input SelectedRows.");
+    AddOutput("Out", "The outputs of input SelectedRows.").AsDuplicable();
+    AddAttr<std::vector<int>>("height_sections",
+                              "Height for each output SelectedRows.")
+        .SetDefault(std::vector<int>({}));
+
+    AddComment(R"DOC(
+Split a SelectedRows with a specified rows section.
+height_sections is only needed when need to split the dims of the original tensor.
+
+Example:
+  Input:
+    X.rows = {7, 5}
+    X.height = 12
+  Attr:
+    height_sections = {4, 8}
+  Out:
+    out0.rows = {}
+    out0.height = 4
+
+    out1.rows = {5, 7}
+    out2.height = 8
+
+)DOC");
+  }
+};
+
+class SplitSelectedRowsOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "SplitSelectedRowsOp must has input X.");
+    PADDLE_ENFORCE(ctx->HasOutputs("Out"),
+                   "SplitSelectedRowsOp must has output Out.");
+
+    std::vector<int> height_sections =
+        ctx->Attrs().Get<std::vector<int>>("height_sections");
+    int64_t n = ctx->Outputs("Out").size();
+
+    std::vector<framework::DDim> outs_dims;
+    outs_dims.reserve(n);
+
+    // make output dims
+    for (int64_t i = 0; i < n; ++i) {
+      auto dims = ctx->GetInputDim("X");
+      if (height_sections.size()) {
+        PADDLE_ENFORCE_EQ(
+            height_sections.size(), static_cast<size_t>(n),
+            "The size of height section should be the same with height"
+            " section size.");
+        dims[0] = height_sections[i];
+      }
+      outs_dims.push_back(dims);
+    }
+    ctx->SetOutputsDim("Out", outs_dims);
+  }
+};
+
+class SplitSelectedRowsGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto *grad_op = new framework::OpDesc();
+    grad_op->SetType("sum");
+    grad_op->SetInput("X", OutputGrad("Out"));
+    grad_op->SetOutput("Out", InputGrad("X"));
+    grad_op->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(grad_op);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(split_selected_rows, ops::SplitSelectedRowsOp,
+                  ops::SplitSelectedRowsOpMaker,
+                  ops::SplitSelectedRowsGradMaker);
+REGISTER_OP_CPU_KERNEL(
+    split_selected_rows,
+    ops::SplitSelectedRowsOpKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/pool_cudnn_op.h b/paddle/operators/split_selected_rows_op.cu
similarity index 72%
rename from paddle/operators/pool_cudnn_op.h
rename to paddle/operators/split_selected_rows_op.cu
index 5adf27f5bccae8542719612320bc6dbe21007634..983285480fd9de7a2a4d2787a9bba72c160b7fae 100644
--- a/paddle/operators/pool_cudnn_op.h
+++ b/paddle/operators/split_selected_rows_op.cu
@@ -1,19 +1,19 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
     http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
-
-#include "paddle/framework/op_registry.h"
-#include "paddle/operators/pool_op.h"
-
-namespace paddle {
-namespace operators {}  // namespace operators
-}  // namespace paddle
+#include "paddle/operators/split_selected_rows_op.h"
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    split_selected_rows,
+    ops::SplitSelectedRowsOpKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/operators/split_selected_rows_op.h b/paddle/operators/split_selected_rows_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..12e64e2901e5902d187d65a12c94b8d7ef45a481
--- /dev/null
+++ b/paddle/operators/split_selected_rows_op.h
@@ -0,0 +1,88 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <vector>
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/selected_rows_functor.h"
+
+namespace paddle {
+namespace operators {
+
+static int FindOutIdx(int row, const std::vector<int>& height_sections) {
+  int offset = 0;
+  for (size_t i = 0; i < height_sections.size(); ++i) {
+    if (row >= offset && row < (offset + height_sections[i])) {
+      return i;
+    }
+    offset += height_sections[i];
+  }
+  return -1;
+}
+
+template <typename DeviceContext, typename T>
+class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<framework::SelectedRows>("X");
+    auto outs = ctx.MultiOutput<framework::SelectedRows>("Out");
+    auto height_sections = ctx.Attr<std::vector<int>>("height_sections");
+
+    auto x_rows = x->rows();
+    std::vector<std::vector<int>> outs_rows_idx;
+    outs_rows_idx.resize(outs.size());
+
+    auto row_numel = x->value().numel() / x->value().dims()[0];
+    auto src = x->value().data<T>();
+
+    for (size_t i = 0; i < x_rows.size(); ++i) {
+      int out_idx = FindOutIdx(x_rows[i], height_sections);
+      outs_rows_idx[out_idx].push_back(i);
+    }
+    auto place = ctx.GetPlace();
+
+    for (size_t i = 0; i < outs_rows_idx.size(); ++i) {
+      auto rows_idx = outs_rows_idx[i];
+      if (rows_idx.size() > 0) {
+        auto dims = x->GetCompleteDims();
+        dims[0] = rows_idx.size();
+        outs[i]->mutable_value()->mutable_data<T>(dims, x->place());
+        for (auto idx : rows_idx) {
+          outs[i]->mutable_rows()->push_back(x_rows[idx]);
+        }
+        auto dst = outs[i]->mutable_value()->mutable_data<T>(ctx.GetPlace());
+        for (size_t j = 0; j < rows_idx.size(); j++) {
+          if (platform::is_cpu_place(place)) {
+            memory::Copy(platform::CPUPlace(), dst + j * row_numel,
+                         platform::CPUPlace(), src + rows_idx[j] * row_numel,
+                         sizeof(T) * row_numel);
+          } else {
+#ifdef PADDLE_WITH_CUDA
+            auto stream = ctx.cuda_device_context().stream();
+            memory::Copy(platform::CUDAPlace(), dst + j * row_numel,
+                         platform::CUDAPlace(), src + rows_idx[j] * row_numel,
+                         sizeof(T) * row_numel, stream);
+#else
+            PADDLE_THROW("Paddle is not compiled with GPU");
+#endif
+          }
+        }
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/warpctc_op.h b/paddle/operators/warpctc_op.h
index 41899c7fe0c3089c4fc7c160c8896dec0e3cd6dd..8aea061c00cc9614db37ed408b6d330ef707d1cf 100644
--- a/paddle/operators/warpctc_op.h
+++ b/paddle/operators/warpctc_op.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/math/math_function.h"
 #include "paddle/operators/math/sequence_padding.h"
+#include "paddle/operators/math/sequence_scale.h"
 #include "paddle/platform/dynload/warpctc.h"
 
 namespace paddle {
@@ -178,11 +179,14 @@ class WarpCTCKernel : public framework::OpKernel<T> {
     T* warpctc_grad_data =
         warpctc_grad->mutable_data<T>(warpctc_logits.dims(), ctx.GetPlace());
 
+    math::SetConstant<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), warpctc_grad,
+        static_cast<T>(0));
+
     // warpctc accesses labels in CPU memory
     Tensor warpctc_label;
     Copy(*label, platform::CPUPlace(), ctx.device_context(), &warpctc_label);
     const int* warpctc_label_data = warpctc_label.data<int>();
-
     // warpctc stores loss in CPU memory
     Tensor warpctc_loss;
     T* warpctc_loss_data =
@@ -206,11 +210,18 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* warpctc_grad = ctx.Input<Tensor>("WarpCTCGrad");
     auto* logits_grad = ctx.Output<LoDTensor>(framework::GradVarName("Logits"));
+    const Tensor* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
 
+    logits_grad->mutable_data<T>(ctx.GetPlace());
     bool norm_by_times = ctx.Attr<bool>("norm_by_times");
     math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
         ctx.template device_context<DeviceContext>(), *logits_grad,
         *warpctc_grad, norm_by_times);
+
+    const T* loss_grad_data = loss_grad->data<T>();
+    math::ScaleLoDTensorFunctor<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), *logits_grad,
+        loss_grad_data);
   }
 };
 
diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc
index 7a3400919efe6f3bed40e45a245b556beab6fce4..2fdd25dbbe68659f8a0a9da13a87148ed259127a 100644
--- a/paddle/operators/while_op.cc
+++ b/paddle/operators/while_op.cc
@@ -121,8 +121,8 @@ class WhileGradOp : public framework::OperatorBase {
       for (size_t i = 0; i < outside_og_names.size(); ++i) {
         auto outside_og_name = outside_og_names[i];
         auto inside_og_name = inside_og_names[i];
-        VLOG(10) << "Linking outside " << outside_og_name << " --> inside "
-                 << inside_og_name;
+        VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
+                << inside_og_name;
         auto &og_outside =
             detail::Ref(scope.FindVar(outside_og_name),
                         "Cannot find Outside Gradient %s", outside_og_name);
@@ -141,11 +141,11 @@ class WhileGradOp : public framework::OperatorBase {
           auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
           auto &inside_array =
               detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-          VLOG(10) << outside_og_name << " size = " << outside_array.size();
+          VLOG(8) << outside_og_name << " size = " << outside_array.size();
           inside_array.resize(outside_array.size());
 
           for (size_t j = 0; j < inside_array.size(); ++j) {
-            VLOG(10) << j << " " << outside_array[j].numel();
+            VLOG(8) << j << " " << outside_array[j].numel();
             if (outside_array[j].numel() != 0) {
               inside_array[j].set_lod(outside_array[j].lod());
               inside_array[j].ShareDataWith(outside_array[j]);
@@ -187,10 +187,14 @@ class WhileGradOp : public framework::OperatorBase {
             attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
             attrs["value"] = 0.0f;
 
+            auto var_name = pg_names[param_id];
             auto zero_op = framework::OpRegistry::CreateOp(
                 "fill_constant", framework::VariableNameMap{},
-                {{"Out", {pg_names[param_id]}}}, attrs);
+                {{"Out", {var_name}}}, attrs);
             zero_op->Run(scope, dev_place);
+            scope.FindVar(var_name)
+                ->GetMutable<framework::LoDTensor>()
+                ->set_lod(inside_tensor.lod());
           }
         }
 
@@ -231,7 +235,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
     auto igs = InputGrad(kX, /*do not drop empty gradient*/ false);
     for (auto &each_ig : igs) {
       if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) {
-        VLOG(10) << "Ignore " << each_ig;
+        VLOG(8) << "Ignore " << each_ig;
         each_ig = framework::kEmptyVarName;
       }
     }
diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h
index bbb1ee48214cecdc6b6cd2a400cc9d12d5e8b64a..9a44a776f2b032bc2c3452a739caf0994e25891b 100644
--- a/paddle/optimizer/lr_policy.h
+++ b/paddle/optimizer/lr_policy.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #include <algorithm>
diff --git a/paddle/optimizer/parameter_optimizer_test.cc b/paddle/optimizer/parameter_optimizer_test.cc
index 83757a391784453341f22eca73bc73c14ce4174f..2bcfca55cc5c9720f16e715dd85bf15c6f2efbbe 100644
--- a/paddle/optimizer/parameter_optimizer_test.cc
+++ b/paddle/optimizer/parameter_optimizer_test.cc
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "parameter_optimizer.h"
 #include <cmath>
diff --git a/paddle/optimizer/serialization.h b/paddle/optimizer/serialization.h
index 92fbf65cc6b98d7f92841bafe4ab77001ca03b7c..98548ddb7aa22558c83aff7454834caa89017388 100644
--- a/paddle/optimizer/serialization.h
+++ b/paddle/optimizer/serialization.h
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once
 
 #include <iostream>
diff --git a/paddle/optimizer/serialization_test.cc b/paddle/optimizer/serialization_test.cc
index 940e941e9042d8a37363311867df5bb477b3dac0..25a8f5d351e3e85ab5d8dee8b639d962f8fc9990 100644
--- a/paddle/optimizer/serialization_test.cc
+++ b/paddle/optimizer/serialization_test.cc
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "serialization.h"
 #include "gtest/gtest.h"
diff --git a/paddle/optimizer/tensor.h b/paddle/optimizer/tensor.h
index 86fa625e01b981f0377bd699d191fc865ee89784..e999e9bda129a85306f73afc75f3936fefb4c006 100644
--- a/paddle/optimizer/tensor.h
+++ b/paddle/optimizer/tensor.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 /**
  * @brief tensor used by optimizer
diff --git a/paddle/platform/assert.h b/paddle/platform/assert.h
index 70d3bf75062c5471ab54ee2c4c7637c679d9a8a3..d813b9529ba2c8d5a3f39eadeb82d7569acd5fdd 100644
--- a/paddle/platform/assert.h
+++ b/paddle/platform/assert.h
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once
 
 #define STRINGIFY(x) #x
diff --git a/paddle/platform/cpu_info_test.cc b/paddle/platform/cpu_info_test.cc
index 8fb195aa7c0a41b7417ff5cf63394046e9c72267..1bfe62c1fb667e17d7383cf0a1b2632043c72743 100644
--- a/paddle/platform/cpu_info_test.cc
+++ b/paddle/platform/cpu_info_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/platform/cpu_info.h"
 #include "paddle/string/printf.h"
 
diff --git a/paddle/platform/dynload/cudnn.cc b/paddle/platform/dynload/cudnn.cc
index 76ec82e10840751a654c7d7f57da8d5570d2a9ce..701f6240fef2e9df25472c0caf6177e7f1964cfd 100644
--- a/paddle/platform/dynload/cudnn.cc
+++ b/paddle/platform/dynload/cudnn.cc
@@ -44,7 +44,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
 
 #ifdef PADDLE_USE_DSO
 bool HasCUDNN() {
-  std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle);
+  std::call_once(cudnn_dso_flag, GetCUDNNDsoHandle, &cudnn_dso_handle);
   return cudnn_dso_handle != nullptr;
 }
 
diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h
index 8c937b37d714a06c623f4e204bd572fdd200ea5d..b92634794947f6979255d6241ff3a333f3771bfb 100644
--- a/paddle/platform/dynload/cudnn.h
+++ b/paddle/platform/dynload/cudnn.h
@@ -36,7 +36,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
     auto operator()(Args... args) -> decltype(__name(args...)) {   \
       using cudnn_func = decltype(__name(args...)) (*)(Args...);   \
       std::call_once(cudnn_dso_flag,                               \
-                     paddle::platform::dynload::GetCudnnDsoHandle, \
+                     paddle::platform::dynload::GetCUDNNDsoHandle, \
                      &cudnn_dso_handle);                           \
       EnforceCUDNNLoaded(#__name);                                 \
       void* p_##__name = dlsym(cudnn_dso_handle, #__name);         \
diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc
index 7a82d06a0acbfa44386d40df97f6b0e43ed46577..c8c09ae608fa7cc67a54fada9cfe86b40096a9fd 100644
--- a/paddle/platform/dynload/dynamic_loader.cc
+++ b/paddle/platform/dynload/dynamic_loader.cc
@@ -134,7 +134,7 @@ void GetCublasDsoHandle(void** dso_handle) {
 #endif
 }
 
-void GetCudnnDsoHandle(void** dso_handle) {
+void GetCUDNNDsoHandle(void** dso_handle) {
 #if defined(__APPLE__) || defined(__OSX__)
   GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle,
                              false);
diff --git a/paddle/platform/dynload/dynamic_loader.h b/paddle/platform/dynload/dynamic_loader.h
index c0e5452e5ae723ec314ebafde86a6ff63980be00..7b0c8c16d7484480550f3c753fe52d1f04651900 100644
--- a/paddle/platform/dynload/dynamic_loader.h
+++ b/paddle/platform/dynload/dynamic_loader.h
@@ -32,7 +32,7 @@ void GetCublasDsoHandle(void** dso_handle);
  * @param    **dso_handle   dso handler
  *
  */
-void GetCudnnDsoHandle(void** dso_handle);
+void GetCUDNNDsoHandle(void** dso_handle);
 
 /**
  * @brief    load the DSO of CURAND
diff --git a/paddle/platform/hostdevice.h b/paddle/platform/hostdevice.h
index eb2df291cceef553d6422e6166e1fef2c63e2a47..fa4659ed2988a2199d8a8450f825d31fd0ca5907 100644
--- a/paddle/platform/hostdevice.h
+++ b/paddle/platform/hostdevice.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #ifdef __CUDACC__
diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc
index 4f1eba01df5531529ad3c79648b5e7f8651df619..150b2d3b1fbacec18ea33156f30f1c9965aedb31 100644
--- a/paddle/platform/place_test.cc
+++ b/paddle/platform/place_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/platform/place.h"
 #include <sstream>
 #include "gtest/gtest.h"
diff --git a/paddle/pybind/print_operators_doc.cc b/paddle/pybind/print_operators_doc.cc
index f4f281229e611a6c9c8e9ecd54e0097ab683bbf3..99694fa592059d979297b72748125d02b2dd70a3 100644
--- a/paddle/pybind/print_operators_doc.cc
+++ b/paddle/pybind/print_operators_doc.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include <iostream>
 #include <sstream>  // std::stringstream
 #include <string>
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 5d170c66e97f56440968ba568167e6845631e1cc..c5d70bc9f91bc92b28a546cc79b08a9fda150050 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -430,13 +430,8 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("init_glog", framework::InitGLOG);
   m.def("init_devices", &framework::InitDevices);
 
-  m.def("use_cpu", framework::UseCPU);
-  m.def("use_mkldnn", framework::UseMKLDNN);
-  m.def("use_cuda", framework::UseCUDA);
-  m.def("use_cudnn", framework::UseCUDNN);
-  m.def("use_all", framework::UseALL);
-
   m.def("is_compile_gpu", IsCompileGPU);
+
   m.def("set_feed_variable", framework::SetFeedVariable);
   m.def("get_fetch_variable", framework::GetFetchVariable);
 
diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h
index 6b4290972bade585d1a0c2ae919a2e712bdf308c..3b5210e2b91dd697e213a0c847f73c6969cd654b 100644
--- a/paddle/pybind/tensor_py.h
+++ b/paddle/pybind/tensor_py.h
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <string>
-#include "paddle/framework/tensor.h"
+#include "paddle/framework/lod_tensor.h"
 #include "paddle/memory/memcpy.h"
 #include "paddle/platform/device_context.h"
 #include "pybind11/numpy.h"
@@ -97,14 +97,27 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
 
 template <typename T>
 T TensorGetElement(framework::Tensor &self, size_t offset) {
-  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
-  return self.data<T>()[offset];
+  if (platform::is_cpu_place(self.place())) {
+    return self.data<T>()[offset];
+  } else {
+    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
+    framework::Copy(self, platform::CPUPlace(), dst.get());
+    return dst->data<T>()[offset];
+  }
 }
 
+// TODO(dzhwinter) : fix the redundent Tensor allocate and free
 template <typename T>
 void TensorSetElement(framework::Tensor &self, size_t offset, T elem) {
-  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
-  self.data<T>()[offset] = elem;
+  if (platform::is_gpu_place(self.place())) {
+    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
+    framework::Copy(self, platform::CPUPlace(), dst.get());
+    dst->data<T>()[offset] = elem;
+    framework::Copy(*dst.get(), self.place(), &self);
+
+  } else if (platform::is_cpu_place(self.place())) {
+    self.data<T>()[offset] = elem;
+  }
 }
 
 template <typename T>
diff --git a/paddle/scripts/cluster_train/paddle.py b/paddle/scripts/cluster_train/paddle.py
index 9b03ed1d8f6a28259a6cb45f096575b5f3d27ca7..ba313ac6a18fe22e1e14d2cce42320ab6d4fe398 100644
--- a/paddle/scripts/cluster_train/paddle.py
+++ b/paddle/scripts/cluster_train/paddle.py
@@ -80,168 +80,3 @@ def job_prepare(jobdir, data=None):
             #create job dir
             run('rm ' + jobdir + ' -fr && ' + 'mkdir -p ' + jobdir)
             #push data and paddle bin
-            put(data + "/*", jobdir)
-            run("mkdir -p " + log)
-        run('rm -fr ' + log + "/*")
-
-    def set_nodefile(nodeid):
-        '''
-        create nodefile for later usage
-        '''
-        run('echo ' + str(nodeid) + ' > ' + jobdir + '/nodefile')
-
-    execute(job_create_workspace, jobdir, data, hosts=conf.HOSTS)
-    for i in xrange(len(conf.HOSTS)):
-        execute(set_nodefile, i, hosts=conf.HOSTS[i])
-    #clean rubbish caused by exception 
-    with settings(warn_only=True):
-        execute(kill_process, hosts=conf.HOSTS)
-
-
-def job_pserver(jobdir, pids=None):
-    '''
-    start all pservers
-    '''
-    pargs = " --num_gradient_servers=" + str(len(conf.HOSTS))
-    pargs += (" --nics=" + conf.PADDLE_NIC)
-    pargs += " --port=" + str(conf.PADDLE_PORT)
-    pargs += " --ports_num=" + str(conf.PADDLE_PORTS_NUM)
-    #always start sparse pserver by default
-    pargs += " --ports_num_for_sparse=" + str(conf.PADDLE_PORTS_NUM_FOR_SPARSE)
-    pargs += " --comment=" + "paddle_process_by_paddle"
-
-    def start_pserver(jobdir, pargs):
-        '''
-        start pserver process with fabric executor
-        '''
-        with prefix('export LD_LIBRARY_PATH=' + \
-                conf.LD_LIBRARY_PATH + \
-                ':$LD_LIBRARY_PATH'):
-            program = 'paddle pserver'
-            run('cd ' + jobdir + '; '  + \
-                'GLOG_logtostderr=0 GLOG_log_dir="./log" ' + \
-                'nohup ' + \
-                program + " " + pargs + ' > ./log/server.log 2>&1 < /dev/null & ',
-                pty=False)
-
-    execute(start_pserver, jobdir, pargs, hosts=conf.HOSTS)
-
-
-def job_trainer(jobdir, train_args_dict, pids=None):
-    '''
-    start paddle trainer
-    '''
-    args = " --num_gradient_servers=" + str(len(conf.HOSTS))
-    args += " --nics=" + conf.PADDLE_NIC
-    args += " --port=" + str(conf.PADDLE_PORT)
-    args += " --ports_num=" + str(conf.PADDLE_PORTS_NUM)
-    args += " --comment=" + "paddle_process_by_paddle"
-    ip_string = ""
-    for i in xrange(len(conf.HOSTS)):
-        host = conf.HOSTS[i]
-        left = host.find("@")
-        right = host.find(':')
-        left = 0 if left == -1 else left + 1
-        right = len(host) if right == -1 else right
-        ip_string += (socket.gethostbyname(host[left:right]) + ",")
-    ip_string = ip_string.rstrip(",")
-    args += " --pservers=" + ip_string
-
-    args_ext = ""
-    for key, value in train_args_dict.items():
-        args_ext += (' --' + key + '=' + value)
-    args += " " + args_ext
-
-    def start_trainer(jobdir, args):
-        '''
-        start trainer process with fabric executor
-        '''
-        with prefix('export LD_LIBRARY_PATH=' + \
-                conf.LD_LIBRARY_PATH + \
-                ':$LD_LIBRARY_PATH'):
-            program = 'paddle train'
-            run('cd ' + jobdir + '; '  + \
-                'GLOG_logtostderr=0 '
-                'GLOG_log_dir="./log" '
-                'nohup ' + \
-                program + " " + args + " > ./log/train.log 2>&1 < /dev/null & ",
-                pty=False)
-
-    for i in xrange(len(conf.HOSTS)):
-        train_args = copy.deepcopy(args)
-        train_args += " --trainer_id=" + str(i)
-        execute(start_trainer, jobdir, train_args, hosts=conf.HOSTS[i])
-
-
-def job_all(job_package, jobdir=None, train_args_dict=None):
-    '''
-    param job_package
-    param train_args_dict
-    '''
-    if jobdir is None:
-        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
-        jobdir = conf.ROOT_DIR + "/JOB" + timestamp
-    job_prepare(jobdir, job_package)
-    job_pserver(jobdir)
-    time.sleep(5)  #wait until pservers completely start
-    job_trainer(jobdir, train_args_dict)
-    job_clean()
-
-
-def job_clean():
-    '''
-    if starting job failed from paddle internal, the framework always
-    is launched successfully since these process are daemon processes.
-    so this job_clean can alway clean job rubbish process with ctrl+c.
-    '''
-
-    def signal_handler(signal, frame):
-        '''
-        SIGINT handler
-        '''
-
-        def kill_process():
-            run("ps aux \
-                  | grep paddle_process_by_paddle \
-                  | grep -v grep  \
-                  | awk '{print $2}' \
-                  | xargs kill > /dev/null 2>&1")
-
-        with settings(warn_only=True):
-            execute(kill_process, hosts=conf.HOSTS)
-
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.pause()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        prog="paddle.py", description='simple tool for cluster training')
-    parser.add_argument(
-        '-j',
-        '--job_workspace',
-        required=False,
-        default=None,
-        help='job workspace')
-    parser.add_argument(
-        '-p',
-        '--job_dispatch_package',
-        required=False,
-        default=None,
-        help='job package for dispatching to all other nodes')
-
-    args, train_args_list = parser.parse_known_args()
-    train_args = refine_unknown_args(train_args_list)
-    train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
-
-    if args.job_workspace is not None:
-        #if assigned workspace, do not need to dispatch data,
-        #so job_local_package should be None
-        assert args.job_dispatch_package is None
-        job_all(None, args.job_workspace, train_args_dict)
-    elif args.job_dispatch_package is not None:
-        assert args.job_workspace is None
-        assert os.path.isdir(args.job_dispatch_package)
-        job_all(args.job_dispatch_package, None, train_args_dict)
-    else:
-        print "--job_workspace or --job_dispatch_package should be set"
diff --git a/paddle/string/piece.cc b/paddle/string/piece.cc
index b80afdec82d642fd3a8245b96ce1bb2bea17cbae..330ca5f0155e92e34563ca16fffa482502428fda 100644
--- a/paddle/string/piece.cc
+++ b/paddle/string/piece.cc
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/string/piece.h"
 
diff --git a/paddle/string/piece.h b/paddle/string/piece.h
index 7362ce02c7c80e121218fab77d87696403b1c5e8..f2bb6b2c76164fbe63a9b53ec41385014f240ffd 100644
--- a/paddle/string/piece.h
+++ b/paddle/string/piece.h
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #pragma once
 
diff --git a/paddle/string/piece_test.cc b/paddle/string/piece_test.cc
index cf5152ff5a3cb0a2afae0c90b787abf291122fa3..250f26d61f8efe49861a78b83f944fa06fa5ec46 100644
--- a/paddle/string/piece_test.cc
+++ b/paddle/string/piece_test.cc
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/string/piece.h"
 
diff --git a/paddle/string/printf.h b/paddle/string/printf.h
index 8b5ce63a8e8dfe77962ff1e7415911d381a28aac..03809d22092012c7815eb6a89b9fb8bc9eed549b 100644
--- a/paddle/string/printf.h
+++ b/paddle/string/printf.h
@@ -1,18 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 // Compared with std::stringstream, there are primary purpose of
 // string::Printf:
diff --git a/paddle/string/printf_test.cc b/paddle/string/printf_test.cc
index 2586264046a2e2ba24b0908c1f6eba163cdef448..b5ad35513bdcbccd5dbf951a3a8e5422f6424b26 100644
--- a/paddle/string/printf_test.cc
+++ b/paddle/string/printf_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/string/printf.h"
 
 #include <string>
diff --git a/paddle/string/tinyformat/tinyformat.h b/paddle/string/tinyformat/tinyformat.h
index 3516777d9f9669c1e1300b9136c26e61f65b14a7..d1a2c47f1a9f258aefbdcce88513e05dda308fa0 100644
--- a/paddle/string/tinyformat/tinyformat.h
+++ b/paddle/string/tinyformat/tinyformat.h
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 // tinyformat.h
 // Copyright (C) 2011, Chris Foster [chris42f (at) gmail (d0t) com]
 //
diff --git a/paddle/string/to_string.h b/paddle/string/to_string.h
index 3b3bcc69a478045156225728236174fd601461dd..178edc18951f94291a63566516df36956f25f67b 100644
--- a/paddle/string/to_string.h
+++ b/paddle/string/to_string.h
@@ -15,9 +15,15 @@ limitations under the License. */
 #pragma once
 #include <sstream>
 #include <string>
+#include <typeindex>
 
 namespace paddle {
 namespace string {
+inline std::ostream& operator<<(std::ostream& s, const std::type_index& t) {
+  s << t.name();
+  return s;
+}
+
 template <typename T>
 inline std::string to_string(T v) {
   std::ostringstream sout;
@@ -25,6 +31,11 @@ inline std::string to_string(T v) {
   return sout.str();
 }
 
+template <>
+inline std::string to_string(std::type_index t) {
+  return t.name();
+}
+
 // Faster std::string/const char* type
 template <>
 inline std::string to_string(std::string v) {
diff --git a/paddle/trainer/tests/picojson.h b/paddle/trainer/tests/picojson.h
index 4aa64961d096ce94a4187fe94000b05de4080122..eaa8b9baf6e4e753a441ab77811f494cbdab80cf 100644
--- a/paddle/trainer/tests/picojson.h
+++ b/paddle/trainer/tests/picojson.h
@@ -1,3 +1,17 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 /*
  * Copyright 2009-2010 Cybozu Labs, Inc.
  * Copyright 2011-2014 Kazuho Oku
diff --git a/paddle/trainer/tests/simple_sparse_neural_network.py b/paddle/trainer/tests/simple_sparse_neural_network.py
index 30346ef299d0bc8585ccff7f2fc4885b0d9f9dfc..970fb466dc5061713fe7815d5247cbbde93be821 100644
--- a/paddle/trainer/tests/simple_sparse_neural_network.py
+++ b/paddle/trainer/tests/simple_sparse_neural_network.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4)
diff --git a/paddle/trainer/tests/simple_sparse_neural_network_dp.py b/paddle/trainer/tests/simple_sparse_neural_network_dp.py
index 86b272edfe1bbb23c45cffe282f6475ceaa0cc41..49043c91758b7199d063670616826656f7e8b485 100644
--- a/paddle/trainer/tests/simple_sparse_neural_network_dp.py
+++ b/paddle/trainer/tests/simple_sparse_neural_network_dp.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer.PyDataProvider2 import provider, integer_sequence, integer_value
 import random
 
diff --git a/paddle/utils/enable_virtualenv.py b/paddle/utils/enable_virtualenv.py
index ccfaa7c147b2ce25cb6007aa04cfc33961b7e10b..4e998381e9e2a9254c642e969abb9f976d0e3938 100644
--- a/paddle/utils/enable_virtualenv.py
+++ b/paddle/utils/enable_virtualenv.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 
 
diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto
index d27b1bcf80045216a5807812d39f7a248a956076..b341d78d194ddcbab265084db62752bc53e1b709 100644
--- a/proto/OptimizerConfig.proto
+++ b/proto/OptimizerConfig.proto
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 syntax = "proto2";
 
 option optimize_for = LITE_RUNTIME;
diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py
index 95797fba8f67bacb421f5c2813ad6332bc53cbc9..0eeaf7eabb179f19d2af8dafe821f7baa153fead 100644
--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@@ -16,13 +16,22 @@ from paddle.trainer.config_parser import *
 from default_decorators import *
 
 __all__ = [
-    "evaluator_base", "classification_error_evaluator", "auc_evaluator",
-    "pnpair_evaluator", "precision_recall_evaluator", "ctc_error_evaluator",
-    "chunk_evaluator", "sum_evaluator", "column_sum_evaluator",
-    "value_printer_evaluator", "gradient_printer_evaluator",
-    "maxid_printer_evaluator", "maxframe_printer_evaluator",
-    "seqtext_printer_evaluator", "classification_error_printer_evaluator",
-    "detection_map_evaluator"
+    "evaluator_base",
+    "classification_error_evaluator",
+    "auc_evaluator",
+    "pnpair_evaluator",
+    "precision_recall_evaluator",
+    "ctc_error_evaluator",
+    "chunk_evaluator",
+    "sum_evaluator",
+    "column_sum_evaluator",
+    "value_printer_evaluator",
+    "gradient_printer_evaluator",
+    "maxid_printer_evaluator",
+    "maxframe_printer_evaluator",
+    "seqtext_printer_evaluator",
+    "classification_error_printer_evaluator",
+    "detection_map_evaluator",
 ]
 
 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
index 01d31ef3fad827bfd103ee00f4ddd1bde14e0f82..93b505a6023cb4e2fe14f1208ac12a841ec18f55 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-3, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
index 91849b40a0801b07642f96c061755597cd2ec073..745f060fa5542293c646a83a81a263e92bda6a48 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-3, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
index f87237f9b59a833825841bcdd605c2332c2d5941..b6fc8f70f96487aabf249166b9b339d1b86f986e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
index 7012dbf6a0b70957d6227d4125f4cd75b9abb215..6edc03bba0b286fae5964bbc2bdffb99b175c718 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 '''
 Test all activations.
 '''
diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
index a607a62c99f69ac4921a465a20f00b6413b31c8e..59a71e1cd1d46f6be132fa5758d54edb219e9dd8 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py
index dc8975cb311582a621eb4a5a166ddc34348fe3e9..96f06b40180d5e7707ea363e6311079d81302631 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 '''
 Test mixed layer, projections and operators.
 '''
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
index 7c848ef3fcd63314bfe91db6ebac406ba8758998..69a0a5b8ff53961f07ff3648c63e61d382965519 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
index c19bb9685aa24c4d66e4f0bbbcb004507413dbe8..97b41fb3725ae325d3e67769b2d6702fdbbdb238 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
index 565e281a6e1deff18aa48f97eb2f0e39ca79752f..4e653dedb9d374ed53148c47582c84c2b6fb532d 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
index a5b5bb30b1d21aaa0c90868af7b5138e8a81aab1..dc418325f8857dd8f3ff60a05bb003a2b72e30b4 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
index a991b22252ba10eed895efd931108c2d8b0e52f1..5b98e3fb348a620e0474322b885112a6ecff3495 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
index cd7f609638e384314177d653e46ecf7a4b41a12f..f3abdfe1ae71bf728a521c0f46ec773b058acb46 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
index be83f4f83c5d05ea2ffd9e3df0c09fb1a37a3e57..4eb9f207e0cc4d209bdd616e92a88678c148ebff 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
index f066fe1fb30877bf40bb6299d35546f7427989a5..24564c105f9e1321c1a08dea611a601bc94bddb0 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
index aa0a2c0d5fe19b6c414acd708bb6e82d9fb6568f..35087c42289aa60f34b71b432fbf30df3a0c3167 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
index 7ce375c708af7b0b7ae1d700dedbdb6a4ce16c7f..b076b89106e0854046603f24164d391b0c668474 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
index caa6aaa9430ffaee7ade93ee04ec90103bf8cf43..fa7a1abe9a129ca24a6b2bb1c13dc30eddd48147 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
index 8314a7e9a5586647c70ff010156817110919c72b..569d747857d8e756e70fea10b3e0b92801c1f4a1 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
index a113279fc17b49ad01b8860b61180af0f35694fb..4f27d9987346198bab06ccc59b63b0b73906e800 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
index 3572a2cb07d95ffaec261bdc63492ade734ea8b9..d37954222edb9202ec24bf5d6fed1da1028a276b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
index e52d48dde0084aacd3f7874cc384d59287a0c7d5..63ba0a72b9e19a6a0da9961bd086810905f9129a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 vec1 = data_layer(name='vector1', size=10)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
index c53f10e0a410b27d86b2415d98178c4790e0b0ba..9892bca05d52f870df1c5b2e514240fa06d7a9a4 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
index b249de0fee3c8ca4ad0520872fa2497c493d31b5..6fb773d9f738aa3ffa91feb7bace22e2b8e4f312 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='data', size=1024)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
index 2842d3429c9c917845f8f4c33d3618608d40291d..4dd37d024259dfef3e70beb00d39e73287a24473 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
index 9dab45519c65b0ca686558ec7fe2064bb9ad8824..082646b9d3d60ffd03236df027fec8417e47bda6 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=256)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
index 474e4f36bad7eab13251afe265d1a7d107549efd..f5271b82804f96d97bca293581951c3d5d291f40 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
index dff1c535b3e84e14d0e7c343efe911f19872280a..ad86d7d5bd598fc241cea50bf4689242467bbb82 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py
index b36a5c6d1222860ee4b77f89ad4b6148ccd89589..1796e1c6b6d1d69db85c890a401cef630ed15f79 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 outputs(
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
index 7ca1cc2db365dedda5d9673cafaa851a464a7b6b..7484818ab24d5ffe7e285c39e94e32cca96b2996 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
index eb14270baa0c4ca0b84d2121a80fde0b45eda54a..22788be2e90e65c686b2db046a2407e41dd65fbd 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
index c3376c47bded5a3aad15331936a61e12ac883b17..0dcccc49e43d9222f6a17fd8e25e38e3360d3dcc 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py
index d250001932547d63a70de05940957f90cc014dfb..046d38741e87dc60e04f88f8e8d599b4f75fa4c2 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
index b7a15666f0a5b863cbafec5f73dcfe0b9db2e0c7..d81128c77c3bd4af9e8bbe78db61ea0232a4a837 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
index 491e8c8caab38eb7c24e5461107ab5a9d63b12ef..44b0b34d5adb544d81c7ae6e12f3ca89652d7d8e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py
index 0dbb921d41986e711d5b8b31caab1f8b6bdc47b8..e257e735ad25f154600172ca43d739deb2069f3a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
index 45b02fbf325bb63b057bbbf64d59af8debf0bc9d..098e2397ececdd847ba76e7d4b65552c873bf2bb 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300, height=10, width=10)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
index 8da26ff44b19d0c18efae201a3b39002555d2605..714d8893e951267c312ee7b924433892864e4535 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py b/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py
index 1a693f8dff06dec6e71eeb488da9c807c35e4c9b..188a3d2320f6715cac2cd6e0cb3e9935b844452e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py
index 004e2a5dd4efa9feab7619643673b37fe28146c5..93b673afeecbcd4995aaa9fec0380b88c2bdd946 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py
index 09a6f507338c1da8e9ce60555f8ca2576704170c..3a202974e3d64659ffa3291c764b55810cc3b555 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
index 91010759e4847f087eb4e05ad98ae794a2129365..91074b8fdf3f0865516a301362ca9b5b8c469fbe 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py
index b739a81b8505c94a2312ac735647fb114982f1f7..f0a37f7e992e01588c39221efd28be15058ac0af 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py
index ab33c496b0663d8472ce4b272be6c5cecbcfc978..68b1a991f35c9673d5f70f86d2bbedb92dfe7d4f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
index ac8badb26a40e96e75225e6f61aa536cd28e9098..c25393f580efc538dbf865b81715412041469adf 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
index dd589116fa9932144ca066d3fa4c929d1433a7f1..3691e8daeaabf9f863d5813f89275dd7dda3039a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='data', size=100)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py
index 8d4bf28bf1eaf58e1fd0eb62fd10efe998587edd..426afcf3a000011a32a129b894f285de565cb9cf 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
index 5c161ba805fb301e8feb8702ad61a8341df40e3f..72960818573162a1033714cf3e36626b4b0814fa 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
index 3c205eabd80492a68383fdbecd14a7d6db3e16eb..d13a5a842990b49426120457ba0370e01815bd09 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
index 66629662dd9166766daaf707409b720f56ef1405..42225b85058ee82c22ee829f8d3aafe6d7d3a329 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
index 318b4459bab7a70ddec534c4ad217161ffc72d5a..7ebdf7408db17e69be481f305657051d944f8c81 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 define_py_data_sources2(
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
index e0b0d0d3be252700d99f7097f0353df885efcf07..1f19ea77adf3fe378e8274e9d1ee2a99552b94b1 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
index ebb39219bdc1fa314e1d70bcda902f71296772f6..8581ba60ab9e382ee9bfdd2307921b4c94d132ed 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 settings(batch_size=1000, learning_rate=1e-4)
 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
index 27f1c8e9938cdec12fccb37a3127bba1f8ee8d04..a66c9515c77bc04b3a72ceb31b7dbc43050d7627 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/test_reset_hook.py b/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
index 0423babdb720191d8e9dfc67f1af3be339dbe27d..81186dedd20824234040e57967ce4cb53b07a8f6 100644
--- a/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
+++ b/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import unittest
 from paddle.trainer.config_parser import parse_config
 
diff --git a/python/paddle/utils/image_multiproc.py b/python/paddle/utils/image_multiproc.py
index e8db525ff5c388aef1a39d8db56633d509cb4fb9..fdbefef9ff75669becadaa1291891c0d6b7fb268 100644
--- a/python/paddle/utils/image_multiproc.py
+++ b/python/paddle/utils/image_multiproc.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os, sys
 import numpy as np
 from PIL import Image
diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py
index 191d9ecfb127c1851a392bc9ec83734d630d0ac4..fab8a68b0beee8b813bee2a05047e2da526a9c9b 100644
--- a/python/paddle/v2/dataset/common.py
+++ b/python/paddle/v2/dataset/common.py
@@ -23,7 +23,6 @@ import paddle.v2.dataset
 import cPickle
 import glob
 import cPickle as pickle
-import random
 
 __all__ = [
     'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
@@ -206,7 +205,6 @@ def convert(output_path, reader, line_count, name_prefix):
     indx_f = 0
 
     def write_data(indx_f, lines):
-        random.shuffle(lines)
         filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f)
         writer = recordio.writer(filename)
         for l in lines:
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 21ed7f7a5ce279f5bc65e5b008f14a1b0ff97343..37c4296f9bcea7e16daa46f778934331513c30c4 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -25,7 +25,6 @@ import collections
 import tarfile
 import re
 import string
-import random
 
 __all__ = ['build_dict', 'train', 'test', 'convert']
 
@@ -83,7 +82,6 @@ def reader_creator(pos_pattern, neg_pattern, word_idx):
 
     load(pos_pattern, INS, 0)
     load(neg_pattern, INS, 1)
-    random.shuffle(INS)
 
     def reader():
         for doc, label in INS:
diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py
index b705c9109b2b6769c9fafa9241db5d81c682f9e3..d3b3dd524c34be660c5f2d4fc5ce2fa0420efbc1 100644
--- a/python/paddle/v2/dataset/mq2007.py
+++ b/python/paddle/v2/dataset/mq2007.py
@@ -24,7 +24,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20
 """
 
 import os
-import random
 import functools
 import rarfile
 from common import download
@@ -265,7 +264,7 @@ def query_filter(querylists):
     return filter_query
 
 
-def load_from_text(filepath, shuffle=True, fill_missing=-1):
+def load_from_text(filepath, shuffle=False, fill_missing=-1):
     """
   parse data file into querys
   """
@@ -287,17 +286,14 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1):
             querylist._add_query(query)
     if querylist is not None:
         querylists.append(querylist)
-    if shuffle == True:
-        random.shuffle(querylists)
     return querylists
 
 
-def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1):
+def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
     """
   Parameters
   --------
   filename : string
-  shuffle : shuffle query-doc pair under the same query
   fill_missing : fill the missing value. default in MQ2007 is -1
   
   Returns
diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/v2/dataset/tests/imikolov_test.py
index 4e52810e6b924e0796e3d836dbbcb27ede2c9e25..eed1458244562634ef44e8c9653059aaddbae6b2 100644
--- a/python/paddle/v2/dataset/tests/imikolov_test.py
+++ b/python/paddle/v2/dataset/tests/imikolov_test.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.dataset.imikolov
 import unittest
 
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index a0ffd31c545eb10dd8c2f14746ee90df58700e61..01067ef426d426e2921e51e8b6f620313609ab2c 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 Testing and training events.
 
diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py
index 422aa0a5ba2e490d9d0667c9eff7c46f1b751b4c..e91eaa4f35fcf67fb73b139e13d4416d430487fe 100644
--- a/python/paddle/v2/fluid/__init__.py
+++ b/python/paddle/v2/fluid/__init__.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 # import all class inside framework into fluid module
 import framework
@@ -18,14 +32,30 @@ from param_attr import ParamAttr
 from data_feeder import DataFeeder
 from core import LoDTensor, CPUPlace, CUDAPlace
 from distribute_transpiler import DistributeTranspiler
+from distribute_transpiler_simple import SimpleDistributeTranspiler
 import clip
 from memory_optimization_transpiler import memory_optimize
 
 Tensor = LoDTensor
+
 __all__ = framework.__all__ + executor.__all__ + [
-    'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
-    'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr'
-    'DataFeeder', 'clip', 'DistributeTranspiler', 'memory_optimize'
+    'io',
+    'initializer',
+    'layers',
+    'nets',
+    'optimizer',
+    'backward',
+    'regularizer',
+    'LoDTensor',
+    'CPUPlace',
+    'CUDAPlace',
+    'Tensor',
+    'ParamAttr'
+    'DataFeeder',
+    'clip',
+    'SimpleDistributeTranspiler',
+    'DistributeTranspiler',
+    'memory_optimize',
 ]
 
 
@@ -65,4 +95,5 @@ def __bootstrap__():
     core.init_devices()
 
 
+layers.monkey_patch_variable()
 __bootstrap__()
diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py
index cea2d1e09068da20f4d2fdbfbd9a3e3a511ba267..ae81d68bafd22db5d9f7ab0f9cc0dcdb204493e1 100644
--- a/python/paddle/v2/fluid/backward.py
+++ b/python/paddle/v2/fluid/backward.py
@@ -1,9 +1,26 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.v2.fluid import framework as framework
 from . import core
 import collections
 import copy
 
-__all__ = ['append_backward', 'calc_gradient']
+__all__ = [
+    'append_backward',
+    'calc_gradient',
+]
 
 
 def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None):
diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index b1fd1c2b65f10010fa959dbb47b3fbab114db2f2..5241f4843c0df0314eba6168f8f36335c3f19d0a 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -1,9 +1,26 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import functools
 import layers
 from . import core
 
 __all__ = [
-    'GradientClipByValue', 'append_gradient_clip_ops', 'error_clip_callback'
+    'GradientClipByValue',
+    'ErrorClipByValue',
+    'append_gradient_clip_ops',
+    'error_clip_callback',
 ]
 
 
@@ -23,12 +40,12 @@ class ErrorClipByValue(BaseErrorClipAttr):
         self.min = min
 
     def append_clip_op(self, block, grad_name):
-        block.append_op(
-            type="clip",
-            inputs={"X": grad_name},
-            outputs={"Out": grad_name},
-            attrs={"min": self.min,
-                   "max": self.max})
+        clip_op_desc = block.desc.append_op()
+        clip_op_desc.set_type("clip")
+        clip_op_desc.set_input("X", [grad_name])
+        clip_op_desc.set_output("Out", [grad_name])
+        clip_op_desc.set_attr("min", self.min)
+        clip_op_desc.set_attr("max", self.max)
 
 
 def error_clip_callback(block, context):
@@ -39,6 +56,11 @@ def error_clip_callback(block, context):
                          op_desc.output_arg_names()):
         fwd_var = block.var_recursive(grad_to_var[grad_n])
         error_clip = getattr(fwd_var, "error_clip", None)
+        if not (error_clip is None or isinstance(error_clip,
+                                                 BaseErrorClipAttr)):
+            raise TypeError(
+                "Variable's error_clip should be an instance of BaseErrorClipAttr or None."
+            )
         if error_clip is not None:
             error_clip.append_clip_op(block, grad_n)
 
diff --git a/python/paddle/v2/fluid/data_feeder.py b/python/paddle/v2/fluid/data_feeder.py
index 24036c3e75b9594ba58cccb02825ab8020d1e107..a3b22a8633eae02c570f2a4c4caf4a6152649ef8 100644
--- a/python/paddle/v2/fluid/data_feeder.py
+++ b/python/paddle/v2/fluid/data_feeder.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import core
 import numpy
diff --git a/python/paddle/v2/fluid/default_scope_funcs.py b/python/paddle/v2/fluid/default_scope_funcs.py
index 60c6165b6bd959f7bb3d92afed667f00f73f144f..a27280208b8184c6539274afb1ddd6bda2861205 100644
--- a/python/paddle/v2/fluid/default_scope_funcs.py
+++ b/python/paddle/v2/fluid/default_scope_funcs.py
@@ -1,16 +1,29 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 Default scope function.
 
-`Paddle` manages Scope as programming language's scope.  It just a 
-thread-local stack of Scope. Top of that stack is current scope, the bottom 
-of that stack is all scopes' parent. 
+`Paddle` manages Scope as programming language's scope.  It just a
+thread-local stack of Scope. Top of that stack is current scope, the bottom
+of that stack is all scopes' parent.
 
-Invoking `var/find_var`  can `new/find` variable in current scope. 
-Invoking `enter_local_scope/leave_local_scope` can create or destroy local 
-scope. 
+Invoking `var/find_var`  can `new/find` variable in current scope.
+Invoking `enter_local_scope/leave_local_scope` can create or destroy local
+scope.
 
-A `scoped_function` will take a `function` as input. That function will be 
-invoked in a new local scope. 
+A `scoped_function` will take a `function` as input. That function will be
+invoked in a new local scope.
 """
 
 import paddle.v2.fluid.core
@@ -19,8 +32,12 @@ import threading
 __tl_scope__ = threading.local()
 
 __all__ = [
-    'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var',
-    'find_var', 'scoped_function'
+    'get_cur_scope',
+    'enter_local_scope',
+    'leave_local_scope',
+    'var',
+    'find_var',
+    'scoped_function',
 ]
 
 
@@ -71,7 +88,7 @@ def find_var(name):
 def scoped_function(func):
     """
     invoke `func` in new scope.
-    
+
     :param func: a callable function that will be run in new scope.
     :type func: callable
     """
diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py
index 49ece7b725e318d7526d58fe54c97cbe20200a7d..573774a2324791c1786e39700aeb27e64e2e8f9a 100644
--- a/python/paddle/v2/fluid/distribute_transpiler.py
+++ b/python/paddle/v2/fluid/distribute_transpiler.py
@@ -1,51 +1,77 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
 import framework
 from framework import Program, default_main_program, Parameter, Variable
 import optimizer
 from layer_helper import LayerHelper
+from distributed_spliter import *
+import math
+from . import core
 
 
-def hash_name_to_server(params_grads, pserver_endpoints):
-    """
-    :param param_grads:
-    :return: a map of pserver endpoint -> 
-                    params -> [param list]
-                    grads  -> [grad list]
-    """
-
-    def _hash_param(param_name, total):
-        return hash(param_name) % total
-
-    param_grad_map = dict()
-    for param, grad in params_grads:
-        if param.trainable is True and grad is not None:
-            server_id = _hash_param(param.name, len(pserver_endpoints))
-            server_for_param = pserver_endpoints[server_id]
-            if not param_grad_map.has_key(server_for_param):
-                param_grad_map[server_for_param] = {"params": [], "grads": []}
-            param_grad_map[server_for_param]["params"].append(param)
-            param_grad_map[server_for_param]["grads"].append(grad)
+class VarBlock:
+    def __init__(self, varname, offset, size):
+        self.varname = varname
+        # NOTE: real offset is offset * size
+        self.offset = offset
+        self.size = size
 
-    return param_grad_map
+    def __str__(self):
+        return "%s:%d:%d" % (self.varname, self.offset, self.size)
 
 
-def round_robin(params_grads, pserver_endpoints):
-    assert (len(params_grads) > len(pserver_endpoints))
-
-    param_grad_map = dict()
-    pserver_idx = 0
-    for param, grad in params_grads:
-        if param.trainable is True:
-            server_for_param = pserver_endpoints[pserver_idx]
-            if not param_grad_map.has_key(server_for_param):
-                param_grad_map[server_for_param] = {"params": [], "grads": []}
-
-            param_grad_map[server_for_param]["params"].append(param)
-            param_grad_map[server_for_param]["grads"].append(grad)
+def split_dense_variable(var_list,
+                         pserver_count,
+                         min_block_size=1024,
+                         max_block_size=1048576):
+    """
+        We may need to split dense tensor to one or several blocks and put
+        them equally onto parameter server. One block is a sub-tensor
+        aligned by dim[0] of the tensor.
+        
+        We need to have a minimal block size so that the calculations in
+        the parameter server side can gain better performance. By default
+        mininum block size is 1024. The max block size is used to prevent
+        too large block that may causing send error.
+    """
+    blocks = []
+    for var in var_list:
+        split_count = pserver_count
+        var_numel = reduce(lambda x, y: x * y, var.shape)
+        max_pserver_count = int(math.floor(var_numel / float(min_block_size)))
+        if max_pserver_count == 0:
+            max_pserver_count = 1
+        if max_pserver_count < pserver_count:
+            split_count = max_pserver_count
+        block_size = int(math.ceil(var_numel / float(split_count)))
 
-            pserver_idx += 1
-            if pserver_idx >= len(pserver_endpoints):
-                pserver_idx = 0
-    return param_grad_map
+        if len(var.shape) >= 2:
+            # align by dim1(width)
+            dim1 = reduce(lambda x, y: x * y, var.shape[1:])
+            remains = block_size % dim1
+            if remains != 0:
+                block_size += dim1 - remains
+        # update split_count after align
+        split_count = int(math.ceil(var_numel / float(block_size)))
+        for block_id in xrange(split_count):
+            curr_block_size = min(block_size, var_numel - (
+                (block_id) * block_size))
+            block = VarBlock(var.name, block_id, curr_block_size)
+            blocks.append(str(block))
+    return blocks
 
 
 class DistributeTranspiler:
@@ -58,7 +84,6 @@ class DistributeTranspiler:
                   split_method=round_robin):
         """
             Transpile the program to a distributed data-parallelism programs.
-
             The main_program will be transform to use a remote parameter server
             to do parameter optimization. And the optimization graph will be put
             in to a parameter server program.
@@ -66,60 +91,113 @@ class DistributeTranspiler:
             Use different methods to split trainable varialbles to different
             parameter servers.
 
-            Example to run:
-
-            exe = fluid.Executor(place)
-            t = fluid.DistributeTranspiler()
-            t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
-
-            pserver_endpoint = os.getenv("PSERVER")
-            if pserver_endpoint:
-                pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
-                exe.run(fluid.default_startup_program())
-                exe.run(pserver_prog)
-            else:
-                feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
-                exe.run(fluid.default_startup_program())
-
-                for pass_id in range(PASS_NUM):
-                    ...
-
             :param optimize_ops: op list of optimization, should be the
                                  return value of Optimizer.minimize
             :type optimize_ops: list
             :param program: program to optimize, default default_main_program
             :param pservers: parameter server endpoints like "m1:6174,m2:6174"
             :type pservers: string
-
             :return: return a list of programs
         """
+        assert (callable(split_method))
         if program is None:
             program = default_main_program()
         self.program = program
         self.trainers = trainers
         self.optimize_ops = optimize_ops
-        self._optimize_distributed(
-            optimize_ops,
-            program,
-            params_grads,
-            pservers=pservers,
-            trainers=trainers,
-            split_method=split_method)
-
-    def _clone_param(self, block, v):
-        assert isinstance(v, Parameter)
-        new_p = Parameter(
-            block=block,
-            shape=v.shape,
-            dtype=v.dtype,
-            type=v.type,
-            lod_level=v.lod_level,
-            stop_gradient=v.stop_gradient,
-            trainable=v.trainable,
-            optimize_attr=v.optimize_attr,
-            regularizer=v.regularizer,
-            name=v.name)
-        block.vars[new_p.name] = new_p
+        # steps to transpile:
+        # 1. split variable to multiple blocks, align by product(dim[1:]) (width).
+        # 2. modify trainer program add split_op to each Grad.
+        # 3. append send_op to trainer.
+        # 4. append concat_op to trainer to update local weights.
+        # 5. create new program as parameter server.
+        # 6. create parameter server program by split_method generated endpoint->VarBlock
+
+        pserver_endpoints = pservers.split(",")
+
+        # step1
+        param_list = [pg[0] for pg in params_grads]
+        grad_list = [pg[1] for pg in params_grads]
+        # TODO: add split selected rows support
+        grad_blocks = split_dense_variable(grad_list, len(pserver_endpoints))
+        param_blocks = split_dense_variable(param_list, len(pserver_endpoints))
+        # step2
+        grad_var_mapping = self._append_split_op(program, grad_blocks)
+
+        # step3
+        send_inputs = []
+        send_outputs = []
+        for b in grad_blocks:  # append by order
+            varname, block_id, _ = b.split(":")
+            send_inputs.append(grad_var_mapping[varname][int(block_id)])
+
+        param_var_mapping = self._create_vars_from_blocklist(program,
+                                                             param_blocks)
+        for b in param_blocks:
+            varname, block_id, _ = b.split(":")
+            send_outputs.append(param_var_mapping[varname][int(block_id)])
+        # let send_op know which endpoint to send which var, eplist is of the same
+        # order of send_inputs.
+        eplist = split_method(send_inputs, pserver_endpoints)
+        # create mapping of endpoint -> splited var to create pserver side program
+        self.param_grad_ep_mapping = dict()
+        for i, ep in enumerate(eplist):
+            param = send_outputs[i]
+            grad = send_inputs[i]
+            if not self.param_grad_ep_mapping.has_key(ep):
+                self.param_grad_ep_mapping[ep] = {"params": [], "grads": []}
+            self.param_grad_ep_mapping[ep]["params"].append(param)
+            self.param_grad_ep_mapping[ep]["grads"].append(grad)
+
+        send_op = program.global_block().append_op(
+            type="send",
+            inputs={"X": send_inputs},
+            outputs={"Out": send_outputs},
+            attrs={"endpoints": pserver_endpoints,
+                   "epmap": eplist})
+        # step4
+        for varname, splited_var in param_var_mapping.iteritems():
+            if len(splited_var) <= 1:
+                continue
+            orig_param = program.global_block().vars[varname]
+            concat = program.global_block().append_op(
+                type="concat",
+                inputs={"X": splited_var},
+                outputs={"Out": [orig_param]},
+                attrs={"axis": 0})
+
+    def _create_vars_from_blocklist(self, program, block_list):
+        block_map = dict()
+        var_mapping = dict()
+        for block_str in block_list:
+            varname, offset, size = block_str.split(":")
+            if not block_map.has_key(varname):
+                block_map[varname] = []
+            block_map[varname].append((long(offset), long(size)))
+        for varname, splited in block_map.iteritems():
+            orig_var = program.global_block().vars[varname]
+            var_mapping[varname] = []
+            if len(splited) == 1:
+                var_mapping[varname] = [orig_var]
+                continue
+            orig_shape = orig_var.shape
+            orig_dim1_flatten = 1
+            if len(orig_shape) >= 2:
+                orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:])
+
+            for i, block in enumerate(splited):
+                size = block[1]
+                rows = size / orig_dim1_flatten
+                splited_shape = [rows]
+                if len(orig_shape) >= 2:
+                    splited_shape.extend(orig_shape[1:])
+                var = program.global_block().create_var(
+                    name="%s.block%d" % (varname, i),
+                    psersistable=False,
+                    dtype=orig_var.dtype,
+                    shape=splited_shape)  # flattend splited var
+                var_mapping[varname].append(var)
+        return var_mapping
 
     def _clone_var(self, block, var):
         assert isinstance(var, Variable)
@@ -129,34 +207,40 @@ class DistributeTranspiler:
             dtype=var.dtype,
             type=var.type,
             lod_level=var.lod_level,
-            persistable=var.persistable)
-
-    def _optimize_distributed(self, optimize_ops, program, params_and_grads,
-                              **kwargs):
-        if kwargs.has_key("split_method"):
-            split_method = kwargs["split_method"]
-        else:
-            split_method = round_robin
+            # HACK: let all param in pserver persistable so child
+            # program in recv can get them
+            persistable=True)
 
-        assert (callable(split_method))
-        pserver_endpoints = kwargs["pservers"].split(",")
-        self.param_grad_map = split_method(params_and_grads, pserver_endpoints)
-
-        send_op_ordered_inputs = []
-        send_op_ordered_outputs = []
-        epmap = []
-        for ep, v in self.param_grad_map.iteritems():
-            send_op_ordered_inputs.extend(v["grads"])
-            send_op_ordered_outputs.extend(v["params"])
-            for i in v["grads"]:
-                epmap.append(ep)
-        send_op = program.global_block().append_op(
-            type="send",
-            inputs={"X": send_op_ordered_inputs
-                    },  # inputs is a list of tensors to be send
-            outputs={"Out": send_op_ordered_outputs},
-            attrs={"endpoints": pserver_endpoints,
-                   "epmap": epmap})
+    def _append_split_op(self, program, gradblocks):
+        var_mapping = self._create_vars_from_blocklist(program, gradblocks)
+        for varname, splited_vars in var_mapping.iteritems():
+            # variable that don't need to split have empty splited_vars
+            if len(splited_vars) <= 1:
+                continue
+            orig_var = program.global_block().vars[varname]
+            if orig_var == core.VarDesc.VarType.SELECTED_ROWS:
+                height_sections = []
+                for v in splited_vars:
+                    height_sections.append(v.shape[0])
+                program.global_block().append_op(
+                    type="split_selected_rows",
+                    inputs={"X": orig_var},
+                    outputs={"Out": splited_vars},
+                    attrs={"height_sections": height_sections})
+            elif orig_var == core.VarDesc.VarType.LOD_TENSOR:
+                sections = []
+                for v in splited_vars:
+                    sections.append(v.shape[0])
+                program.global_block().append_op(
+                    type="split",
+                    inputs={"X": orig_var},
+                    outputs={"Out": splited_vars},
+                    attrs={"sections": sections}  # assume split evenly
+                )
+            else:
+                AssertionError("Variable type should be in set "
+                               "[LOD_TENSOR, SELECTED_ROWS]")
+        return var_mapping
 
     def get_trainer_program(self):
         # remove optimize ops and add a send op to main_program
@@ -174,69 +258,280 @@ class DistributeTranspiler:
             var_list.append(var_each)
         return var_list
 
-    def get_pserver_program(self, endpoint, optimize_ops):
-        pserver_program = Program()
-        for v in self.param_grad_map[endpoint]["params"]:
-            self._clone_param(pserver_program.global_block(), v)
+    def _get_optimizer_input_shape(self, op_type, varkey, orig_shape,
+                                   param_shape):
+        """
+        Returns the shape for optimizer inputs that need to be reshaped when
+        Param and Grad is splited to multiple servers.
+        """
+        # HACK(typhoonzero): Should use functions of corresponding optimizer in
+        # optimizer.py to get the shape, do not  bind this in the transpiler.
+        if op_type == "adam":
+            if varkey in ["Moment1", "Moment2"]:
+                return param_shape
+        elif op_type == "adagrad":
+            if varkey == "Moment":
+                return param_shape
+        elif op_type == "adamax":
+            if varkey in ["Moment", "InfNorm"]:
+                return param_shape
+        elif op_type == "momentum":
+            if varkey == "Velocity":
+                return param_shape
+        elif op_type == "":
+            if varkey == "Moment":
+                return param_shape
+        elif op_type == "sgd":
+            pass
+        return orig_shape
 
-        optimize_sub_program = Program()
-        grad_var_names = [
-            var.name for var in self.param_grad_map[endpoint]["grads"]
+    def _is_op_on_pserver(self, endpoint, all_ops, idx):
+        """
+        Recursively check if the op need to run on current server.
+        Assume that ops are in the execution order.
+        """
+        param_names = [
+            p.name for p in self.param_grad_ep_mapping[endpoint]["params"]
         ]
-        for opt_op in optimize_ops:
-            for _, var in opt_op.inputs.iteritems():
-                # NOTE: append operators to merge gradients from multiple
-                # trainers. If trainers == 1, this is not needed.
-                if self.trainers > 1 and var.name in grad_var_names:
+        op = all_ops[idx]
+        if op.inputs.has_key("Param"):
+            if op.inputs["Param"].name in param_names:
+                return True
+            else:
+                for n in param_names:
+                    if n.startswith(op.inputs["Param"].name+".block") and \
+                        n != op.inputs["Param"].name:
+                        return True
+                return False
+        else:
+            j = idx - 1
+            while j >= 0:
+                prev_op = all_ops[j]
+                prev_output_names = [o.name for o in prev_op.outputs.values()]
+                prev_input_names = [o.name for o in prev_op.inputs.values()]
+                found1 = False
+                found2 = False
+                for _, v in op.inputs.iteritems():
+                    if v.name in prev_output_names:
+                        found1 = self._is_op_on_pserver(endpoint, all_ops, j)
+                # later ops may produce output for prev op's next batch use.
+                for _, v in op.outputs.iteritems():
+                    if v.name in prev_input_names:
+                        found2 = self._is_op_on_pserver(endpoint, all_ops, j)
+                if found1 or found2:
+                    return True
+                j -= 1
+            return False
+
+    def _append_pserver_ops(self, program, pserver_program, opt_op, endpoint):
+        new_inputs = dict()
+        # update param/grad shape first, then other inputs like
+        # moment can use the updated shape
+        for key, var in opt_op.inputs.iteritems():
+            if key == "Grad":
+                grad_block = None
+                for g in self.param_grad_ep_mapping[endpoint]["grads"]:
+                    if g.name.startswith(var.name):
+                        grad_block = g
+                        break
+                if not grad_block:
+                    # do not append this op if current endpoint
+                    # is not dealing with this grad block
+                    return
+                merged_var = program.global_block().create_var(
+                    name=grad_block.name,
+                    persistable=grad_block.persistable,
+                    dtype=grad_block.dtype,
+                    shape=grad_block.shape)
+                # append merging ops if trainers > 1
+                if self.trainers > 1:
                     vars2merge = self._create_var_for_trainers(
-                        optimize_sub_program.global_block(), var, self.trainers)
-                    merged_var = optimize_sub_program.global_block().create_var(
-                        name=var.name,
-                        persistable=var.persistable,
-                        dtype=var.dtype,
-                        shape=var.shape)
-                    optimize_sub_program.global_block().append_op(
+                        program.global_block(), grad_block, self.trainers)
+                    program.global_block().append_op(
                         type="sum",
                         inputs={"X": vars2merge},
                         outputs={"Out": merged_var})
-                    optimize_sub_program.global_block().append_op(
+                    program.global_block().append_op(
                         type="scale",
                         inputs={"X": merged_var},
                         outputs={"Out": merged_var},
                         attrs={"scale": 1.0 / float(self.trainers)})
-                else:
-                    optimize_sub_program.global_block().create_var(
-                        name=var.name,
-                        persistable=var.persistable,
-                        dtype=var.dtype,
-                        shape=var.shape)
+                new_inputs[key] = merged_var
+            elif key == "Param":
+                # param is already created on global program
+                param_block = None
+                for p in self.param_grad_ep_mapping[endpoint]["params"]:
+                    if p.name.startswith(var.name):
+                        param_block = p
+                        break
+                if not param_block:
+                    return
+                tmpvar = program.global_block().create_var(
+                    name=param_block.name,
+                    persistable=True,
+                    dtype=param_block.dtype,
+                    shape=param_block.shape)
 
+                new_inputs[key] = tmpvar
+
+        for key, var in opt_op.inputs.iteritems():
+            if key in ["Param", "Grad"]:
+                continue
+            # update accumulator variable shape
+            param_shape = new_inputs["Param"].shape
+            new_shape = self._get_optimizer_input_shape(opt_op.type, key,
+                                                        var.shape, param_shape)
+            tmpvar = program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=new_shape)
+            new_inputs[key] = tmpvar
+            # create var in pserver program global block.
+            # TODO(typhoonzero): put blocks in one program to avoid create two
+            # variables.
+            pserver_program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=new_shape)
+
+        # change outputs ParamOut variable
+        opt_op.outputs["ParamOut"] = new_inputs["Param"]
+        program.global_block().append_op(
+            type=opt_op.type,
+            inputs=new_inputs,
+            outputs=opt_op.outputs,
+            attrs=opt_op.attrs)
+
+    def _append_pserver_non_opt_ops(self, program, pserver_program, opt_op):
+        for _, var in opt_op.inputs.iteritems():
+            program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+            pserver_program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+        program.global_block().append_op(
+            type=opt_op.type,
+            inputs=opt_op.inputs,
+            outputs=opt_op.outputs,
+            attrs=opt_op.attrs)
+
+    def get_pserver_program(self, endpoint):
+        """
+        get pserver side program by endpoint
+
+        NOTE: assume blocks of the same variable is not distributed
+        on the same pserver, only change param/grad varnames for
+        trainers to fetch. For each pserver endpoint, server side
+        program must be a sub-set of the original optimization program.
+        """
+        # step5
+        pserver_program = Program()
+        for v in self.param_grad_ep_mapping[endpoint]["params"]:
+            self._clone_var(pserver_program.global_block(), v)
+        for v in self.param_grad_ep_mapping[endpoint]["grads"]:
+            # create vars for each trainer in global scope, so
+            # we don't need to create them when grad arrives.
+            pserver_program.global_block().create_var(
+                name=v.name, persistable=True, dtype=v.dtype, shape=v.shape)
+            for trainer_id in xrange(self.trainers):
+                print("create variable for program: %s.trainer_%d" %
+                      (v.name, trainer_id))
+                pserver_program.global_block().create_var(
+                    name="%s.trainer_%d" % (v.name, trainer_id),
+                    persistable=True,
+                    dtype=v.dtype,
+                    shape=v.shape)
+        # step6
+        optimize_sub_program = Program()
+        for idx, opt_op in enumerate(self.optimize_ops):
+            is_op_on_pserver = self._is_op_on_pserver(endpoint,
+                                                      self.optimize_ops, idx)
+            if not is_op_on_pserver:
+                continue
             if opt_op.inputs.has_key("Grad"):
-                if opt_op.inputs["Grad"].name in grad_var_names:
-                    optimize_sub_program.global_block().append_op(
-                        type=opt_op.type,
-                        inputs=opt_op.inputs,
-                        outputs=opt_op.outputs,
-                        attrs=opt_op.attrs)
+                self._append_pserver_ops(optimize_sub_program, pserver_program,
+                                         opt_op, endpoint)
             else:
-                optimize_sub_program.global_block().append_op(
-                    type=opt_op.type,
-                    inputs=opt_op.inputs,
-                    outputs=opt_op.outputs,
-                    attrs=opt_op.attrs)
+                self._append_pserver_non_opt_ops(optimize_sub_program,
+                                                 pserver_program, opt_op)
         pserver_program.global_block().append_op(
             type="recv",
-            inputs={"RX":
-                    self.param_grad_map[endpoint]["grads"]},  # grads to recv
+            inputs={"RX": self.param_grad_ep_mapping[endpoint]["grads"]
+                    },  # grads to recv
             outputs={},
             attrs={
-                "OptimizeProgram": optimize_sub_program.desc,
+                "OptimizeBlock": optimize_sub_program.global_block(),
                 "endpoint": endpoint,
-                "ParamList":
-                [p.name for p in self.param_grad_map[endpoint]["params"]],
-                "GradList":
-                [p.name for p in self.param_grad_map[endpoint]["grads"]],
-                "Trainers": self.trainers
+                "ParamList": [
+                    p.name
+                    for p in self.param_grad_ep_mapping[endpoint]["params"]
+                ],
+                "GradList": [
+                    p.name
+                    for p in self.param_grad_ep_mapping[endpoint]["grads"]
+                ],
+                "Fanin": self.trainers
             })
         pserver_program.sync_with_cpp()
         return pserver_program
+
+    def get_startup_program(self, endpoint, pserver_program):
+        """
+        Get startup program for current parameter server.
+        Modify operator input variables if there are variables that
+        was splited to several blocks.
+        """
+        s_prog = Program()
+        orig_s_prog = framework.default_startup_program()
+        params = self.param_grad_ep_mapping[endpoint]["params"]
+
+        def _get_splited_name_and_shape(varname):
+            for idx, splited_param in enumerate(params):
+                pname = splited_param.name
+                if pname.startswith(varname) and varname != pname:
+                    return pname, splited_param.shape
+            return "", []
+
+        # 1. create vars in pserver program to startup program
+        pserver_vars = pserver_program.global_block().vars
+        created_var_map = dict()
+        for _, var in pserver_vars.iteritems():
+            tmpvar = s_prog.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+            created_var_map[var.name] = tmpvar
+
+        # 2. rename op outputs
+        for op in orig_s_prog.global_block().ops:
+            new_outputs = dict()
+            # do not append startup op if var is not on this pserver
+            op_on_pserver = False
+            for key, var in op.outputs.iteritems():
+                newname, _ = _get_splited_name_and_shape(var.name)
+                if newname:
+                    op_on_pserver = True
+                    new_outputs[key] = created_var_map[newname]
+                elif var.name in pserver_vars:
+                    op_on_pserver = True
+                    new_outputs[key] = pserver_vars[var.name]
+
+            if op_on_pserver:
+                if op.type in [
+                        "gaussian_random", "fill_constant", "uniform_random"
+                ]:
+                    op.attrs["shape"] = new_outputs["Out"].shape
+                s_prog.global_block().append_op(
+                    type=op.type,
+                    inputs=op.inputs,
+                    outputs=new_outputs,
+                    attrs=op.attrs)
+        return s_prog
diff --git a/python/paddle/v2/fluid/distribute_transpiler_simple.py b/python/paddle/v2/fluid/distribute_transpiler_simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..73d9bed1ae9d81d66eb32675ea8473da248076f0
--- /dev/null
+++ b/python/paddle/v2/fluid/distribute_transpiler_simple.py
@@ -0,0 +1,256 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import framework
+from framework import Program, default_main_program, Parameter, Variable
+import optimizer
+from layer_helper import LayerHelper
+
+
+def hash_name_to_server(params_grads, pserver_endpoints):
+    """
+    :param param_grads:
+    :return: a map of pserver endpoint -> 
+                    params -> [param list]
+                    grads  -> [grad list]
+    """
+
+    def _hash_param(param_name, total):
+        return hash(param_name) % total
+
+    param_grad_map = dict()
+    for param, grad in params_grads:
+        if param.trainable is True and grad is not None:
+            server_id = _hash_param(param.name, len(pserver_endpoints))
+            server_for_param = pserver_endpoints[server_id]
+            if not param_grad_map.has_key(server_for_param):
+                param_grad_map[server_for_param] = {"params": [], "grads": []}
+            param_grad_map[server_for_param]["params"].append(param)
+            param_grad_map[server_for_param]["grads"].append(grad)
+
+    return param_grad_map
+
+
+def round_robin(params_grads, pserver_endpoints):
+    assert (len(params_grads) > len(pserver_endpoints))
+
+    param_grad_map = dict()
+    pserver_idx = 0
+    for param, grad in params_grads:
+        if param.trainable is True:
+            server_for_param = pserver_endpoints[pserver_idx]
+            if not param_grad_map.has_key(server_for_param):
+                param_grad_map[server_for_param] = {"params": [], "grads": []}
+
+            param_grad_map[server_for_param]["params"].append(param)
+            param_grad_map[server_for_param]["grads"].append(grad)
+
+            pserver_idx += 1
+            if pserver_idx >= len(pserver_endpoints):
+                pserver_idx = 0
+    return param_grad_map
+
+
+class SimpleDistributeTranspiler:
+    def transpile(self,
+                  optimize_ops,
+                  params_grads,
+                  program=None,
+                  pservers="127.0.0.1:6174",
+                  trainers=1,
+                  split_method=round_robin):
+        """
+            Transpile the program to a distributed data-parallelism programs.
+
+            The main_program will be transform to use a remote parameter server
+            to do parameter optimization. And the optimization graph will be put
+            in to a parameter server program.
+
+            Use different methods to split trainable varialbles to different
+            parameter servers.
+
+            Example to run:
+
+            exe = fluid.Executor(place)
+            t = fluid.DistributeTranspiler()
+            t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
+
+            pserver_endpoint = os.getenv("PSERVER")
+            if pserver_endpoint:
+                pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
+                exe.run(fluid.default_startup_program())
+                exe.run(pserver_prog)
+            else:
+                feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
+                exe.run(fluid.default_startup_program())
+
+                for pass_id in range(PASS_NUM):
+                    ...
+
+            :param optimize_ops: op list of optimization, should be the
+                                 return value of Optimizer.minimize
+            :type optimize_ops: list
+            :param program: program to optimize, default default_main_program
+            :param pservers: parameter server endpoints like "m1:6174,m2:6174"
+            :type pservers: string
+
+            :return: return a list of programs
+        """
+        if program is None:
+            program = default_main_program()
+        self.program = program
+        self.trainers = trainers
+        self.optimize_ops = optimize_ops
+        self._optimize_distributed(
+            optimize_ops,
+            program,
+            params_grads,
+            pservers=pservers,
+            trainers=trainers,
+            split_method=split_method)
+
+    def _clone_param(self, block, v):
+        assert isinstance(v, Parameter)
+        new_p = Parameter(
+            block=block,
+            shape=v.shape,
+            dtype=v.dtype,
+            type=v.type,
+            lod_level=v.lod_level,
+            stop_gradient=v.stop_gradient,
+            trainable=v.trainable,
+            optimize_attr=v.optimize_attr,
+            regularizer=v.regularizer,
+            name=v.name)
+        block.vars[new_p.name] = new_p
+
+    def _clone_var(self, block, var):
+        assert isinstance(var, Variable)
+        return block.create_var(
+            name=var.name,
+            shape=var.shape,
+            dtype=var.dtype,
+            type=var.type,
+            lod_level=var.lod_level,
+            persistable=var.persistable)
+
+    def _optimize_distributed(self, optimize_ops, program, params_and_grads,
+                              **kwargs):
+        if kwargs.has_key("split_method"):
+            split_method = kwargs["split_method"]
+        else:
+            split_method = round_robin
+
+        assert (callable(split_method))
+        pserver_endpoints = kwargs["pservers"].split(",")
+        self.param_grad_map = split_method(params_and_grads, pserver_endpoints)
+
+        send_op_ordered_inputs = []
+        send_op_ordered_outputs = []
+        epmap = []
+        for ep, v in self.param_grad_map.iteritems():
+            send_op_ordered_inputs.extend(v["grads"])
+            send_op_ordered_outputs.extend(v["params"])
+            for i in v["grads"]:
+                epmap.append(ep)
+        send_op = program.global_block().append_op(
+            type="send",
+            inputs={"X": send_op_ordered_inputs
+                    },  # inputs is a list of tensors to be send
+            outputs={"Out": send_op_ordered_outputs},
+            attrs={"endpoints": pserver_endpoints,
+                   "epmap": epmap})
+
+    def get_trainer_program(self):
+        # remove optimize ops and add a send op to main_program
+        self.program.global_block().delete_ops(self.optimize_ops)
+        return self.program
+
+    def _create_var_for_trainers(self, block, var, trainers):
+        var_list = []
+        for i in xrange(trainers):
+            var_each = block.create_var(
+                name="%s.trainer_%d" % (var.name, i),
+                psersistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+            var_list.append(var_each)
+        return var_list
+
+    def get_pserver_program(self, endpoint, optimize_ops):
+        pserver_program = Program()
+        for v in self.param_grad_map[endpoint]["params"]:
+            self._clone_param(pserver_program.global_block(), v)
+
+        optimize_sub_program = Program()
+        grad_var_names = [
+            var.name for var in self.param_grad_map[endpoint]["grads"]
+        ]
+        for opt_op in optimize_ops:
+            for _, var in opt_op.inputs.iteritems():
+                # NOTE: append operators to merge gradients from multiple
+                # trainers. If trainers == 1, this is not needed.
+                if self.trainers > 1 and var.name in grad_var_names:
+                    vars2merge = self._create_var_for_trainers(
+                        optimize_sub_program.global_block(), var, self.trainers)
+                    merged_var = optimize_sub_program.global_block().create_var(
+                        name=var.name,
+                        persistable=var.persistable,
+                        dtype=var.dtype,
+                        shape=var.shape)
+                    optimize_sub_program.global_block().append_op(
+                        type="sum",
+                        inputs={"X": vars2merge},
+                        outputs={"Out": merged_var})
+                    optimize_sub_program.global_block().append_op(
+                        type="scale",
+                        inputs={"X": merged_var},
+                        outputs={"Out": merged_var},
+                        attrs={"scale": 1.0 / float(self.trainers)})
+                else:
+                    optimize_sub_program.global_block().create_var(
+                        name=var.name,
+                        persistable=var.persistable,
+                        dtype=var.dtype,
+                        shape=var.shape)
+
+            if opt_op.inputs.has_key("Grad"):
+                if opt_op.inputs["Grad"].name in grad_var_names:
+                    optimize_sub_program.global_block().append_op(
+                        type=opt_op.type,
+                        inputs=opt_op.inputs,
+                        outputs=opt_op.outputs,
+                        attrs=opt_op.attrs)
+            else:
+                optimize_sub_program.global_block().append_op(
+                    type=opt_op.type,
+                    inputs=opt_op.inputs,
+                    outputs=opt_op.outputs,
+                    attrs=opt_op.attrs)
+        pserver_program.global_block().append_op(
+            type="recv",
+            inputs={"RX":
+                    self.param_grad_map[endpoint]["grads"]},  # grads to recv
+            outputs={},
+            attrs={
+                "OptimizeBlock": optimize_sub_program.global_block(),
+                "endpoint": endpoint,
+                "ParamList":
+                [p.name for p in self.param_grad_map[endpoint]["params"]],
+                "GradList":
+                [p.name for p in self.param_grad_map[endpoint]["grads"]],
+                "Trainers": self.trainers
+            })
+        pserver_program.sync_with_cpp()
+        return pserver_program
diff --git a/python/paddle/v2/fluid/distributed_spliter.py b/python/paddle/v2/fluid/distributed_spliter.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cf0b06786f2ccb5601af10aab39e7d0c22ae624
--- /dev/null
+++ b/python/paddle/v2/fluid/distributed_spliter.py
@@ -0,0 +1,50 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def hash_name(varlist, pserver_endpoints):
+    """
+    hash variable names to several endpoints.
+
+    :param varlist: a list of Variables
+    :return: a map of pserver endpoint -> varname
+    """
+
+    def _hash_block(block_str, total):
+        return hash(block_str) % total
+
+    eplist = []
+    for var in varlist:
+        server_id = _hash_block(var.name(), len(pserver_endpoints))
+        server_for_param = pserver_endpoints[server_id]
+        eplist.append(server_for_param)
+    return eplist
+
+
+def round_robin(varlist, pserver_endpoints):
+    """
+    distribute variables to several endpoints.
+    """
+    assert (len(varlist) > len(pserver_endpoints))
+
+    eplist = []
+    pserver_idx = 0
+    for var in varlist:
+        server_for_param = pserver_endpoints[pserver_idx]
+        eplist.append(server_for_param)
+
+        pserver_idx += 1
+        if pserver_idx >= len(pserver_endpoints):
+            pserver_idx = 0
+    return eplist
diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py
index e186ee96c387acf24471d4e26ce020c4ecac8d19..396d56fc8b236d95d38517f3513521aa969e47be 100644
--- a/python/paddle/v2/fluid/evaluator.py
+++ b/python/paddle/v2/fluid/evaluator.py
@@ -1,10 +1,27 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 
 import layers
 from framework import Program, unique_name, Variable, program_guard
 from layer_helper import LayerHelper
 
-__all__ = ['Accuracy', 'ChunkEvaluator']
+__all__ = [
+    'Accuracy',
+    'ChunkEvaluator',
+]
 
 
 def _clone_var_(block, var):
@@ -21,19 +38,19 @@ def _clone_var_(block, var):
 class Evaluator(object):
     """
     Base Class for all evaluators
-    
+
     Args:
-        name(str): The name of evaluator. such as, "accuracy". Used for generate 
+        name(str): The name of evaluator. such as, "accuracy". Used for generate
             temporary variable name.
-        main_program(Program, optional): The evaluator should be added to this 
+        main_program(Program, optional): The evaluator should be added to this
             main_program. Default default_main_program()
-        startup_program(Program, optional):The parameter should be added to this 
+        startup_program(Program, optional):The parameter should be added to this
             startup_program. Default default_startup_program()
-            
+
     Attributes:
-        states(list): The list of state variables. states will be reset to zero 
+        states(list): The list of state variables. states will be reset to zero
             when `reset` is invoked.
-        metrics(list): The list of metrics variables. They will be calculate 
+        metrics(list): The list of metrics variables. They will be calculate
             every mini-batch
     """
 
@@ -66,14 +83,14 @@ class Evaluator(object):
 
     def create_state(self, suffix, dtype, shape):
         """
-        Create state variable. 
-        
+        Create state variable.
+
         NOTE: It is not a public API.
-        
+
         Args:
-            suffix(str): the state suffix. 
-            dtype(str|core.DataType): the state data type 
-            shape(tuple|list): the shape of state 
+            suffix(str): the state suffix.
+            dtype(str|core.DataType): the state data type
+            shape(tuple|list): the shape of state
 
         Returns: State variable
 
@@ -127,8 +144,8 @@ class Accuracy(Evaluator):
 
 class ChunkEvaluator(Evaluator):
     """
-    Accumulate counter numbers output by chunk_eval from mini-batches and 
-    compute the precision recall and F1-score using the accumulated counter 
+    Accumulate counter numbers output by chunk_eval from mini-batches and
+    compute the precision recall and F1-score using the accumulated counter
     numbers.
     """
 
diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py
index 1b2075dcd5ece5706e62431b360d4dc86ea57a89..9d5ed9571a2fa0a871a25e43b23b1a3c3a6102db 100644
--- a/python/paddle/v2/fluid/executor.py
+++ b/python/paddle/v2/fluid/executor.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import contextlib
 from framework import Program, default_main_program
diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index 3ef6b33192d9509b765173de8981bc7ff18486e5..f87666545893884b2664b52be2d1a11b5aa4a244 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import collections
 import contextlib
 
@@ -7,9 +21,15 @@ import proto.framework_pb2 as framework_pb2
 from . import core
 
 __all__ = [
-    'Block', 'Variable', 'Program', 'Operator', 'default_startup_program',
-    'default_main_program', 'program_guard', 'switch_startup_program',
-    'switch_main_program'
+    'Block',
+    'Variable',
+    'Program',
+    'Operator',
+    'default_startup_program',
+    'default_main_program',
+    'program_guard',
+    'switch_startup_program',
+    'switch_main_program',
 ]
 
 EMPTY_VAR_NAME = core.kEmptyVarName()
@@ -97,8 +117,8 @@ def _debug_string_(proto, throw_on_error=True):
     """
     error_fields = list()
     if not proto.IsInitialized(error_fields) and throw_on_error:
-        raise ValueError("{0} are not initialized\nThe message is {1}".format(
-            error_fields, proto))
+        raise ValueError("{0} are not initialized.\nThe message is {1}:\n".
+                         format(error_fields, proto))
     return proto.__str__()
 
 
@@ -274,6 +294,9 @@ class Variable(object):
         uid = core.unique_integer(prefix)  # unique during whole process.
         return "_".join([prefix, str(uid)])
 
+    def set_error_clip(self, error_clip):
+        self.error_clip = error_clip
+
 
 def get_all_op_protos():
     """
@@ -352,12 +375,13 @@ class Operator(object):
         >>>                     outputs={"Out": [var1]})
 
         Args:
-            block(Block): The block has the current operator
-            desc(core.OpDesc): The protobuf description
+            block(Block): The block has the current operator.
+            desc(core.OpDesc): The protobuf description.
             type(str): The type of operator.
             inputs(dict): The input dictionary. Key is the input parameter name.
                 Value is a list of variables.
-            outputs(dict): The output dictionary. Has same format with inputs
+            outputs(dict): The output dictionary which has the same format with
+                           inputs.
             attrs(dict): The attributes dictionary. Key is attribute name. Value
                 is the attribute value. The attribute type should be as same as
                 the type registered in C++
@@ -414,10 +438,11 @@ class Operator(object):
             for m in proto.outputs:
                 need.add(m.name)
             if not given == need:
-                raise ValueError(
-                    "Incorrect setting for output(s) of operator \"%s\". Need: [%s] Given: [%s]"
-                    % (type, ", ".join(str(e) for e in need), ", ".join(
-                        str(e) for e in given)))
+                raise ValueError(("Incorrect setting for output(s) of "
+                                  "operator \"%s\". Need: [%s] Given: [%s]") %
+                                 (type, ", ".join(str(e)
+                                                  for e in need), ", ".join(
+                                                      str(e) for e in given)))
 
             for out_proto in proto.outputs:
                 out_args = outputs[out_proto.name]
@@ -796,9 +821,8 @@ class Program(object):
                 if isinstance(t, Variable):
                     t = t.op
                 else:
-                    raise ValueError(
-                        "All targets of prune() can only be Variable or Operator."
-                    )
+                    raise ValueError(("All targets of prune() can only be "
+                                      "Variable or Operator."))
 
             targets_idx.append([t.block.idx, t.idx])
         res = Program()
diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py
index c0839caaf2bb5bc43a76a13b5782cc519a4afe63..b9c0d12ad6cf09e66df6b1a8da09df275c79a3f6 100644
--- a/python/paddle/v2/fluid/initializer.py
+++ b/python/paddle/v2/fluid/initializer.py
@@ -1,7 +1,26 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import framework
 import numpy as np
 
-__all__ = ['Constant', 'Uniform', 'Normal', 'Xavier']
+__all__ = [
+    'Constant',
+    'Uniform',
+    'Normal',
+    'Xavier',
+]
 
 
 class Initializer(object):
diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py
index eef1e283c2c9db6de375b70f94c50f8dce140d52..376d6013a38923014fa35e964e58d7f56bf80546 100644
--- a/python/paddle/v2/fluid/io.py
+++ b/python/paddle/v2/fluid/io.py
@@ -1,12 +1,33 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import cPickle as pickle
 
 from paddle.v2.fluid.framework import Program, Parameter, default_main_program, Variable
+from . import core
 
 __all__ = [
-    'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
-    'load_persistables', "save_inference_model", "load_inference_model",
-    "get_inference_program"
+    'save_vars',
+    'save_params',
+    'save_persistables',
+    'load_vars',
+    'load_params',
+    'load_persistables',
+    'save_inference_model',
+    'load_inference_model',
+    'get_inference_program',
 ]
 
 
@@ -172,6 +193,33 @@ def get_inference_program(target_vars, main_program=None):
     return inference_program
 
 
+def prepend_feed_ops(inference_program, feeded_var_names):
+    global_block = inference_program.global_block()
+    feed_var = global_block.create_var(
+        name='feed', type=core.VarDesc.VarType.FEED_MINIBATCH, persistable=True)
+
+    for i, name in enumerate(feeded_var_names):
+        out = global_block.var(name)
+        global_block.prepend_op(
+            type='feed',
+            inputs={'X': [feed_var]},
+            outputs={'Out': [out]},
+            attrs={'col': i})
+
+
+def append_fetch_ops(inference_program, fetch_var_names):
+    global_block = inference_program.global_block()
+    fetch_var = global_block.create_var(
+        name='fetch', type=core.VarDesc.VarType.FETCH_LIST, persistable=True)
+
+    for i, name in enumerate(fetch_var_names):
+        global_block.append_op(
+            type='fetch',
+            inputs={'X': [name]},
+            outputs={'Out': [fetch_var]},
+            attrs={'col': i})
+
+
 def save_inference_model(dirname,
                          feeded_var_names,
                          target_vars,
@@ -222,6 +270,9 @@ def save_inference_model(dirname,
             "fetch_var_names": fetch_var_names
         }, f, -1)
 
+    prepend_feed_ops(inference_program, feeded_var_names)
+    append_fetch_ops(inference_program, fetch_var_names)
+
     # Save only programDesc of inference_program in binary format
     # in another file: __model__.dat
     with open(model_file_name + ".dat", "wb") as fp:
diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py
index 325735e67936ed40ae83a11ce2e45e2f618d3ac6..0b0064ade90d2b70dd1458cb4d20d741fbf1efcd 100644
--- a/python/paddle/v2/fluid/layer_helper.py
+++ b/python/paddle/v2/fluid/layer_helper.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import copy
 import itertools
 
diff --git a/python/paddle/v2/fluid/layers/__init__.py b/python/paddle/v2/fluid/layers/__init__.py
index 50ac0aba01a4079e7caf49d552c9361977aaa65d..a83dd3db74aed548a324a1c605723c957fca8604 100644
--- a/python/paddle/v2/fluid/layers/__init__.py
+++ b/python/paddle/v2/fluid/layers/__init__.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import ops
 from ops import *
 import nn
@@ -10,6 +24,8 @@ import control_flow
 from control_flow import *
 import device
 from device import *
+import math_op_patch
+from math_op_patch import *
 
 __all__ = []
 __all__ += nn.__all__
@@ -18,3 +34,4 @@ __all__ += tensor.__all__
 __all__ += control_flow.__all__
 __all__ += ops.__all__
 __all__ += device.__all__
+__all__ += math_op_patch.__all__
diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index 4b363ecbe78af82733fe1f80e44118a0dfda1f11..2f1188c542cc0208a189511a1eef1eddc411007c 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from ..layer_helper import LayerHelper, unique_name
 from ..framework import Program, Variable, Operator
 from .. import core
@@ -117,7 +131,8 @@ def Print(input,
           print_tensor_name=True,
           print_tensor_type=True,
           print_tensor_shape=True,
-          print_tensor_lod=True):
+          print_tensor_lod=True,
+          print_phase='both'):
     '''
     **Print operator**
 
@@ -128,18 +143,21 @@ def Print(input,
     tensor `t`.
 
     Args:
-      input(Variable): A Tensor to print.
-      summarize(int): Print this number of elements in the tensor, will print all
-                 if left negative.
-      message(str): A string message to print as a prefix.
-      first_n(int): Only log `first_n` number of times.
-      print_tensor_name(bool): Print the tensor name.
-      print_tensor_type(bool): Print the tensor type.
-      print_tensor_shape(bool): Print the tensor shape.
-      print_tensor_lod(bool): Print the tensor lod.
+        input (Variable): A Tensor to print.
+        summarize (int): Print this number of elements in the tensor, will print
+                all if left is negative.
+        message (str): A string message to print as a prefix.
+        first_n (int): Only log `first_n` number of times.
+        print_tensor_name (bool): Print the tensor name.
+        print_tensor_type (bool): Print the tensor type.
+        print_tensor_shape (bool): Print the tensor shape.
+        print_tensor_lod (bool): Print the tensor lod.
+        print_phase (bool): Which phase to displace, including 'forward',
+                'backward' and 'both'. If set to 'backward' or 'both', will
+                print the gradients of input tensor.
 
     Returns:
-      None
+        Variable: Output tensor, same data with input tensor.
 
     Examples:
         .. code-block:: python
@@ -149,10 +167,10 @@ def Print(input,
               message="The content of some_layer: ")
     '''
     helper = LayerHelper('print', **locals())
-    out = helper.create_tmp_variable(dtype='int32')
+    out = helper.create_tmp_variable(dtype=helper.input_dtype())
     helper.append_op(
         type='print',
-        inputs={'input': input},
+        inputs={'In': input},
         attrs={
             'first_n': first_n,
             'summarize': summarize,
@@ -161,7 +179,9 @@ def Print(input,
             'print_tensor_type': print_tensor_type,
             'print_tensor_shape': print_tensor_shape,
             'print_tensor_lod': print_tensor_lod,
-        })
+            'print_phase': print_phase.upper()
+        },
+        outputs={'Out': out})
     return out
 
 
@@ -1220,7 +1240,8 @@ class DynamicRNN(object):
         self.lod_rank_table = None
         self.max_seq_len = None
         self.step_idx = None
-        self.zero_idx = fill_constant(shape=[1], value=0, dtype='int64')
+        self.zero_idx = fill_constant(
+            shape=[1], value=0, dtype='int64', force_cpu=True)
         self.mem_dict = dict()
         self.output_array = []
         self.outputs = []
@@ -1271,11 +1292,32 @@ class DynamicRNN(object):
             outputs={'Out': input_array})
         return array_read(array=input_array, i=self.step_idx)
 
+    def static_input(self, x):
+        self._assert_in_rnn_block_("static_input")
+        if not isinstance(x, Variable):
+            raise TypeError(
+                "static_input() can only take a Variable as its input")
+        if self.lod_rank_table is None:
+            raise RuntimeError(
+                "static_input() must be called after step_input().")
+        parent_block = self._parent_block_()
+        x_reordered = parent_block.create_var(
+            name=unique_name("dynamic_rnn_static_input_reordered"),
+            type=core.VarDesc.VarType.LOD_TENSOR,
+            dtype=x.dtype)
+        parent_block.append_op(
+            type='reorder_lod_tensor_by_rank',
+            inputs={'X': [x],
+                    'RankTable': [self.lod_rank_table]},
+            outputs={'Out': [x_reordered]})
+        return shrink_memory(x_reordered, self.step_idx, self.lod_rank_table)
+
     @contextlib.contextmanager
     def block(self):
         if self.status != DynamicRNN.BEFORE_RNN:
             raise ValueError("rnn.block() can only be invoke once")
-        self.step_idx = fill_constant(shape=[1], dtype='int64', value=0)
+        self.step_idx = fill_constant(
+            shape=[1], dtype='int64', value=0, force_cpu=True)
         self.step_idx.stop_gradient = False
         self.status = DynamicRNN.IN_RNN
         with self.while_op.block():
@@ -1302,20 +1344,44 @@ class DynamicRNN(object):
         else:
             return self.outputs
 
-    def memory(self, init=None, shape=None, value=0.0, dtype='float32'):
+    def memory(self,
+               init=None,
+               shape=None,
+               value=0.0,
+               need_reorder=False,
+               dtype='float32'):
         self._assert_in_rnn_block_('memory')
         if init is not None:
             if not isinstance(init, Variable):
                 raise TypeError(
                     "The input arg `init` of memory() must be a Variable")
             parent_block = self._parent_block_()
+            init_tensor = init
+            if need_reorder == True:
+                if self.lod_rank_table is None:
+                    raise ValueError(
+                        'If set need_reorder to True, make sure step_input be '
+                        'invoked before '
+                        'memory(init=init, need_reordered=True, ...).')
+                init_reordered = parent_block.create_var(
+                    name=unique_name('dynamic_rnn_mem_init_reordered'),
+                    type=core.VarDesc.VarType.LOD_TENSOR,
+                    dtype=init.dtype)
+                parent_block.append_op(
+                    type='reorder_lod_tensor_by_rank',
+                    inputs={
+                        'X': [init_tensor],
+                        'RankTable': [self.lod_rank_table]
+                    },
+                    outputs={'Out': [init_reordered]})
+                init_tensor = init_reordered
             mem_array = parent_block.create_var(
                 name=unique_name('dynamic_rnn_mem_array'),
                 type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                 dtype=init.dtype)
             parent_block.append_op(
                 type='write_to_array',
-                inputs={'X': init,
+                inputs={'X': init_tensor,
                         'I': self.zero_idx},
                 outputs={'Out': mem_array})
             retv = array_read(array=mem_array, i=self.step_idx)
diff --git a/python/paddle/v2/fluid/layers/device.py b/python/paddle/v2/fluid/layers/device.py
index 775d40e5b5ef0cbb0b62bdc0678f2368b7b1a59a..736813d1b109087da367666d90be9e88dad1860e 100644
--- a/python/paddle/v2/fluid/layers/device.py
+++ b/python/paddle/v2/fluid/layers/device.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 All util layers.
 """
diff --git a/python/paddle/v2/fluid/layers/io.py b/python/paddle/v2/fluid/layers/io.py
index 56c3f7b7b7f174338bb56bc5785423ca634650a6..9af00e7de560d96103b54b37facaeadba2d3fe23 100644
--- a/python/paddle/v2/fluid/layers/io.py
+++ b/python/paddle/v2/fluid/layers/io.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .. import core
 from ..layer_helper import LayerHelper
 
@@ -15,9 +29,9 @@ def data(name,
     **Data Layer**
 
     This function takes in the input and based on whether data has
-    to be returned back as a minibatch, it creates the global variable using
+    to be returned back as a minibatch, it creates the global variable by using
     the helper functions. The global variables can be accessed by all the
-    following operations and layers in the graph.
+    following operators in the graph.
 
     All the input variables of this function are passed in as local variables
     to the LayerHelper constructor.
diff --git a/python/paddle/v2/fluid/layers/math_op_patch.py b/python/paddle/v2/fluid/layers/math_op_patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..11197b70a3d4cae08afbb49ad31013ab40e4dad2
--- /dev/null
+++ b/python/paddle/v2/fluid/layers/math_op_patch.py
@@ -0,0 +1,152 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..framework import Variable, unique_name
+from ..registry import OpProtoHolder
+
+__all__ = ['monkey_patch_variable']
+
+
+def monkey_patch_variable():
+    def unique_tmp_name():
+        return unique_name("tmp")
+
+    def safe_get_dtype(var):
+        try:
+            dtype = var.dtype
+        except:
+            raise ValueError("Cannot get data type from %s", var.name)
+        return dtype
+
+    def create_tensor(block, value, dtype, shape):
+        value = float(value)
+        tmp_name = unique_tmp_name()
+        var = block.create_var(name=tmp_name, shape=shape, dtype=dtype)
+        block.append_op(
+            type="fill_constant",
+            outputs={'Out': [var]},
+            attrs={'dtype': var.dtype,
+                   'shape': shape,
+                   'value': value})
+        return var
+
+    def create_scalar(block, value, dtype):
+        return create_tensor(block, value, dtype, shape=[1])
+
+    def create_tensor_with_batchsize(ref_var, value, dtype):
+        assert isinstance(ref_var, Variable)
+        value = float(value)
+        tmp_name = unique_tmp_name()
+        var = ref_var.block.create_var(name=tmp_name, dtype=dtype)
+        ref_var.block.append_op(
+            type='fill_constant_batch_size_like',
+            outputs={'Out': [var]},
+            inputs={'Input': [ref_var]},
+            attrs={'shape': ref_var.shape,
+                   'value': value})
+        return var
+
+    def astype(self, dtype):
+        """
+        Cast a variable to a specified data type.
+        NOTE: The variable must be a Tensor
+        Args:
+            self(Variable): The source variable
+            dtype: The target dtype
+
+        Returns:
+            Variable with new dtype
+        """
+        tmp_name = unique_tmp_name()
+        out = self.block.create_var(name=tmp_name, dtype=dtype)
+        self.block.append_op(
+            type="cast",
+            inputs={"X": [self]},
+            outputs={"Out": [out]},
+            attrs={"in_dtype": self.dtype,
+                   "out_dtype": out.dtype})
+        return out
+
+    def _elemwise_method_creator_(method_name, op_type, reverse=False):
+        def __impl__(self, other_var):
+            lhs_dtype = safe_get_dtype(self)
+
+            if not isinstance(other_var, Variable):
+                if reverse:
+                    has_batch_size = False
+                    for elem in self.shape:
+                        if elem < 0:
+                            has_batch_size = True
+                            break
+                    if not has_batch_size:
+                        other_var = create_tensor(
+                            self.block,
+                            other_var,
+                            dtype=lhs_dtype,
+                            shape=self.shape)
+                    else:
+                        other_var = create_tensor_with_batchsize(
+                            self, other_var, lhs_dtype)
+                else:
+                    # add fill_op to self.block
+                    other_var = create_scalar(
+                        self.block, value=other_var, dtype=lhs_dtype)
+
+            rhs_dtype = safe_get_dtype(other_var)
+            if lhs_dtype != rhs_dtype:
+                other_var = astype(other_var, lhs_dtype)
+            if reverse:
+                tmp = self
+                self = other_var
+                other_var = tmp
+
+            tmp_name = unique_tmp_name()
+            out = self.block.create_var(name=tmp_name, dtype=lhs_dtype)
+            self.block.append_op(
+                type=op_type,
+                inputs={'X': [self],
+                        'Y': [other_var]},
+                outputs={'Out': out})
+            return out
+
+        comment = OpProtoHolder.instance().get_op_proto(op_type).comment
+
+        __impl__.__doc__ = """
+        {0}
+        Args:
+            self(Variable): left hand variable
+            other_var(Variable|float|int): right hand variable 
+
+        Returns:
+            Variable
+        """.format(comment)
+        __impl__.__name__ = method_name
+        return __impl__
+
+    # inject methods
+    for method_name, op_type, reverse in (
+        ("__add__", "elementwise_add", False),
+            # a+b == b+a. Do not need to reverse explicitly
+        ("__radd__", "elementwise_add", False),
+        ("__sub__", "elementwise_sub", False),
+        ("__rsub__", "elementwise_sub", True),
+        ("__mul__", "elementwise_mul", False),
+            # a*b == b*a. Do not need to reverse explicitly
+        ("__rmul__", "elementwise_mul", False),
+        ("__div__", "elementwise_div", False),
+        ("__rdiv__", "elementwise_div", True)):
+        setattr(Variable, method_name,
+                _elemwise_method_creator_(method_name, op_type, reverse))
+
+    Variable.astype = astype
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 1fb6523f5523a173ac8821b5493173e8a8746463..b1db16a83ecc917528be1defa781f659342edd77 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 All layers just related to the neural network.
 """
@@ -14,7 +27,8 @@ __all__ = [
     'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
     'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
     'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
-    'sequence_first_step', 'sequence_last_step', 'dropout'
+    'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
+    'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape'
 ]
 
 
@@ -192,6 +206,102 @@ def dynamic_lstm(input,
                  cell_activation='tanh',
                  candidate_activation='tanh',
                  dtype='float32'):
+    """
+    **Dynamic LSTM Layer**
+
+    The defalut implementation is diagonal/peephole connection
+    (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
+
+    .. math::
+
+        i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)
+
+        f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)
+
+        \\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
+
+        o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)
+
+        c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
+
+        h_t & = o_t \odot act_h(c_t)
+
+    where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is
+    the matrix of weights from the input gate to the input), :math:`W_{ic}, \
+    W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In
+    our implementation, we use vectors to reprenset these diagonal weight
+    matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input
+    gate bias vector), :math:`\sigma` is the non-line activations, such as
+    logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input
+    gate, forget gate, output gate, and cell activation vectors, respectively,
+    all of which have the same size as the cell output activation vector :math:`h`.
+
+    The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
+    and :math:`act_h` are the cell input and cell output activation functions
+    and `tanh` is usually used for them. :math:`\\tilde{c_t}` is also called
+    candidate hidden state, which is computed based on the current input and
+    the previous hidden state.
+
+    Set `use_peepholes` to `False` to disable peephole connection. The formula
+    is omitted here, please refer to the paper
+    http://www.bioinf.jku.at/publications/older/2604.pdf for details.
+
+    Note that these :math:`W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}`
+    operations on the input :math:`x_{t}` are NOT included in this operator.
+    Users can choose to use fully-connect layer before LSTM layer.
+
+    Args:
+        input(Variable): The input of dynamic_lstm layer, which supports
+                         variable-time length input sequence. The underlying
+                         tensor in this Variable is a matrix with shape
+                         (T X 4D), where T is the total time steps in this
+                         mini-batch, D is the hidden size.
+        size(int): 4 * hidden size.
+        param_attr(ParamAttr): The parameter attribute for the learnable
+                               hidden-hidden weights.
+
+                               - The shape is (D x 4D), where D is the hidden
+                                 size.
+                               - Weights = {:math:`W_{ch}, W_{ih}, \
+                                                W_{fh}, W_{oh}`}
+        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                              weights, which contains two parts, input-hidden
+                              bias weights and peephole connections weights if
+                              setting `use_peepholes` to `True`.
+
+                              1. `use_peepholes = False`
+                                - The shape is (1 x 4D).
+                                - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                              2. `use_peepholes = True`
+                                - The shape is (1 x 7D).
+                                - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
+                                                 W_{fc}, W_{oc}`}.
+        use_peepholes(bool): Whether to enable diagonal/peephole connections,
+                             default `True`.
+        is_reverse(bool): Whether to compute reversed LSTM, default `False`.
+        gate_activation(str): The activation for input gate, forget gate and
+                              output gate. Choices = ["sigmoid", "tanh", "relu",
+                              "identity"], default "sigmoid".
+        cell_activation(str): The activation for cell output. Choices = ["sigmoid",
+                              "tanh", "relu", "identity"], default "tanh".
+        candidate_activation(str): The activation for candidate hidden state.
+                              Choices = ["sigmoid", "tanh", "relu", "identity"],
+                              default "tanh".
+        dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+
+    Returns:
+        tuple: The hidden state, and cell state of LSTM. The shape of both \
+        is (T x D), and lod is the same with the `input`.
+
+    Examples:
+        .. code-block:: python
+
+            hidden_dim = 512
+            forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
+                                           act=None, bias_attr=None)
+            forward, _ = fluid.layers.dynamic_lstm(
+                input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
+    """
     helper = LayerHelper('lstm', **locals())
     size = size / 4
     weight = helper.create_parameter(
@@ -248,13 +358,13 @@ def gru_unit(input,
             h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
 
     The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms
-    of the equation above, the :math:`z_t` is split into 3 parts - 
-    :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to 
-    implement a full GRU unit operator for an input, a fully 
+    of the equation above, the :math:`z_t` is split into 3 parts -
+    :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
+    implement a full GRU unit operator for an input, a fully
     connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.
 
-    The terms :math:`u_t` and :math:`r_t` represent the update and reset gates 
-    of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is 
+    The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
+    of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is
     an intermediate candidate hidden output, which is denoted by :math:`m_t`.
     This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})`
     and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
@@ -276,7 +386,7 @@ def gru_unit(input,
         .. code-block:: python
 
              # assuming we have x_t_data and prev_hidden of size=10
-             x_t = fluid.layers.fc(input=x_t_data, size=30) 
+             x_t = fluid.layers.fc(input=x_t_data, size=30)
              hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t,
                                                     hidden = prev_hidden)
 
@@ -639,6 +749,7 @@ def conv2d(input,
            groups=None,
            param_attr=None,
            bias_attr=None,
+           use_cudnn=True,
            act=None):
     """
     **Convlution2D Layer**
@@ -702,6 +813,8 @@ def conv2d(input,
             connected to the second half of the input channels. Default: groups=1
         param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None
         bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None
+        use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: True
         act(str): Activation type. Default: None
 
     Returns:
@@ -737,6 +850,8 @@ def conv2d(input,
         stride = [stride, stride]
     if isinstance(padding, int):
         padding = [padding, padding]
+    if not isinstance(use_cudnn, bool):
+        raise ValueError("use_cudnn should be True or False")
 
     input_shape = input.shape
     filter_shape = [num_filters, num_filter_channels] + filter_size
@@ -754,15 +869,18 @@ def conv2d(input,
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
-        type='conv2d_cudnn',
+        type='conv2d',
         inputs={
             'Input': input,
             'Filter': filter_param,
         },
         outputs={"Output": pre_bias},
-        attrs={'strides': stride,
-               'paddings': padding,
-               'groups': groups})
+        attrs={
+            'strides': stride,
+            'paddings': padding,
+            'groups': groups,
+            'use_cudnn': use_cudnn
+        })
 
     pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
 
@@ -910,7 +1028,9 @@ def pool2d(input,
            pool_type,
            pool_stride=None,
            pool_padding=None,
-           global_pooling=False):
+           global_pooling=False,
+           use_cudnn=True,
+           name=None):
     """
     This function adds the operator for pooling in 2 dimensions, using the
     pooling configurations mentioned in input parameters.
@@ -929,6 +1049,8 @@ def pool2d(input,
         pool_stride = [pool_stride, pool_stride]
     if isinstance(pool_padding, int):
         pool_padding = [pool_padding, pool_padding]
+    if not isinstance(use_cudnn, bool):
+        raise ValueError("use_cudnn should be True or False")
 
     helper = LayerHelper('pool2d', **locals())
     dtype = helper.input_dtype()
@@ -943,7 +1065,8 @@ def pool2d(input,
             "ksize": pool_size,
             "global_pooling": global_pooling,
             "strides": pool_stride,
-            "paddings": pool_padding
+            "paddings": pool_padding,
+            "use_cudnn": use_cudnn
         })
 
     return pool_out
@@ -956,7 +1079,8 @@ def batch_norm(input,
                epsilon=1e-05,
                param_attr=None,
                bias_attr=None,
-               data_layout='NCHW'):
+               data_layout='NCHW',
+               name=None):
     """
     This function helps create an operator to implement
     the BatchNorm layer using the configurations from the input parameters.
@@ -1032,7 +1156,7 @@ def batch_norm(input,
     return helper.append_activation(batch_norm_out)
 
 
-def beam_search_decode(ids, scores):
+def beam_search_decode(ids, scores, name=None):
     helper = LayerHelper('beam_search_decode', **locals())
     sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)
     sentence_scores = helper.create_tmp_variable(dtype=ids.dtype)
@@ -1056,7 +1180,9 @@ def conv2d_transpose(input,
                      padding=None,
                      stride=None,
                      dilation=None,
-                     param_attr=None):
+                     param_attr=None,
+                     use_cudnn=True,
+                     name=None):
     """
     The transpose of conv2d layer.
 
@@ -1083,8 +1209,10 @@ def conv2d_transpose(input,
             contain two integers, (dilation_H, dilation_W). Otherwise, the
             dilation_H = dilation_W = dilation.
         param_attr: Parameter Attribute.
-        main_program(Program): the main program
-        startup_program(Program): the startup program
+        use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: True
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         Variable: Output image.
@@ -1111,6 +1239,10 @@ def conv2d_transpose(input,
     elif dilation is not None:
         op_attr['dilations'] = dilation
 
+    if not isinstance(use_cudnn, bool):
+        raise ValueError("use_cudnn should be True or False")
+    op_attr['use_cudnn'] = use_cudnn
+
     if filter_size is None:
         if output_size is None:
             raise ValueError("output_size must be set when filter_size is None")
@@ -1148,7 +1280,7 @@ def conv2d_transpose(input,
     return out
 
 
-def sequence_expand(x, y):
+def sequence_expand(x, y, name=None):
     """Sequence Expand Layer. This layer will expand the input variable **x**
     according to LoD information of **y**. And the following examples will
     explain how sequence_expand works:
@@ -1192,6 +1324,8 @@ def sequence_expand(x, y):
     Args:
         x (Variable): The input variable which is a Tensor or LoDTensor.
         y (Variable): The input variable which is a LoDTensor.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         Variable: The expanded variable which is a LoDTensor.
@@ -1218,7 +1352,8 @@ def lstm_unit(x_t,
               cell_t_prev,
               forget_bias=0.0,
               param_attr=None,
-              bias_attr=None):
+              bias_attr=None,
+              name=None):
     """Lstm unit layer. The equation of a lstm step is:
 
         .. math::
@@ -1265,6 +1400,8 @@ def lstm_unit(x_t,
             initializer, name etc.
         bias_attr (ParamAttr): The attributes of bias weights, if not False,
             bias weights will be created and be set to default value.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         tuple: The hidden value and cell value of lstm unit.
@@ -1330,7 +1467,7 @@ def lstm_unit(x_t,
     return h, c
 
 
-def reduce_sum(input, dim=None, keep_dim=False):
+def reduce_sum(input, dim=None, keep_dim=False, name=None):
     """
     Computes the sum of tensor elements over the given dimension.
 
@@ -1344,6 +1481,8 @@ def reduce_sum(input, dim=None, keep_dim=False):
         keep_dim (bool): Whether to reserve the reduced dimension in the
             output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         Variable: The reduced Tensor variable.
@@ -1374,7 +1513,7 @@ def reduce_sum(input, dim=None, keep_dim=False):
     return out
 
 
-def reduce_mean(input, dim=None, keep_dim=False):
+def reduce_mean(input, dim=None, keep_dim=False, name=None):
     """
     Computes the mean of tensor elements over the given dimension.
 
@@ -1388,6 +1527,8 @@ def reduce_mean(input, dim=None, keep_dim=False):
         keep_dim (bool): Whether to reserve the reduced dimension in the
             output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         Variable: The reduced Tensor variable.
@@ -1418,7 +1559,7 @@ def reduce_mean(input, dim=None, keep_dim=False):
     return out
 
 
-def reduce_max(input, dim=None, keep_dim=False):
+def reduce_max(input, dim=None, keep_dim=False, name=None):
     """
     Computes the maximum of tensor elements over the given dimension.
 
@@ -1432,6 +1573,8 @@ def reduce_max(input, dim=None, keep_dim=False):
         keep_dim (bool): Whether to reserve the reduced dimension in the
             output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         Variable: The reduced Tensor variable.
@@ -1462,7 +1605,7 @@ def reduce_max(input, dim=None, keep_dim=False):
     return out
 
 
-def reduce_min(input, dim=None, keep_dim=False):
+def reduce_min(input, dim=None, keep_dim=False, name=None):
     """
     Computes the minimum of tensor elements over the given dimension.
 
@@ -1476,6 +1619,8 @@ def reduce_min(input, dim=None, keep_dim=False):
         keep_dim (bool): Whether to reserve the reduced dimension in the
             output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
 
     Returns:
         Variable: The reduced Tensor variable.
@@ -1504,3 +1649,325 @@ def reduce_min(input, dim=None, keep_dim=False):
             'reduce_all': True if dim == None else False
         })
     return out
+
+
+def split(input, num_or_sections, dim=-1, name=None):
+    """
+    Split the input tensor into multiple sub-tensors.
+
+    Args:
+        input (Variable): The input variable which is a Tensor or LoDTensor.
+        num_or_sections (int|list): If :attr:`num_or_sections` is an integer,
+            then the integer indicates the number of equal sized sub-tensors
+            that the tensor will be divided into. If :attr:`num_or_sections`
+            is a list of integers, the length of list indicates the number of
+            sub-tensors and the integers indicate the sizes of sub-tensors'
+            :attr:`dim` dimension orderly.
+        dim (int): The dimension along which to split. If :math:`dim < 0`, the
+            dimension to split along is :math:`rank(input) + dim`.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
+
+    Returns:
+        List: The list of segmented tensor variables.
+
+    Examples:
+        .. code-block:: python
+
+            # x is a Tensor variable with shape [3, 9, 5]:
+            x0, x1, x2 = fluid.layers.split(x, num_or_sections=3, dim=1)
+            x0.shape  # [3, 3, 5]
+            x1.shape  # [3, 3, 5]
+            x2.shape  # [3, 3, 5]
+            x0, x1, x2 = fluid.layers.split(x, num_or_sections=[2, 3, 4], dim=1)
+            x0.shape  # [3, 2, 5]
+            x1.shape  # [3, 3, 5]
+            x2.shape  # [3, 4, 5]
+    """
+    helper = LayerHelper('split', **locals())
+    input_shape = input.shape
+    dim = (len(input_shape) + dim) if dim < 0 else dim
+    if isinstance(num_or_sections, int):
+        assert num_or_sections > 1, 'num_or_sections must be more than 1.'
+        num = num_or_sections
+    else:
+        assert len(num_or_sections) < input_shape[
+            dim], 'len(num_or_sections) must not be more than input.shape[dim].'
+        num = len(num_or_sections)
+    outs = [
+        helper.create_tmp_variable(dtype=helper.input_dtype())
+        for i in range(num)
+    ]
+    helper.append_op(
+        type='split',
+        inputs={'X': input},
+        outputs={'Out': outs},
+        attrs={
+            'num': num_or_sections if isinstance(num_or_sections, int) else 0,
+            'sections': num_or_sections
+            if isinstance(num_or_sections, list) else [],
+            'axis': dim
+        })
+    return outs
+
+
+def l2_normalize(x, axis, epsilon=1e-12, name=None):
+    """
+    **L2 normalize Layer**
+
+    The l2 normalize layer normalizes `x` along dimension `axis` using an L2
+    norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
+
+    output = x / sqrt(max(sum(x**2), epsilon))
+
+    For `x` with more dimensions, this layer independently normalizes each 1-D
+    slice along dimension `axis`.
+
+    Args:
+       x(Variable|list): The input tensor to l2_normalize layer.
+       axis(int): Dimension along which to normalize the input.
+       epsilon(float): A lower bound value for `x`'s l2 norm. sqrt(epsilon) will
+                       be used as the divisor if the l2 norm of `x` is less than
+                       sqrt(epsilon).
+       name(str|None): A name for this layer(optional). If set None, the layer
+                       will be named automatically.
+
+
+    Returns:
+        Variable: The output tensor variable.
+
+    Examples:
+        .. code-block:: python
+
+          data = fluid.layers.data(name="data",
+                                   shape=(3, 17, 13),
+                                   dtype="float32")
+          fc = fluid.layers.l2_normalize(x=data, axis=1)
+    """
+
+    if len(x.shape) == 1: axis = 0
+
+    helper = LayerHelper("l2_normalize", **locals())
+
+    square = helper.create_tmp_variable(dtype=x.dtype)
+    helper.append_op(type="square", inputs={"X": x}, outputs={"Out": square})
+
+    reduced_sum = helper.create_tmp_variable(dtype=x.dtype)
+    helper.append_op(
+        type="reduce_sum",
+        inputs={"X": square},
+        outputs={"Out": reduced_sum},
+        attrs={
+            "dim": 1 if axis is None else axis,
+            "keep_dim": True,
+            "reduce_all": False
+        })
+
+    # TODO(caoying) A lower bound value epsilon for the norm is needed to
+    # imporve the numeric stability of reciprocal. This requires a maximum_op.
+    rsquare = helper.create_tmp_variable(dtype=x.dtype)
+    helper.append_op(
+        type="reciprocal", inputs={"X": reduced_sum}, outputs={"Out": rsquare})
+
+    # TODO(caoying) the current elementwise_mul operator does not support a
+    # general broadcast rule which broadcasts input(Y) to have the same
+    # dimension with Input(X) starting from a specified dimension. So this
+    # exanpsion is requred. Once a general broadcast rule is spported, this
+    # expanding canbe removed.
+    rsquare_expanded = helper.create_tmp_variable(dtype=x.dtype)
+    expand_times = [1] * len(x.shape)
+    expand_times[axis] = int(x.shape[axis])
+    helper.append_op(
+        type="expand",
+        inputs={"X": rsquare},
+        outputs={"Out": rsquare_expanded},
+        attrs={"expand_times": expand_times})
+
+    out = helper.create_tmp_variable(dtype=x.dtype)
+    helper.append_op(
+        type="elementwise_mul",
+        inputs={"X": x,
+                "Y": rsquare_expanded},
+        outputs={"Out": out})
+    return out
+
+
+def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
+    """
+    Applies matrix multiplication to two tensors. Currently, the input
+    tensors' rank can be any, but when the rank of anyone inputs is
+    bigger than 3, this two inputs' rank should be equal.
+
+    The actual behavior depends on the shapes of :math:`x`, :math:`y` and the
+    flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically:
+
+    - If a transpose flag is specified, the last two dimensions of the tensor
+      are transposed. If the tensor is rank-1 of shape :math:`[D]`, then for
+      :math:`x` it is treated as :math:`[1, D]` in nontransposed form and as
+      :math:`[D, 1]` in transposed form, whereas for :math:`y` it is the
+      opposite: It is treated as :math:`[D, 1]` in nontransposed form and as
+      :math:`[1, D]` in transposed form.
+
+    - After transpose, the two tensors are 2-D or n-D and matrix multiplication
+      performs in the following way.
+
+      - If both are 2-D, they are multiplied like conventional matrices.
+      - If either is n-D, it is treated as a stack of matrices residing in the
+        last two dimensions and a batched matrix multiply supporting broadcast 
+        applies on the two tensors.
+
+    Also note that if the raw tensor :math:`x` or :math:`y` is rank-1 and 
+    nontransposed, the prepended or appended dimension :math:`1` will be 
+    removed after matrix multiplication.
+
+    Args:
+        x (Variable): The input variable which is a Tensor or LoDTensor.
+        y (Variable): The input variable which is a Tensor or LoDTensor.
+        transpose_x (bool): Whether to transpose :math:`x` before multiplication.
+        transpose_y (bool): Whether to transpose :math:`y` before multiplication.
+        name(str|None): A name for this layer(optional). If set None, the layer
+            will be named automatically.
+
+    Returns:
+        Variable: The product Tensor variable.
+
+    Examples:
+        .. code-block:: python
+
+            # Examples to clarify shapes of the inputs and output
+            # x: [B, ..., M, K], y: [B, ..., K, N]
+            fluid.layers.matmul(x, y)  # out: [B, ..., M, N]
+            # x: [B, M, K], y: [B, K, N]
+            fluid.layers.matmul(x, y)  # out: [B, M, N]
+            # x: [B, M, K], y: [K, N]
+            fluid.layers.matmul(x, y)  # out: [B, M, N]
+            # x: [B, M, K], y: [K]
+            fluid.layers.matmul(x, y)  # out: [B, M]
+            # x: [M, K], y: [K, N]
+            fluid.layers.matmul(x, y)  # out: [M, N]
+            # x: [K], y: [K]
+            fluid.layers.matmul(x, y)  # out: [1]
+            # x: [M], y: [N]
+
+            fluid.layers.matmul(x, y, True, True)  # out: [M, N]
+    """
+    helper = LayerHelper('matmul', **locals())
+    assert max(len(x.shape), len(y.shape)) <= 3 or len(x.shape) == len(
+        y.
+        shape), 'Inputs\' rank should be equal or their rank should be less 4.'
+    out = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='matmul',
+        inputs={'X': x,
+                'Y': y},
+        outputs={'Out': out},
+        attrs={'transpose_X': transpose_x,
+               'transpose_Y': transpose_y})
+    return out
+
+
+def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
+    """
+    An operator integrating the open source Warp-CTC library
+    (https://github.com/baidu-research/warp-ctc)
+    to compute Connectionist Temporal Classification (CTC) loss.
+    It can be aliased as softmax with CTC, since a native softmax activation is
+    interated to the Warp-CTC library, to to normlize values for each row of the
+    input tensor.
+
+    Args:
+       input(Variable): (LodTensor, default: LoDTensor<float>),
+         the unscaled probabilities of variable-length sequences,
+         which is a 2-D Tensor with LoD information.
+         It's shape is [Lp, num_classes + 1], where Lp is the sum of all input
+         sequences' length and num_classes is the true number of classes.
+         (not including the blank label).
+       label(Variable): (LodTensor, default: LoDTensor<int>), the ground truth
+         of variable-length sequence, which is a 2-D Tensor with LoD
+         information. It is of the shape [Lg, 1], where Lg is th sum of
+         all labels' length.
+       blank: (int, default: 0), the blank label index of Connectionist
+         Temporal Classification (CTC) loss, which is in the
+         half-opened interval [0, num_classes + 1).
+       norm_by_times: (bool, default: false), whether to normalize
+       the gradients by the number of time-step,which is also the
+       sequence's length. There is no need to normalize the gradients
+       if warpctc layer was follewed by a mean_op.
+
+    Returns:
+        Variable: The Connectionist Temporal Classification (CTC) loss,
+        which is a 2-D Tensor of the shape [batch_size, 1].
+
+    Examples:
+        .. code-block:: python
+            y = layers.data(name='y', shape=[11, 8], dtype='float32', lod_level=1)
+            y_predict = layers.data(name='y_predict', shape=[11, 1], dtype='float32')
+            cost = layers.warpctc(input=y_predict, label=y)
+
+    """
+    helper = LayerHelper('warpctc', **kwargs)
+    loss_out = helper.create_tmp_variable(dtype=input.dtype)
+    grad_out = helper.create_tmp_variable(dtype=input.dtype)
+    helper.append_op(
+        type='warpctc',
+        inputs={'Logits': [input],
+                'Label': [label]},
+        outputs={'WarpCTCGrad': [grad_out],
+                 'Loss': [loss_out]},
+        attrs={'blank': blank,
+               'norm_by_times': norm_by_times})
+    return loss_out
+
+
+def sequence_reshape(input, new_dim):
+    """
+    **Sequence Reshape Layer**
+
+    This layer will rearrange the input sequences. The new dimension is set by
+    user. Length of each sequence is computed according to original length,
+    original dimension and new dimension. The following example will help to
+    illustrate the function of this layer:
+
+    .. code-block:: text
+
+        x is a LoDTensor:
+            x.lod  = [[0, 2, 6]]
+            x.data = [[1, 2], [3, 4],
+                      [5, 6], [7, 8], [9, 10], [11, 12]]
+            x.dims = [6, 2]
+
+        set new_dim = 4
+
+        then out is a LoDTensor:
+            out.lod  = [[0, 1, 3]]
+            out.data = [[1, 2, 3, 4],
+                        [5, 6, 7, 8], [9, 10, 11, 12]]
+            out.dims = [3, 4]
+
+    Currently, only 1-level LoDTensor is supported and please make sure
+    (original length * original dimension) can be divided by new dimension with
+    no remainder for each sequence.
+
+    Args:
+       input (Variable): (LodTensor, default: LoDTensor<float>), a 2-D LoDTensor
+                with shape being [N, M] where M for dimension.
+       new_dim (int): New dimension which the input LoDTensor is reshaped to.
+
+    Returns:
+        Variable: Reshaped LoDTensor according to new dimension.
+
+    Examples:
+        .. code-block:: python
+
+            x = fluid.layers.data(name='x', shape=[5, 20],
+                              dtype='float32', lod_level=1)
+            x_reshaped = layers.sequence_reshape(input=x, new_dim=10)
+    """
+    helper = LayerHelper('sequence_reshape', **locals())
+    out = helper.create_tmp_variable(helper.input_dtype())
+    helper.append_op(
+        type='sequence_reshape',
+        inputs={'X': [input]},
+        outputs={'Out': [out]},
+        attrs={'new_dim': new_dim})
+    return out
diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index d3a5b70785947148d6e208b4d8dafec8bb52ff85..7716052a5cae0d8883dd9996f34d39e97f4399ea 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -1,7 +1,48 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from ..registry import register_layer
 
 __activations__ = [
-    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
+    'sigmoid',
+    'logsigmoid',
+    'exp',
+    'relu',
+    'tanh',
+    'tanh_shrink',
+    'softshrink',
+    'sqrt',
+    'abs',
+    'ceil',
+    'floor',
+    'round',
+    'reciprocal',
+    'log',
+    'square',
+    'softplus',
+    'softsign',
+    'brelu',
+    'leaky_relu',
+    'soft_relu',
+    'elu',
+    'relu6',
+    'pow',
+    'stanh',
+    'hard_shrink',
+    'thresholded_relu',
+    'hard_sigmoid',
+    'swish',
 ]
 
 __all__ = [
@@ -15,6 +56,8 @@ __all__ = [
     'elementwise_div',
     'elementwise_sub',
     'elementwise_mul',
+    'elementwise_max',
+    'elementwise_min',
     'clip',
     'sequence_softmax',
 ] + __activations__
diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py
index 438df33afbe33a6b7b8e216c098b949b2cb43fa2..6e7d09459c07c77a8579300a1c67ae36dc3d2ba2 100644
--- a/python/paddle/v2/fluid/layers/tensor.py
+++ b/python/paddle/v2/fluid/layers/tensor.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from ..layer_helper import LayerHelper
 from ..param_attr import ParamAttr
 from ..framework import convert_np_dtype_to_dtype_
@@ -6,8 +20,16 @@ from ..core import DataType
 import numpy
 
 __all__ = [
-    'create_tensor', 'create_parameter', 'cast', 'concat', 'sums', 'assign',
-    'fill_constant_batch_size_like', 'fill_constant', 'ones', 'zeros'
+    'create_tensor',
+    'create_parameter',
+    'cast',
+    'concat',
+    'sums',
+    'assign',
+    'fill_constant_batch_size_like',
+    'fill_constant',
+    'ones',
+    'zeros',
 ]
 
 
@@ -172,7 +194,7 @@ def assign(input, output):
     return output
 
 
-def fill_constant(shape, dtype, value, out=None):
+def fill_constant(shape, dtype, value, force_cpu=False, out=None):
     """
     **fill_constant**
 
@@ -203,9 +225,12 @@ def fill_constant(shape, dtype, value, out=None):
         type='fill_constant',
         inputs={},
         outputs={'Out': [out]},
-        attrs={'shape': shape,
-               'dtype': out.dtype,
-               'value': float(value)})
+        attrs={
+            'shape': shape,
+            'dtype': out.dtype,
+            'value': float(value),
+            'force_cpu': force_cpu
+        })
     out.stop_gradient = True
     return out
 
diff --git a/python/paddle/v2/fluid/memory_optimization_transpiler.py b/python/paddle/v2/fluid/memory_optimization_transpiler.py
index 6800d7ddbb141a8bc2be10abe68ab86771c71156..1b4b64755963b5edc3d07d861c2a9b6cc3f23587 100644
--- a/python/paddle/v2/fluid/memory_optimization_transpiler.py
+++ b/python/paddle/v2/fluid/memory_optimization_transpiler.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from collections import defaultdict
 import framework
 from framework import Program, default_main_program, Parameter, Variable
@@ -121,8 +135,10 @@ class ControlFlowGraph(object):
                                 # and dtype_to_size[cache_dtype]
                                 if x_dtype == cache_dtype:
                                     print(
-                                        "Hit Cache !!!! cache pool index is %d, var name is %s, cached var name is %s, var shape is %s "
-                                        %
+                                        ("Hit Cache !!!! cache pool index "
+                                         "is %d, var name is %s, "
+                                         "cached var name is %s, "
+                                         "var shape is %s ") %
                                         (index, x, cache_var, str(cache_shape)))
                                     self.pool.pop(index)
                                     _rename_arg_(
diff --git a/python/paddle/v2/fluid/net_drawer.py b/python/paddle/v2/fluid/net_drawer.py
index 94fdd5e38970b309580de6fc934b158a3c46e464..9b126f51971acabba73db7bd6c33ba39e8876ca3 100644
--- a/python/paddle/v2/fluid/net_drawer.py
+++ b/python/paddle/v2/fluid/net_drawer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import argparse
 import json
 import logging
diff --git a/python/paddle/v2/fluid/nets.py b/python/paddle/v2/fluid/nets.py
index 54886a8f2cc63474fe82290c0a12771b4cbdba72..a30e646d8cbccb397d11c1f6164946e748f40c5e 100644
--- a/python/paddle/v2/fluid/nets.py
+++ b/python/paddle/v2/fluid/nets.py
@@ -1,6 +1,25 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import layers
 
-__all__ = ["simple_img_conv_pool", "sequence_conv_pool"]
+__all__ = [
+    "simple_img_conv_pool",
+    "sequence_conv_pool",
+    "glu",
+    "dot_product_attention",
+]
 
 
 def simple_img_conv_pool(input,
@@ -10,19 +29,22 @@ def simple_img_conv_pool(input,
                          pool_stride,
                          act,
                          param_attr=None,
-                         pool_type='max'):
+                         pool_type='max',
+                         use_cudnn=True):
     conv_out = layers.conv2d(
         input=input,
         num_filters=num_filters,
         filter_size=filter_size,
         param_attr=param_attr,
-        act=act)
+        act=act,
+        use_cudnn=use_cudnn)
 
     pool_out = layers.pool2d(
         input=conv_out,
         pool_size=pool_size,
         pool_type=pool_type,
-        pool_stride=pool_stride)
+        pool_stride=pool_stride,
+        use_cudnn=use_cudnn)
     return pool_out
 
 
@@ -36,7 +58,8 @@ def img_conv_group(input,
                    conv_with_batchnorm=False,
                    conv_batchnorm_drop_rate=None,
                    pool_stride=1,
-                   pool_type=None):
+                   pool_type=None,
+                   use_cudnn=True):
     """
     Image Convolution Group, Used for vgg net.
     """
@@ -67,7 +90,8 @@ def img_conv_group(input,
             filter_size=conv_filter_size[i],
             padding=conv_padding[i],
             param_attr=param_attr[i],
-            act=local_conv_act)
+            act=local_conv_act,
+            use_cudnn=use_cudnn)
 
         if conv_with_batchnorm[i]:
             tmp = layers.batch_norm(input=tmp, act=conv_act)
@@ -79,7 +103,8 @@ def img_conv_group(input,
         input=tmp,
         pool_size=pool_size,
         pool_type=pool_type,
-        pool_stride=pool_stride)
+        pool_stride=pool_stride,
+        use_cudnn=use_cudnn)
     return pool_out
 
 
@@ -98,3 +123,90 @@ def sequence_conv_pool(input,
 
     pool_out = layers.sequence_pool(input=conv_out, pool_type=pool_type)
     return pool_out
+
+
+def glu(input, dim=-1):
+    """
+    The gated linear unit composed by split, sigmoid activation and elementwise 
+    multiplication. Specifically, Split the input into two equal sized parts 
+    :math:`a` and :math:`b` along the given dimension and then compute as 
+    following:
+
+        .. math::
+
+            {GLU}(a, b)= a \otimes \sigma(b)
+
+    Refer to `Language Modeling with Gated Convolutional Networks 
+    <https://arxiv.org/pdf/1612.08083.pdf>`_.
+    
+    Args:
+        input (Variable): The input variable which is a Tensor or LoDTensor.
+        dim (int): The dimension along which to split. If :math:`dim < 0`, the 
+            dimension to split along is :math:`rank(input) + dim`.
+
+    Returns:
+        Variable: The Tensor variable with half the size of input.
+
+    Examples:
+        .. code-block:: python
+
+            # x is a Tensor variable with shape [3, 6, 9]
+            fluid.nets.glu(input=x, dim=1)  # shape of output: [3, 3, 9]
+    """
+
+    a, b = layers.split(input, num_or_sections=2, dim=dim)
+    act_b = layers.sigmoid(x=b)
+    out = layers.elementwise_mul(x=a, y=act_b)
+    return out
+
+
+def dot_product_attention(querys, keys, values):
+    """
+    The dot-product attention.
+
+    Attention mechanism can be seen as mapping a query and a set of key-value 
+    pairs to an output. The output is computed as a weighted sum of the values, 
+    where the weight assigned to each value is computed by a compatibility 
+    function (dot-product here) of the query with the corresponding key.
+    
+    The dot-product attention can be implemented through (batch) matrix 
+    multipication as follows:
+
+        .. math::
+
+            Attention(Q, K, V)= softmax(QK^\mathrm{T})V
+
+    Refer to `Attention Is All You Need 
+    <https://arxiv.org/pdf/1706.03762.pdf>`_.
+
+    Note that batch data containing sequences with different lengths is not 
+    supported by this because of the (batch) matrix multipication.
+    
+    Args:
+        query (Variable): The input variable which is a Tensor or LoDTensor.
+        key (Variable): The input variable which is a Tensor or LoDTensor.
+        value (Variable): The input variable which is a Tensor or LoDTensor.
+
+    Returns:
+        tuple: The Tensor variables representing the output and attention scores.
+
+    Examples:
+        .. code-block:: python
+
+            # Suppose q, k, v are tensor variables with the following shape:
+            # q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10]
+            out, attn_scores = fluid.nets.dot_product_attention(q, k, v)
+            out.shape  # [3, 5, 10]
+            attn_scores.shape  # [3, 5, 6]
+    """
+    assert keys.shape[-2] == values.shape[
+        -2], 'The shapes of keys and values mismatch.'
+    assert querys.shape[-1] == keys.shape[
+        -1], 'The shapes of querys and keys mismatch.'
+    product = layers.matmul(x=querys, y=keys, transpose_y=True)
+    attn_scores = layers.reshape(
+        x=layers.reshape(
+            x=product, shape=[-1, product.shape[-1]], act='softmax'),
+        shape=product.shape)
+    out = layers.matmul(attn_scores, values)
+    return out, attn_scores
diff --git a/python/paddle/v2/fluid/op.py b/python/paddle/v2/fluid/op.py
index 5828803497ec06bc7644da18ca752f61469ca53f..f368e0c2d86b233cba49b312febf1293c91f91a2 100644
--- a/python/paddle/v2/fluid/op.py
+++ b/python/paddle/v2/fluid/op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
 
diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py
index 40721b5e97a3ab2b6fe772635454105f5cdf7b6c..0c3533b892176edd5dfd111fdd771cc17d468168 100644
--- a/python/paddle/v2/fluid/optimizer.py
+++ b/python/paddle/v2/fluid/optimizer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from collections import defaultdict
 
 import framework
diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py
index ab4561b0423dd73c8c0d529cbf34b52876b1b77c..26e9111f6f31019ab14780cc4bea01e617561fb7 100644
--- a/python/paddle/v2/fluid/param_attr.py
+++ b/python/paddle/v2/fluid/param_attr.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from initializer import Initializer, Xavier, Constant
 from regularizer import WeightDecayRegularizer
 
diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py
index dcecd76224e70d03ed987a5bb104a977a527d218..29e0d54a3ac9622e5505c8e5de38616d9c636e67 100644
--- a/python/paddle/v2/fluid/profiler.py
+++ b/python/paddle/v2/fluid/profiler.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 from contextlib import contextmanager
 import os
diff --git a/python/paddle/v2/fluid/registry.py b/python/paddle/v2/fluid/registry.py
index 7aa82906114b355277185211134bb791e5dc43f9..ff10542d40aabaf31897842754d38b7868472b21 100644
--- a/python/paddle/v2/fluid/registry.py
+++ b/python/paddle/v2/fluid/registry.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import re
 import cStringIO
 import warnings
@@ -8,7 +21,11 @@ import proto.framework_pb2 as framework_pb2
 from framework import OpProtoHolder, Variable, Program, Operator
 from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
 
-__all__ = ['deprecated', 'register_layer', 'autodoc']
+__all__ = [
+    'deprecated',
+    'register_layer',
+    'autodoc',
+]
 
 
 def _convert_(name):
@@ -80,11 +97,10 @@ def _generate_doc_string_(op_proto):
 
 
 def register_layer(op_type):
-    """
-    Register an Python layer for an Operator
+    """Register the Python layer for an Operator.
 
     Args:
-       op_type: The name of the operator to be created
+       op_type: The name of the operator to be created.
 
     This function takes in the operator type (sigmoid, mean , average etc) and
     creates the operator functionality.
@@ -98,16 +114,16 @@ def register_layer(op_type):
 
     if len(not_intermediate_outputs) != 1:
         raise ValueError("Only one non intermediate output operator can be",
-                         "automatically generated")
+                         "automatically generated.")
 
     if not_intermediate_outputs[0].duplicable:
         raise ValueError(
-            "Only non duplicable op can be automatically generated")
+            "Only non duplicable op can be automatically generated.")
 
     for output in intermediate_outputs:
         if output.duplicable:
             raise ValueError("The op can be automatically generated only when ",
-                             "all intermediate ops are not duplicable")
+                             "all intermediate ops are not duplicable.")
 
     o_name = not_intermediate_outputs[0].name
     intermediate_output_names = [output.name for output in intermediate_outputs]
@@ -151,13 +167,18 @@ def register_layer(op_type):
             inputs[ipt.name] = val
 
         outputs = dict()
-        out = helper.create_tmp_variable(dtype=dtype)
-        outputs[o_name] = [out]
+        out = kwargs.pop(_convert_(o_name), [])
+        if out:
+            out_var = out[0] if (isinstance(out, list) or
+                                 isinstance(out, tuple)) else out
+        else:
+            out_var = helper.create_tmp_variable(dtype=dtype)
+        outputs[o_name] = [out_var]
         for name in intermediate_output_names:
             outputs[name] = [helper.create_tmp_variable(dtype=dtype)]
         helper.append_op(
             type=op_type, inputs=inputs, outputs=outputs, attrs=kwargs)
-        return helper.append_activation(out)
+        return helper.append_activation(out_var)
 
     func.__name__ = op_type
     func.__doc__ = _generate_doc_string_(op_proto)
diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py
index d1955b00479676448d99603a31249aa7ac6a0d3f..c2f28eecfda71e305d96c5a6b62c4f5f0fbf3fa6 100644
--- a/python/paddle/v2/fluid/regularizer.py
+++ b/python/paddle/v2/fluid/regularizer.py
@@ -1,6 +1,24 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import framework
 
-__all__ = ['append_regularization_ops', 'L1Decay', 'L2Decay']
+__all__ = [
+    'append_regularization_ops',
+    'L1Decay',
+    'L2Decay',
+]
 
 
 def append_regularization_ops(parameters_and_grads, regularization=None):
diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt
index e795627bfe9e8ad0c196349a332e62e975f20aa3..9a0240cbf65c7a79e29babc2abcb157ada684c5e 100644
--- a/python/paddle/v2/fluid/tests/CMakeLists.txt
+++ b/python/paddle/v2/fluid/tests/CMakeLists.txt
@@ -5,3 +5,4 @@ foreach(src ${TEST_OPS})
 endforeach()
 
 add_subdirectory(book)
+add_subdirectory(book_distribute)
diff --git a/python/paddle/v2/fluid/tests/__init__.py b/python/paddle/v2/fluid/tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b94a21a7e406b833797f8f521c62a2351c2bc30a 100644
--- a/python/paddle/v2/fluid/tests/__init__.py
+++ b/python/paddle/v2/fluid/tests/__init__.py
@@ -0,0 +1,13 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
index fbf46ac6cba8fa4981cc8a6e8f5434a510c52d7d..462669c262f285a7c6d36cf60f2f3f952c83f6b3 100644
--- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
index 3d336ffe9582ddd9a2031e7aa7e2c26a772820f8..30582a21d0a5eeab125f3a2764b45b51aa4f94b6 100644
--- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
+++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 
 import sys
diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
index 74ca56182c47de2e74e80a56bf84dcf90ca6c104..1a342bf1fbbc0e5f4e3c7d440424b66c4b9f732f 100644
--- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import math
 
 import numpy as np
@@ -20,7 +34,7 @@ mix_hidden_lr = 1e-3
 
 IS_SPARSE = True
 PASS_NUM = 10
-BATCH_SIZE = 20
+BATCH_SIZE = 10
 
 embedding_name = 'emb'
 
diff --git a/python/paddle/v2/fluid/tests/book/test_machine_translation.py b/python/paddle/v2/fluid/tests/book/test_machine_translation.py
index e79864b3977ed8111903f9497685ee7ebf76e1da..53ae200a2387712c63ab67f44d4e9da03ebbe4b2 100644
--- a/python/paddle/v2/fluid/tests/book/test_machine_translation.py
+++ b/python/paddle/v2/fluid/tests/book/test_machine_translation.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
index 35bf8da924dc76475df9bd5e6a4c04f4d204426a..4710d16c24e95a11108801a014f94687558fd91e 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
index 51bfe2973db7bd2ec4b43bb588be4c1fcfb11e74..be22e97054a16d69cf9e2d1e88629497e519c778 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
index e3cc2a89371233014dec4ba3d730a866722d3eae..d4a694e5721415fd9c953a83d927b25b80f5fb47 100644
--- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
index f103358edca9bbd2e28c99afd249f97b1d8069ae..df27399dd215a579d7e3f8a1659180a06b1e7f64 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
index cd28f04b8574778316d70e7d8a03026f807c3e52..529223eba8af6d968b490068f34559880312515d 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
index 633de66bea2af7404ab0d325b425e7b9e63d3e43..618191424150eb7c5a24407fc2e106ee8825fedb 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py
index 8b928ff9eed41f8945c749058b4177fd023452ba..8cf54846fe5dba2742ce69e34e0788e124a1a85d 100644
--- a/python/paddle/v2/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt b/python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4d7664469e481344cf9eea84688f068b4fb99dee
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
@@ -0,0 +1,5 @@
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+foreach(src ${TEST_OPS})
+    py_test(${src} SRCS ${src}.py)
+endforeach()
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_dist_fit_a_line.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
similarity index 71%
rename from python/paddle/v2/fluid/tests/book_distribute/test_dist_fit_a_line.py
rename to python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
index bb339c440bd0d229d2ae348cf5a7745b16d156d5..52c7ecdeb3646fdce36937b84ba8956947371d87 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/test_dist_fit_a_line.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
@@ -40,8 +54,9 @@ if training_role == "PSERVER":
     if not current_endpoint:
         print("need env SERVER_ENDPOINT")
         exit(1)
-    pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
-    exe.run(fluid.default_startup_program())
+    pserver_prog = t.get_pserver_program(current_endpoint)
+    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+    exe.run(pserver_startup)
     exe.run(pserver_prog)
 else:
     trainer_prog = t.get_trainer_program()
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..218dea31e10757d901c5524567f13501b64dbea5
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_image_classification.py
@@ -0,0 +1,173 @@
+#Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import print_function
+
+import sys
+
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import os
+import sys
+
+TRAINERS = 5
+BATCH_SIZE = 128
+PASS_NUM = 100
+
+
+def resnet_cifar10(input, depth=32):
+    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
+        tmp = fluid.layers.conv2d(
+            input=input,
+            filter_size=filter_size,
+            num_filters=ch_out,
+            stride=stride,
+            padding=padding,
+            act=None,
+            bias_attr=False)
+        return fluid.layers.batch_norm(input=tmp, act=act)
+
+    def shortcut(input, ch_in, ch_out, stride):
+        if ch_in != ch_out:
+            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
+        else:
+            return input
+
+    def basicblock(input, ch_in, ch_out, stride):
+        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
+        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
+        short = shortcut(input, ch_in, ch_out, stride)
+        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
+
+    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
+        tmp = block_func(input, ch_in, ch_out, stride)
+        for i in range(1, count):
+            tmp = block_func(tmp, ch_out, ch_out, 1)
+        return tmp
+
+    assert (depth - 2) % 6 == 0
+    n = (depth - 2) / 6
+    conv1 = conv_bn_layer(
+        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
+    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
+    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
+    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
+    pool = fluid.layers.pool2d(
+        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
+    return pool
+
+
+def vgg16_bn_drop(input):
+    def conv_block(input, num_filter, groups, dropouts):
+        return fluid.nets.img_conv_group(
+            input=input,
+            pool_size=2,
+            pool_stride=2,
+            conv_num_filter=[num_filter] * groups,
+            conv_filter_size=3,
+            conv_act='relu',
+            conv_with_batchnorm=True,
+            conv_batchnorm_drop_rate=dropouts,
+            pool_type='max')
+
+    conv1 = conv_block(input, 64, 2, [0.3, 0])
+    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
+    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
+    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
+    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
+
+    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
+    fc1 = fluid.layers.fc(input=drop, size=512, act=None)
+    bn = fluid.layers.batch_norm(input=fc1, act='relu')
+    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
+    fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
+    return fc2
+
+
+classdim = 10
+data_shape = [3, 32, 32]
+
+images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+net_type = "vgg"
+if len(sys.argv) >= 2:
+    net_type = sys.argv[1]
+
+if net_type == "vgg":
+    print("train vgg net")
+    net = vgg16_bn_drop(images)
+elif net_type == "resnet":
+    print("train resnet")
+    net = resnet_cifar10(images, 32)
+else:
+    raise ValueError("%s network is not supported" % net_type)
+
+predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
+cost = fluid.layers.cross_entropy(input=predict, label=label)
+avg_cost = fluid.layers.mean(x=cost)
+
+optimizer = fluid.optimizer.Adam(learning_rate=0.001)
+optimize_ops, params_grads = optimizer.minimize(avg_cost)
+
+accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.cifar.train10(), buf_size=128 * 10),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+
+t = fluid.DistributeTranspiler()
+# all parameter server endpoints list for spliting parameters
+pserver_endpoints = os.getenv("PSERVERS")
+# server endpoint for current node
+current_endpoint = os.getenv("SERVER_ENDPOINT")
+# run as trainer or parameter server
+training_role = os.getenv("TRAINING_ROLE",
+                          "TRAINER")  # get the training role: trainer/pserver
+t.transpile(
+    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)
+
+if training_role == "PSERVER":
+    if not current_endpoint:
+        print("need env SERVER_ENDPOINT")
+        exit(1)
+    print("start pserver at:", current_endpoint)
+    pserver_prog = t.get_pserver_program(current_endpoint)
+    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+    exe.run(pserver_startup)
+    exe.run(pserver_prog)
+    print("pserver run end")
+elif training_role == "TRAINER":
+    print("start trainer")
+    trainer_prog = t.get_trainer_program()
+    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
+    exe.run(fluid.default_startup_program())
+    for pass_id in range(PASS_NUM):
+        accuracy.reset(exe)
+        for data in train_reader():
+            loss, acc = exe.run(trainer_prog,
+                                feed=feeder.feed(data),
+                                fetch_list=[avg_cost] + accuracy.metrics)
+            pass_acc = accuracy.eval(exe)
+            print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
+                pass_acc))
+            # this model is slow, so if we can train two mini batch, we think it works properly.
+    print("trainer run end")
+else:
+    print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
+exit(1)
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
similarity index 90%
rename from python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py
rename to python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
index 5fa5e0e5f34e6904e0e66d3ab4149cdfcffeb244..08bb67b0a1f53c73b713238ab45ec8055726cf9c 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import math
 
 import numpy as np
@@ -184,8 +198,9 @@ def main():
         if not current_endpoint:
             print("need env SERVER_ENDPOINT")
             exit(1)
-        pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
-        exe.run(fluid.default_startup_program())
+        pserver_prog = t.get_pserver_program(current_endpoint)
+        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+        exe.run(pserver_startup)
         exe.run(pserver_prog)
     elif training_role == "TRAINER":
         trainer_prog = t.get_trainer_program()
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
similarity index 81%
rename from python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py
rename to python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
index b41853784d607c566fc596ab93f2282520778a4b..04b3113690fde072ab74893508298b920ab9599e 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
@@ -74,8 +88,9 @@ if training_role == "PSERVER":
     if not current_endpoint:
         print("need env SERVER_ENDPOINT")
         exit(1)
-    pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
-    exe.run(fluid.default_startup_program())
+    pserver_prog = t.get_pserver_program(current_endpoint)
+    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+    exe.run(pserver_startup)
     exe.run(pserver_prog)
 elif training_role == "TRAINER":
     feeder = fluid.DataFeeder(
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
index 20b4a8b34cd085ae51e6169f0d4eac58b7f3ffb2..f18ca05c78093ff4ca22bf8b30d59240ee55ee8b 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
@@ -39,26 +53,27 @@ train_reader = paddle.batch(
 place = fluid.CPUPlace()
 exe = fluid.Executor(place)
 
-t = fluid.DistributeTranspiler()
-# all parameter server endpoints list for spliting parameters
-pserver_endpoints = os.getenv("PSERVERS")
-# server endpoint for current node
-current_endpoint = os.getenv("SERVER_ENDPOINT")
-# run as trainer or parameter server
+pserver_endpoints = os.getenv("PSERVERS")  # all pserver endpoints
+trainers = int(os.getenv("TRAINERS"))  # total trainer count
+current_endpoint = os.getenv("SERVER_ENDPOINT")  # current pserver endpoint
 training_role = os.getenv("TRAINING_ROLE",
                           "TRAINER")  # get the training role: trainer/pserver
-t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
+t = fluid.DistributeTranspiler()
+t.transpile(
+    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=trainers)
 
 if training_role == "PSERVER":
     if not current_endpoint:
         print("need env SERVER_ENDPOINT")
         exit(1)
-    pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
-    exe.run(fluid.default_startup_program())
+    pserver_prog = t.get_pserver_program(current_endpoint)
+    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+    exe.run(pserver_startup)
     exe.run(pserver_prog)
 elif training_role == "TRAINER":
     trainer_prog = t.get_trainer_program()
     feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
+    # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
     exe.run(fluid.default_startup_program())
 
     for pass_id in range(PASS_NUM):
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py
new file mode 100644
index 0000000000000000000000000000000000000000..7733248cb447ed953d8b05945d51370c4c293489
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py
@@ -0,0 +1,89 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import os
+
+BATCH_SIZE = 128
+PASS_NUM = 100
+
+images = fluid.layers.data(name='x', shape=[784], dtype='float32')
+
+# TODO(aroraabhinav) Add regularization and error clipping after
+# Issue 7432(https://github.com/PaddlePaddle/Paddle/issues/7432) is resolved.
+hidden1 = fluid.layers.fc(input=images, size=128, act='relu')
+hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+
+label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+
+cost = fluid.layers.cross_entropy(input=predict, label=label)
+avg_cost = fluid.layers.mean(x=cost)
+
+optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
+optimize_ops, params_grads = optimizer.minimize(avg_cost)
+
+accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.mnist.train(), buf_size=8192),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+
+t = fluid.DistributeTranspiler()
+# all parameter server endpoints list for spliting parameters
+pserver_endpoints = os.getenv("PSERVERS")
+# server endpoint for current node
+current_endpoint = os.getenv("SERVER_ENDPOINT")
+# run as trainer or parameter server
+training_role = os.getenv("TRAINING_ROLE",
+                          "TRAINER")  # get the training role: trainer/pserver
+t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
+
+if training_role == "PSERVER":
+    if not current_endpoint:
+        print("need env SERVER_ENDPOINT")
+        exit(1)
+    pserver_prog = t.get_pserver_program(current_endpoint)
+    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+    exe.run(pserver_startup)
+    exe.run(pserver_prog)
+elif training_role == "TRAINER":
+    trainer_prog = t.get_trainer_program()
+    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
+    exe.run(fluid.default_startup_program())
+
+    for pass_id in range(PASS_NUM):
+        accuracy.reset(exe)
+        batch_id = 0
+        for data in train_reader():
+            loss, acc = exe.run(trainer_prog,
+                                feed=feeder.feed(data),
+                                fetch_list=[avg_cost] + accuracy.metrics)
+            pass_acc = accuracy.eval(exe)
+            if batch_id % 100 == 0:
+                print("batch_id %d, loss: %f, acc: %f" %
+                      (batch_id, loss, pass_acc))
+            batch_id += 1
+
+        pass_acc = accuracy.eval(exe)
+        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
+else:
+    print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_understand_sentiment_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
similarity index 81%
rename from python/paddle/v2/fluid/tests/book_distribute/test_understand_sentiment_conv_dist.py
rename to python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
index db419e23abcd06ca39011b1bef078b0cafb5100e..49f26d6b69a836cdc44244eb8938884637acf720 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/test_understand_sentiment_conv_dist.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import os
 import numpy as np
@@ -79,15 +93,16 @@ def main():
     t.transpile(
         optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
 
-    exe.run(fluid.default_startup_program())
-
     if training_role == "PSERVER":
         if not current_endpoint:
             print("need env SERVER_ENDPOINT")
             exit(1)
-        pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
+        pserver_prog = t.get_pserver_program(current_endpoint)
+        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
+        exe.run(pserver_startup)
         exe.run(pserver_prog)
     elif training_role == "TRAINER":
+        exe.run(fluid.default_startup_program())
         trainer_prog = t.get_trainer_program()
         feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
 
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_split_var.py b/python/paddle/v2/fluid/tests/book_distribute/test_split_var.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a50049bf2644f237de9feadc284ead05fa2f36c
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/test_split_var.py
@@ -0,0 +1,53 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import unittest
+from paddle.v2.fluid.distribute_transpiler import split_dense_variable
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.core as core
+import random
+
+
+class TestSplitVar(unittest.TestCase):
+    def test_check_output(self):
+        # split below shapes to 10 servers
+        shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10]]
+        expected_sizes = [
+            [15], [1024],
+            [2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 784],
+            [2040, 2040, 2040, 2040],
+            [1150, 1150, 1150, 1150, 1150, 1150, 1100]
+        ]
+        var_list = []
+        program = fluid.Program()
+        for shape in shapes:
+            var = program.global_block().create_var(
+                name=str(random.randint(10000, 99999)),
+                persistable=True,
+                # dtype=core.VarDesc.VarType.LOD_TENSOR,
+                shape=shape)
+            var_list.append(var)
+        blocks = split_dense_variable(var_list, 10)
+        all_sizes = []
+        for s in expected_sizes:
+            for s2 in s:
+                all_sizes.append(s2)
+        for i, block_str in enumerate(blocks):
+            varname, block_id, size = block_str.split(":")
+            self.assertEqual(int(size), all_sizes[i])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/decorators.py b/python/paddle/v2/fluid/tests/decorators.py
index 154619b0e93455922700a12d734967b4d20c4f13..0a8a2ccc4dc2bdfbda1a502651559647ddd8f422 100644
--- a/python/paddle/v2/fluid/tests/decorators.py
+++ b/python/paddle/v2/fluid/tests/decorators.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid as fluid
 
 __all__ = ['many_times', 'prog_scope']
diff --git a/python/paddle/v2/fluid/tests/demo/fc_gan.py b/python/paddle/v2/fluid/tests/demo/fc_gan.py
index cae959593e855f11c04585341d86478b649d17c9..0652c8134d58bcb6e5bba469ae16ff1ab4fdae4b 100644
--- a/python/paddle/v2/fluid/tests/demo/fc_gan.py
+++ b/python/paddle/v2/fluid/tests/demo/fc_gan.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import errno
 import math
 import os
diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py
index b77d2b1268f27c5ec3c34839aaad9b75f0132c2e..56f54de86f680653fbd97a7ce1d3f547d1657587 100644
--- a/python/paddle/v2/fluid/tests/op_test.py
+++ b/python/paddle/v2/fluid/tests/op_test.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import random
@@ -31,7 +45,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
             kwargs[in_name] = []
             if in_dup:
                 sub_in = inputs[in_name]
-                for sub_in_name, _ in sub_in:
+                for item in sub_in:
+                    sub_in_name, _ = item[0], item[1]
                     __create_var__(in_name, sub_in_name)
             else:
                 __create_var__(in_name, in_name)
@@ -41,7 +56,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
             kwargs[out_name] = []
             if out_dup:
                 sub_out = outputs[out_name]
-                for sub_out_name, _ in sub_out:
+                for item in sub_out:
+                    sub_out_name, _ = item[0], item[1]
                     __create_var__(out_name, sub_out_name)
             else:
                 __create_var__(out_name, out_name)
@@ -71,13 +87,15 @@ def set_input(scope, op, inputs, place):
         if in_name in inputs:
             if in_dup:
                 sub_in = inputs[in_name]
-                for sub_in_name, sub_in_val in sub_in:
+                for item in sub_in:
+                    sub_in_name, sub_in_val = item[0], item[1]
                     __set_input__(sub_in_name, sub_in_val)
             else:
                 __set_input__(in_name, inputs[in_name])
 
 
-def get_numeric_gradient(scope,
+def get_numeric_gradient(place,
+                         scope,
                          op,
                          inputs,
                          input_to_check,
@@ -85,7 +103,7 @@ def get_numeric_gradient(scope,
                          delta=0.005,
                          in_place=False):
     # FIXME: change this method by compile time concepts
-    set_input(scope, op, inputs, core.CPUPlace())
+    set_input(scope, op, inputs, place)
 
     def product(dim):
         return reduce(lambda a, b: a * b, dim, 1)
@@ -93,7 +111,7 @@ def get_numeric_gradient(scope,
     def get_output():
         sum = []
         for output_name in output_names:
-            op.run(scope, core.CPUPlace())
+            op.run(scope, place)
             sum.append(
                 np.array(scope.find_var(output_name).get_tensor()).mean())
         return np.array(sum).mean()
@@ -127,7 +145,7 @@ def get_numeric_gradient(scope,
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
         if in_place:
-            set_input(scope, op, inputs, core.CPUPlace())
+            set_input(scope, op, inputs, place)
 
         # get one input element throw it's index i.
         origin = __get_elem__(tensor_to_check, i)
@@ -137,7 +155,7 @@ def get_numeric_gradient(scope,
         y_pos = get_output()
 
         if in_place:
-            set_input(scope, op, inputs, core.CPUPlace())
+            set_input(scope, op, inputs, place)
 
         x_neg = origin - delta
         __set_elem__(tensor_to_check, i, x_neg)
@@ -283,7 +301,8 @@ class OpTest(unittest.TestCase):
                 if not isinstance(sub_out, list):
                     raise AssertionError("sub_out type %s is not list",
                                          type(sub_out))
-                for sub_out_name, expect in sub_out:
+                for item in sub_out:
+                    sub_out_name, expect = item[0], item[1]
                     idx = find_actual(sub_out_name, fetch_list)
                     actual = outs[idx]
                     actual_t = np.array(actual)
@@ -347,6 +366,24 @@ class OpTest(unittest.TestCase):
                    in_place=False,
                    max_relative_error=0.005,
                    user_defined_grads=None):
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu() and core.op_support_gpu(self.op_type):
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            self.check_grad_with_place(place, inputs_to_check, output_names,
+                                       no_grad_set, numeric_grad_delta,
+                                       in_place, max_relative_error,
+                                       user_defined_grads)
+
+    def check_grad_with_place(self,
+                              place,
+                              inputs_to_check,
+                              output_names,
+                              no_grad_set=None,
+                              numeric_grad_delta=0.005,
+                              in_place=False,
+                              max_relative_error=0.005,
+                              user_defined_grads=None):
         self.scope = core.Scope()
         op_inputs = self.inputs if hasattr(self, "inputs") else dict()
         op_outputs = self.outputs if hasattr(self, "outputs") else dict()
@@ -362,6 +399,7 @@ class OpTest(unittest.TestCase):
 
         numeric_grads = user_defined_grads or [
             get_numeric_gradient(
+                place,
                 self.scope,
                 self.op,
                 self.inputs,
@@ -370,22 +408,12 @@ class OpTest(unittest.TestCase):
                 delta=numeric_grad_delta,
                 in_place=in_place) for input_to_check in inputs_to_check
         ]
-        cpu_place = core.CPUPlace()
-        cpu_analytic_grads = self._get_gradient(inputs_to_check, cpu_place,
-                                                output_names, no_grad_set)
-
-        self.__assert_is_close(numeric_grads, cpu_analytic_grads,
-                               inputs_to_check, max_relative_error,
-                               "Gradient Check On %s" % str(cpu_place))
-
-        if core.is_compile_gpu() and self.op.support_gpu():
-            gpu_place = core.CUDAPlace(0)
-            gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place,
-                                                    output_names, no_grad_set)
-
-            self.__assert_is_close(numeric_grads, gpu_analytic_grads,
-                                   inputs_to_check, max_relative_error,
-                                   "Gradient Check On %s" % str(gpu_place))
+        analytic_grads = self._get_gradient(inputs_to_check, place,
+                                            output_names, no_grad_set)
+
+        self.__assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
+                               max_relative_error,
+                               "Gradient Check On %s" % str(place))
 
     @staticmethod
     def _create_var_descs_(block, var_dict):
diff --git a/python/paddle/v2/fluid/tests/test_accuracy_op.py b/python/paddle/v2/fluid/tests/test_accuracy_op.py
index 6f72918b7178bc1f856010f1111f18842f6cc34a..ac3f3bdff44a870cb68d317d5a57e7a25270e6c3 100644
--- a/python/paddle/v2/fluid/tests/test_accuracy_op.py
+++ b/python/paddle/v2/fluid/tests/test_accuracy_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py
index 03eb7deb9a35933e5a1676a262a371c69151e6d1..18605e60652a1614571a91918a012f0c08c8f1b3 100644
--- a/python/paddle/v2/fluid/tests/test_activation_op.py
+++ b/python/paddle/v2/fluid/tests/test_activation_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_adadelta_op.py b/python/paddle/v2/fluid/tests/test_adadelta_op.py
index 7105593a98aee9885ba16e3ee0649a6024033ee7..949318d00776712ad08d335f7afdb9b7d9140c42 100644
--- a/python/paddle/v2/fluid/tests/test_adadelta_op.py
+++ b/python/paddle/v2/fluid/tests/test_adadelta_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_adagrad_op.py b/python/paddle/v2/fluid/tests/test_adagrad_op.py
index 7b2d02fbf4256d2c27383a3452d526271af543a3..86b0567ce123b00bace639fb8fe76cf3894abd6d 100644
--- a/python/paddle/v2/fluid/tests/test_adagrad_op.py
+++ b/python/paddle/v2/fluid/tests/test_adagrad_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_adam_op.py b/python/paddle/v2/fluid/tests/test_adam_op.py
index 7dbc2fa0858a68c5da9e8d48dcb187494357e940..10580adca714beeb7571312b8fdc4235ecaaccfe 100644
--- a/python/paddle/v2/fluid/tests/test_adam_op.py
+++ b/python/paddle/v2/fluid/tests/test_adam_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_adamax_op.py b/python/paddle/v2/fluid/tests/test_adamax_op.py
index 8e5a15aa3d12bbaae99cae6fcb627a336e48f684..e285c454f035936c9dd28bc41b9174f780201ba0 100644
--- a/python/paddle/v2/fluid/tests/test_adamax_op.py
+++ b/python/paddle/v2/fluid/tests/test_adamax_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/test_array_read_write_op.py
index 01321de8eac34d562d99726b1f4125d1932ab40f..a32c24486e1f4340d913da3ea42e7c9ff4a48d90 100644
--- a/python/paddle/v2/fluid/tests/test_array_read_write_op.py
+++ b/python/paddle/v2/fluid/tests/test_array_read_write_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.layers as layers
diff --git a/python/paddle/v2/fluid/tests/test_assign_op.py b/python/paddle/v2/fluid/tests/test_assign_op.py
index 1b0c145f1a69678b228bc70e4e4e273f5bcf9888..fbbfe0d02c4036ea3e971a9c0a9a2469ca62ad53 100644
--- a/python/paddle/v2/fluid/tests/test_assign_op.py
+++ b/python/paddle/v2/fluid/tests/test_assign_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import op_test
 import numpy
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_assign_value_op.py b/python/paddle/v2/fluid/tests/test_assign_value_op.py
index 51b99d091825ab3edc2175202ae5d8a364a54378..93970f863b165c44c554a40e2fa60388800ce300 100644
--- a/python/paddle/v2/fluid/tests/test_assign_value_op.py
+++ b/python/paddle/v2/fluid/tests/test_assign_value_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.layers as layers
 import op_test
diff --git a/python/paddle/v2/fluid/tests/test_auc_op.py b/python/paddle/v2/fluid/tests/test_auc_op.py
index 26ea905d88093605dff820b178996a5724becf82..5e4caedf5d612ea9d1a4bcf17beee69316f9266d 100644
--- a/python/paddle/v2/fluid/tests/test_auc_op.py
+++ b/python/paddle/v2/fluid/tests/test_auc_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py
index ac9418549f45f818257d74045cabb9c581816968..371bd426781b457582e74c33c80c46b5d56946fa 100644
--- a/python/paddle/v2/fluid/tests/test_batch_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py
index f329214dce407fe0382c51b29f0f4c33b562541a..36747849859fd54b34b5f5c25e9f5b4c779774fb 100644
--- a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py
+++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_beam_search_op.py b/python/paddle/v2/fluid/tests/test_beam_search_op.py
index 319a7e49e35b0515e69703b2d03080cd9ffcae9d..4da463df260efe48498207e9758f91d0bf95e7fe 100644
--- a/python/paddle/v2/fluid/tests/test_beam_search_op.py
+++ b/python/paddle/v2/fluid/tests/test_beam_search_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import logging
 from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py b/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py
index 080ca43b8269e0f6a9f4d0ce3973f4d4a07a8e2a..4b03f512c2f0d073fad3ba04b5e72adec13af6ed 100644
--- a/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py
+++ b/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_calc_gradient.py b/python/paddle/v2/fluid/tests/test_calc_gradient.py
index c34c8ff6d143ff2c8ae0def935d2b44982c0764e..c773e81768f50c6bd3865f8dd527f4d955a95229 100644
--- a/python/paddle/v2/fluid/tests/test_calc_gradient.py
+++ b/python/paddle/v2/fluid/tests/test_calc_gradient.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/test_cast_op.py b/python/paddle/v2/fluid/tests/test_cast_op.py
index 4e431bb88da6070718d64a68467be20ca87f8fb9..327b246ed80596c10877734cdea083264a5b9309 100644
--- a/python/paddle/v2/fluid/tests/test_cast_op.py
+++ b/python/paddle/v2/fluid/tests/test_cast_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import op_test
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_chunk_eval_op.py b/python/paddle/v2/fluid/tests/test_chunk_eval_op.py
index 53bf6f815b8c7baf4c92d9fd488b69722ab0bef5..5c3efe9baa7da6161e22b274fb5116ffaed68f1a 100644
--- a/python/paddle/v2/fluid/tests/test_chunk_eval_op.py
+++ b/python/paddle/v2/fluid/tests/test_chunk_eval_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_clip.py b/python/paddle/v2/fluid/tests/test_clip.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f7718f4d8751fc0514bafd342d9d4309d39b86b
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_clip.py
@@ -0,0 +1,81 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+BATCH_SIZE = 128
+CLIP_MAX = 2e-6
+CLIP_MIN = -1e-6
+
+prog = fluid.framework.Program()
+
+with fluid.program_guard(main_program=prog):
+    image = fluid.layers.data(name='x', shape=[784], dtype='float32')
+
+    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
+    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+
+    label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+prog_clip = prog.clone()
+prog_clip.block(0).var(hidden1.name).set_error_clip(
+    fluid.clip.ErrorClipByValue(
+        max=CLIP_MAX, min=CLIP_MIN))
+
+avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
+fluid.backward.append_backward(loss=avg_cost)
+fluid.backward.append_backward(
+    loss=avg_cost_clip, callback=fluid.clip.error_clip_callback)
+
+hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD")
+hidden1_grad_clip = prog_clip.block(0).var(hidden1.name + "@GRAD")
+
+hidden2_grad = prog.block(0).var(hidden2.name + "@GRAD")
+hidden2_grad_clip = prog_clip.block(0).var(hidden2.name + "@GRAD")
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.mnist.train(), buf_size=8192),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
+exe.run(fluid.default_startup_program())
+
+count = 0
+for data in train_reader():
+    count += 1
+    if count > 5:
+        break
+    out1, out2 = exe.run(prog,
+                         feed=feeder.feed(data),
+                         fetch_list=[hidden1_grad, hidden2_grad])
+    out1_clip, out2_clip = exe.run(
+        prog_clip,
+        feed=feeder.feed(data),
+        fetch_list=[hidden1_grad_clip, hidden2_grad_clip])
+    if not ((out1.clip(
+            min=CLIP_MIN, max=CLIP_MAX) == out1_clip).all() and
+            (out2 == out2_clip).all()):
+        exit(1)
+
+exit(0)
diff --git a/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py b/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py
index 02f6108a3a661b0e32cd2e7ed65cb4b8cb50c067..b30f321c79f3d3a4061ee2fc2fb55fb5fab95f27 100644
--- a/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_clip_op.py b/python/paddle/v2/fluid/tests/test_clip_op.py
index a7e1bf174408e4139db0435d9f4bb0c885f76705..ef0b75e286797436d730278585e946d1d897edc2 100644
--- a/python/paddle/v2/fluid/tests/test_clip_op.py
+++ b/python/paddle/v2/fluid/tests/test_clip_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_compare_op.py b/python/paddle/v2/fluid/tests/test_compare_op.py
index 5d0dfab6ffd1cbbbfbcdb3af60f1868b7b780456..08ef90b10eb70eb380db2821415184709602b848 100644
--- a/python/paddle/v2/fluid/tests/test_compare_op.py
+++ b/python/paddle/v2/fluid/tests/test_compare_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import op_test
 import unittest
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_concat_op.py b/python/paddle/v2/fluid/tests/test_concat_op.py
index a792d1c106ac00efd92e680cfad67f41a7520e26..ea0a95ebec24477797c1a17096c61132248587e5 100644
--- a/python/paddle/v2/fluid/tests/test_concat_op.py
+++ b/python/paddle/v2/fluid/tests/test_concat_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_cond_op.py b/python/paddle/v2/fluid/tests/test_cond_op.py
index 32e54084e48cf77c569db4dee54a0c89d5108373..4b7ca0963e92ead0b4168ae732d70ff8497d38c0 100644
--- a/python/paddle/v2/fluid/tests/test_cond_op.py
+++ b/python/paddle/v2/fluid/tests/test_cond_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import logging
 import paddle.v2.fluid.core as core
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py
index 7d815123f3454d1457f59202219f9a93bf3d8c31..5ee729cfee6f432091c07f360f2d0c97c3801b99 100644
--- a/python/paddle/v2/fluid/tests/test_conditional_block.py
+++ b/python/paddle/v2/fluid/tests/test_conditional_block.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_const_value.py b/python/paddle/v2/fluid/tests/test_const_value.py
index f8c17c2c98674fa67458efa090e166e37f5a6a8a..d5b7cfded1a943ad84493afe367bf33c5304db42 100644
--- a/python/paddle/v2/fluid/tests/test_const_value.py
+++ b/python/paddle/v2/fluid/tests/test_const_value.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.framework as framework
 
diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py
index 958300e655e012b91598360105ca2734c3bd2c37..24de74d730eedbccb4837598bd6d2eb92da59e0d 100644
--- a/python/paddle/v2/fluid/tests/test_conv2d_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 
@@ -49,7 +63,7 @@ def conv2d_forward_naive(input, filter, group, conv_param):
 
 class TestConv2dOp(OpTest):
     def setUp(self):
-        core.use_cuda()
+        self.use_cudnn = False
         self.init_op_type()
         self.init_group()
         self.init_dilation()
@@ -70,30 +84,59 @@ class TestConv2dOp(OpTest):
             'strides': self.stride,
             'paddings': self.pad,
             'groups': self.groups,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn
         }
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.02)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -167,39 +210,39 @@ class TestWithDilation(TestConv2dOp):
         self.groups = 3
 
 
-#----------------Conv2dCudnn----------------
-class TestCudnn(TestConv2dOp):
+#----------------Conv2dCUDNN----------------
+class TestCUDNN(TestConv2dOp):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWithPad(TestWithPad):
+class TestCUDNNWithPad(TestWithPad):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWithStride(TestWithStride):
+class TestCUDNNWithStride(TestWithStride):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWithGroup(TestWithGroup):
+class TestCUDNNWithGroup(TestWithGroup):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWith1x1(TestWith1x1):
+class TestCUDNNWith1x1(TestWith1x1):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
 #  cudnn v5 does not support dilation conv.
-# class TestCudnnWithDilation(TestWithDilation):
+# class TestCUDNNWithDilation(TestWithDilation):
 #     def init_op_type(self):
 #         self.op_type = "conv_cudnn"
 
diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
index d59537b924d57d40f7d740d99eb814c95f528e5f..0c76e222c90c7a61c08240e6b3d25fbb5b979252 100644
--- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
@@ -1,5 +1,21 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -37,6 +53,7 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
 class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
+        self.use_cudnn = False
         self.init_op_type()
         self.init_test_case()
 
@@ -47,7 +64,9 @@ class TestConv2dTransposeOp(OpTest):
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         output = conv2dtranspose_forward_naive(input_, filter_,
@@ -56,25 +75,53 @@ class TestConv2dTransposeOp(OpTest):
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.02)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -119,12 +166,13 @@ class TestWithDilation(TestConv2dTransposeOp):
 
 
 # ------------ test_cudnn ------------
-class TestCudnn(TestConv2dTransposeOp):
+class TestCUDNN(TestConv2dTransposeOp):
     def init_op_type(self):
-        self.op_type = "conv2d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
 
 
-class TestCudnnWithPad(TestWithPad):
+class TestCUDNNWithPad(TestWithPad):
     def init_test_case(self):
         self.pad = [1, 1]
         self.stride = [1, 1]
@@ -134,10 +182,11 @@ class TestCudnnWithPad(TestWithPad):
         self.filter_size = [f_c, 6, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv2d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
 
 
-class TestCudnnWithStride(TestWithStride):
+class TestCUDNNWithStride(TestWithStride):
     def init_test_case(self):
         self.pad = [1, 1]
         self.stride = [2, 2]
@@ -147,11 +196,12 @@ class TestCudnnWithStride(TestWithStride):
         self.filter_size = [f_c, 6, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv2d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
 
 
 # #cudnn v5 does not support dilation conv.
-# class TestCudnnWithDilation(TestWithDilation):
+# class TestCUDNNWithDilation(TestWithDilation):
 #     def init_test_case(self):
 #         self.pad = [1, 1]
 #         self.stride = [2, 2]
@@ -161,7 +211,7 @@ class TestCudnnWithStride(TestWithStride):
 #         self.filter_size = [f_c, 6, 3, 3]
 #
 #     def init_op_type(self):
-#         self.op_type = "conv2d_transpose_cudnn"
+#         self.op_type = "conv2d_transpose"
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py
index 8593dff20b5c283d5862206dfb0c0d2501039d07..8121e3286597e5842138eac1801f4466db24f799 100644
--- a/python/paddle/v2/fluid/tests/test_conv3d_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv3d_op.py
@@ -1,5 +1,21 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -54,6 +70,7 @@ def conv3d_forward_naive(input, filter, group, conv_param):
 
 class TestConv3dOp(OpTest):
     def setUp(self):
+        self.use_cudnn = False
         self.init_group()
         self.init_op_type()
         self.init_dilation()
@@ -62,7 +79,9 @@ class TestConv3dOp(OpTest):
         conv3d_param = {
             'stride': self.stride,
             'pad': self.pad,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
         input = np.random.random(self.input_size).astype("float32")
         filter = np.random.random(self.filter_size).astype("float32")
@@ -79,25 +98,53 @@ class TestConv3dOp(OpTest):
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.03)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.03,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.03,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
 
     def init_test_case(self):
         self.pad = [0, 0, 0]
@@ -169,31 +216,35 @@ class TestWithDilation(TestConv3dOp):
         self.groups = 3
 
 
-class TestCudnn(TestConv3dOp):
+class TestCUDNN(TestConv3dOp):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
-class TestWithGroup1Cudnn(TestWithGroup1):
+class TestWithGroup1CUDNN(TestWithGroup1):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
-class TestWithGroup2Cudnn(TestWithGroup2):
+class TestWithGroup2CUDNN(TestWithGroup2):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
-class TestWith1x1Cudnn(TestWith1x1):
+class TestWith1x1CUDNN(TestWith1x1):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
 # FIXME(typhoonzero): find a way to determine if
 # using cudnn > 6 in python
-# class TestWithDilationCudnn(TestWithDilation):
+# class TestWithDilationCUDNN(TestWithDilation):
 #     def init_op_type(self):
-#         self.op_type = "conv3d_cudnn"
+#         self.op_type = "conv3d"
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
index a353f9b4d40233de46237005138f21430f4d865a..4934c5a34e519578183a577adf66b48e6c1047d5 100644
--- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
@@ -1,5 +1,21 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -44,6 +60,7 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs):
 class TestConv3dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
+        self.use_cudnn = False
         self.init_op_type()
         self.init_test_case()
 
@@ -54,7 +71,9 @@ class TestConv3dTransposeOp(OpTest):
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         output = conv3dtranspose_forward_naive(input_, filter_,
@@ -63,25 +82,53 @@ class TestConv3dTransposeOp(OpTest):
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.03)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
 
     def init_test_case(self):
         self.pad = [0, 0, 0]
@@ -126,12 +173,13 @@ class TestWithDilation(TestConv3dTransposeOp):
 
 
 # ------------ test_cudnn ------------
-class TestCudnn(TestConv3dTransposeOp):
+class TestCUDNN(TestConv3dTransposeOp):
     def init_op_type(self):
-        self.op_type = "conv3d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d_transpose"
 
 
-class TestCudnnWithPad(TestWithPad):
+class TestCUDNNWithPad(TestWithPad):
     def init_test_case(self):
         self.pad = [1, 1, 1]
         self.stride = [1, 1, 1]
@@ -141,10 +189,11 @@ class TestCudnnWithPad(TestWithPad):
         self.filter_size = [f_c, 6, 3, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv3d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d_transpose"
 
 
-class TestCudnnWithStride(TestWithStride):
+class TestCUDNNWithStride(TestWithStride):
     def init_test_case(self):
         self.pad = [1, 1, 1]
         self.stride = [2, 2, 2]
@@ -154,11 +203,12 @@ class TestCudnnWithStride(TestWithStride):
         self.filter_size = [f_c, 6, 3, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv3d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d_transpose"
 
 
 # #cudnn v5 does not support dilation conv.
-# class TestCudnnWithDilation(TestWithDilation):
+# class TestCUDNNWithDilation(TestWithDilation):
 #     def init_test_case(self):
 #         self.pad = [1, 1, 1]
 #         self.stride = [2, 2, 2]
@@ -168,7 +218,7 @@ class TestCudnnWithStride(TestWithStride):
 #         self.filter_size = [f_c, 6, 3, 3, 3]
 #
 #     def init_op_type(self):
-#         self.op_type = "conv3d_transpose_cudnn"
+#         self.op_type = "conv3d_transpose"
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_conv_shift_op.py b/python/paddle/v2/fluid/tests/test_conv_shift_op.py
index b9ab21a06a1c6e8e2d1e936a0b4b8a07a59f57b9..7029d5a2eb433707ed7e7dd8acddbd0fef80e40c 100644
--- a/python/paddle/v2/fluid/tests/test_conv_shift_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv_shift_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_cos_sim_op.py b/python/paddle/v2/fluid/tests/test_cos_sim_op.py
index 47557ccb41d1e835b5d04d1b94f54dfc7aa2855a..33db12ba9c7b119259fc918a70c029a99aea0c68 100644
--- a/python/paddle/v2/fluid/tests/test_cos_sim_op.py
+++ b/python/paddle/v2/fluid/tests/test_cos_sim_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_create_op_doc_string.py b/python/paddle/v2/fluid/tests/test_create_op_doc_string.py
index 42b6f7a3616bbce53a8cae68a5fc1eda411a7422..2b7951ecea791b43ffe9123fda77ec80d626e065 100644
--- a/python/paddle/v2/fluid/tests/test_create_op_doc_string.py
+++ b/python/paddle/v2/fluid/tests/test_create_op_doc_string.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.layers as layers
 
diff --git a/python/paddle/v2/fluid/tests/test_crf_decoding_op.py b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py
index ab573da31dfb9d7b40e44a79465a61cdc6b62a46..f819387cdc4476187d108e7cffca7851baea5933 100644
--- a/python/paddle/v2/fluid/tests/test_crf_decoding_op.py
+++ b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import random
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_crop_op.py b/python/paddle/v2/fluid/tests/test_crop_op.py
index 62c883bdc130021d06c33ded9c2865505da0b719..36bf1761689c32928ea8f9e2996038ae94c92bdd 100644
--- a/python/paddle/v2/fluid/tests/test_crop_op.py
+++ b/python/paddle/v2/fluid/tests/test_crop_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_cross_entropy_op.py b/python/paddle/v2/fluid/tests/test_cross_entropy_op.py
index b81af9364d63bc9b242372e71f175ad047d7c240..ae8e9be6de453e78f8e941641919a4c8eaae7e30 100644
--- a/python/paddle/v2/fluid/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/fluid/tests/test_cross_entropy_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest, randomize_probability
diff --git a/python/paddle/v2/fluid/tests/test_ctc_align.py b/python/paddle/v2/fluid/tests/test_ctc_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..773c69d1ad0794d2e4edfb1f6f8140cbcd64bee6
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_ctc_align.py
@@ -0,0 +1,76 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import unittest
+import numpy as np
+from op_test import OpTest
+from test_softmax_op import stable_softmax
+
+
+def CTCAlign(input, lod, blank, merge_repeated):
+    lod0 = lod[0]
+    result = []
+    for i in range(len(lod0) - 1):
+        prev_token = -1
+        for j in range(lod0[i], lod0[i + 1]):
+            token = input[j][0]
+            if (token != blank) and not (merge_repeated and
+                                         token == prev_token):
+                result.append(token)
+            prev_token = token
+    result = np.array(result).reshape([len(result), 1]).astype("int32")
+    return result
+
+
+class TestCTCAlignOp(OpTest):
+    def config(self):
+        self.op_type = "ctc_align"
+        self.input_lod = [[0, 11, 18]]
+        self.blank = 0
+        self.merge_repeated = False
+        self.input = np.array(
+            [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0]).reshape(
+                [18, 1]).astype("int32")
+
+    def setUp(self):
+        self.config()
+        output = CTCAlign(self.input, self.input_lod, self.blank,
+                          self.merge_repeated)
+
+        self.inputs = {"Input": (self.input, self.input_lod), }
+        self.outputs = {"Output": output}
+        self.attrs = {
+            "blank": self.blank,
+            "merge_repeated": self.merge_repeated
+        }
+
+    def test_check_output(self):
+        self.check_output()
+        pass
+
+
+class TestCTCAlignOpCase1(TestCTCAlignOp):
+    def config(self):
+        self.op_type = "ctc_align"
+        self.input_lod = [[0, 11, 19]]
+        self.blank = 0
+        self.merge_repeated = True
+        self.input = np.array(
+            [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0, 0]).reshape(
+                [19, 1]).astype("int32")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_data_feeder.py b/python/paddle/v2/fluid/tests/test_data_feeder.py
index 454969320321b72342803f507f0054f79f276669..f967221015821e409592aeec93ffda8e4a4f3252 100644
--- a/python/paddle/v2/fluid/tests/test_data_feeder.py
+++ b/python/paddle/v2/fluid/tests/test_data_feeder.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid as fluid
 
 
diff --git a/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py b/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py
index 674c3fda5c82309bbfbbad936a8b0b26929d42d9..78d4e3608e6f9fef247551a9ec8a70899bf8f86c 100644
--- a/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py
+++ b/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_default_scope_funcs.py b/python/paddle/v2/fluid/tests/test_default_scope_funcs.py
index 738e69529ea447e87516d5e0efc098910b966ded..5ff52f6d6b46f18a12fc508d8ce1a9fb20e6d6ca 100644
--- a/python/paddle/v2/fluid/tests/test_default_scope_funcs.py
+++ b/python/paddle/v2/fluid/tests/test_default_scope_funcs.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.v2.fluid.default_scope_funcs import *
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_detection_output_op.py b/python/paddle/v2/fluid/tests/test_detection_output_op.py
index 080a9743b0182cb7e6dd0030fc306a7f82510a05..4a9cd474b81a419bfb42c202327df04c0d2e5bd9 100644
--- a/python/paddle/v2/fluid/tests/test_detection_output_op.py
+++ b/python/paddle/v2/fluid/tests/test_detection_output_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py
index 2483200212686caf9c46f9c1129b5d8ffdcc9145..107b9567dc4a8539532c2fff40df437cc72cc163 100644
--- a/python/paddle/v2/fluid/tests/test_dropout_op.py
+++ b/python/paddle/v2/fluid/tests/test_dropout_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_dyn_rnn.py b/python/paddle/v2/fluid/tests/test_dyn_rnn.py
index 8090c5f47814c60034f2f46f00e56c530e0f2c19..2ac926c63c906767cb08c561b043b8a6cc6b36bd 100644
--- a/python/paddle/v2/fluid/tests/test_dyn_rnn.py
+++ b/python/paddle/v2/fluid/tests/test_dyn_rnn.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid as fluid
 import paddle.v2 as paddle
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
index c02c59284e1ca2e28ba2f6c5ec13b241c15fc288..dd608432df411a670210148af81ff4a6d3151a85 100644
--- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
+++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy
 import random
 import collections
@@ -197,7 +211,24 @@ class BaseRNN(object):
         return numpy.array([o.mean() for o in outs.itervalues()]).mean()
 
 
-class TestSimpleMul(unittest.TestCase):
+class SeedFixedTestCase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """Fix random seeds to remove randomness from tests"""
+        cls._np_rand_state = numpy.random.get_state()
+        cls._py_rand_state = random.getstate()
+
+        numpy.random.seed(123)
+        random.seed(124)
+
+    @classmethod
+    def tearDownClass(cls):
+        """Restore random seeds"""
+        numpy.random.set_state(cls._np_rand_state)
+        random.setstate(cls._py_rand_state)
+
+
+class TestSimpleMul(SeedFixedTestCase):
     DATA_NAME = 'X'
     DATA_WIDTH = 32
     PARAM_NAME = 'W'
@@ -263,7 +294,7 @@ class TestSimpleMul(unittest.TestCase):
         self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05))
 
 
-class TestSimpleMulWithMemory(unittest.TestCase):
+class TestSimpleMulWithMemory(SeedFixedTestCase):
     DATA_WIDTH = 32
     HIDDEN_WIDTH = 20
     DATA_NAME = 'X'
diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_static_input.py b/python/paddle/v2/fluid/tests/test_dynrnn_static_input.py
new file mode 100644
index 0000000000000000000000000000000000000000..d14923b6b30d80e89f27221030f60edf947ae63d
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_dynrnn_static_input.py
@@ -0,0 +1,206 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.v2 as paddle
+import paddle.v2.fluid.core as core
+import paddle.v2.fluid as fluid
+from paddle.v2.fluid.backward import append_backward
+import paddle.v2.fluid.framework as framework
+from paddle.v2.fluid.framework import Program, switch_main_program
+import bisect
+import numpy as np
+
+fluid.default_startup_program().random_seed = 1
+
+
+class TestDyRnnStaticInput(unittest.TestCase):
+    def setUp(self):
+        self._delta = 0.005
+        self._max_sequence_len = 3
+        self._program = Program()
+        switch_main_program(self._program)
+        self.output_dim = 10
+        self.place = core.CPUPlace()
+        self.prepare_x_tensor()
+        self.prepare_static_input_tensor()
+        self.exe = fluid.Executor(self.place)
+
+    def prepare_x_tensor(self):
+        self.x_tensor_dim = 10
+        lod = [[0, 2, 3, 6]]
+        shape = [lod[0][-1], self.x_tensor_dim]
+        self.x_tensor_data = np.random.random(shape).astype('float32')
+        self.x_tensor = core.LoDTensor()
+        self.x_tensor.set_lod(lod)
+        self.x_tensor.set(self.x_tensor_data, self.place)
+
+    def prepare_static_input_tensor(self):
+        self.static_input_tensor_dim = 4
+        lod = [[0, 1, 3, 6]]
+        shape = [lod[0][-1], self.static_input_tensor_dim]
+        self.static_input_data = np.random.random(shape).astype('float32')
+        self.static_input_tensor = core.LoDTensor()
+        self.static_input_tensor.set_lod(lod)
+        self.static_input_tensor.set(self.static_input_data, self.place)
+
+    def fetch_value(self, var):
+        fetch_outs = self.exe.run(feed={
+            'x_tensor': self.x_tensor,
+            'static_input_tensor': self.static_input_tensor
+        },
+                                  fetch_list=[var],
+                                  return_numpy=False)
+        return self._lodtensor_to_ndarray(fetch_outs[0])
+
+    def _lodtensor_to_ndarray(self, lod_tensor):
+        dims = lod_tensor.get_dims()
+        ndarray = np.zeros(shape=dims).astype('float32')
+        for i in xrange(np.product(dims)):
+            ndarray.ravel()[i] = lod_tensor.get_float_element(i)
+        return ndarray, lod_tensor.lod()
+
+    def build_graph(self, only_forward=False):
+        x_tensor = fluid.layers.data(
+            name='x_tensor',
+            shape=[self.x_tensor_dim],
+            dtype='float32',
+            lod_level=1)
+        x_tensor.stop_gradient = False
+
+        static_input_tensor = fluid.layers.data(
+            name='static_input_tensor',
+            shape=[self.static_input_tensor_dim],
+            dtype='float32',
+            lod_level=1)
+        static_input_tensor.stop_gradient = False
+
+        if only_forward:
+            static_input_out_array = self._program.global_block().create_var(
+                name='static_input_out_array',
+                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
+                dtype='float32')
+            static_input_out_array.stop_gradient = True
+
+        rnn = fluid.layers.DynamicRNN()
+        with rnn.block():
+            step_x = rnn.step_input(x_tensor)
+            step_static_input = rnn.static_input(static_input_tensor)
+            if only_forward:
+                fluid.layers.array_write(
+                    x=step_static_input,
+                    i=rnn.step_idx,
+                    array=static_input_out_array)
+            last = fluid.layers.sequence_pool(
+                input=step_static_input, pool_type='last')
+            projected = fluid.layers.fc(input=[step_x, last],
+                                        size=self.output_dim)
+            rnn.output(projected)
+
+        if only_forward:
+            static_input_step_outs = []
+            step_idx = fluid.layers.fill_constant(
+                shape=[1], dtype='int64', value=0)
+            step_idx.stop_gradient = True
+
+            for i in xrange(self._max_sequence_len):
+                step_out = fluid.layers.array_read(static_input_out_array,
+                                                   step_idx)
+                step_out.stop_gradient = True
+                static_input_step_outs.append(step_out)
+                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)
+
+        if only_forward:
+            return static_input_step_outs
+
+        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
+        loss = fluid.layers.mean(x=last)
+        append_backward(loss)
+        static_input_grad = self._program.global_block().var(
+            framework.grad_var_name('static_input_tensor'))
+        return static_input_grad, loss
+
+    def get_seq_len_from_lod(self, lod):
+        return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)]
+
+    def get_expected_static_step_outs(self):
+        x_lod = self.x_tensor.lod()
+        x_seq_len = self.get_seq_len_from_lod(x_lod)
+        x_seq_len_sorted = sorted(x_seq_len)
+        x_sorted_indices = np.argsort(x_seq_len)[::-1]
+
+        static_lod = self.static_input_tensor.lod()
+        static_sliced = [
+            self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]]
+            for i in xrange(len(static_lod[0]) - 1)
+        ]
+        static_seq_len = self.get_seq_len_from_lod(static_lod)
+        static_reordered = []
+        for i in xrange(len(x_sorted_indices)):
+            static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
+        static_seq_len_reordered = [
+            static_seq_len[x_sorted_indices[i]]
+            for i in xrange(len(x_sorted_indices))
+        ]
+
+        static_step_outs = []
+        static_step_lods = []
+
+        for i in xrange(self._max_sequence_len):
+            end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
+            lod = [0]
+            for i in xrange(end):
+                lod.append(static_seq_len_reordered[i] + lod[-1])
+            static_step_lods.append([lod])
+            end = lod[-1]
+            static_step_outs.append(
+                np.array(static_reordered[:end]).astype('float32'))
+
+        return static_step_outs, static_step_lods
+
+    def test_step_out(self):
+        static_step_outs = self.build_graph(only_forward=True)
+        self.exe.run(framework.default_startup_program())
+        expected_outs, expected_lods = self.get_expected_static_step_outs()
+        for i in xrange(self._max_sequence_len):
+            step_out, lod = self.fetch_value(static_step_outs[i])
+            self.assertTrue(np.allclose(step_out, expected_outs[i]))
+            self.assertTrue(np.allclose(lod, expected_lods[i]))
+
+    def test_network_gradient(self):
+        static_input_grad, loss = self.build_graph()
+        self.exe.run(framework.default_startup_program())
+
+        actual_gradients, actual_lod = self.fetch_value(static_input_grad)
+
+        static_input_shape = self.static_input_tensor.get_dims()
+        numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
+        # calculate numeric gradients
+        tensor_size = np.product(static_input_shape)
+        for i in xrange(tensor_size):
+            origin = self.static_input_tensor.get_float_element(i)
+            x_pos = origin + self._delta
+            self.static_input_tensor.set_float_element(i, x_pos)
+            y_pos = self.fetch_value(loss)[0][0]
+            x_neg = origin - self._delta
+            self.static_input_tensor.set_float_element(i, x_neg)
+            y_neg = self.fetch_value(loss)[0][0]
+            self.static_input_tensor.set_float_element(i, origin)
+            numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
+        self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
+        self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod()))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_edit_distance_op.py b/python/paddle/v2/fluid/tests/test_edit_distance_op.py
index 38e87728b387bb70a8921a2fe73a4e69701aabe9..11cb85a151d1a4e213bcc52592d2f860f69b457f 100644
--- a/python/paddle/v2/fluid/tests/test_edit_distance_op.py
+++ b/python/paddle/v2/fluid/tests/test_edit_distance_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -38,8 +52,8 @@ class TestEditDistanceOp(OpTest):
     def setUp(self):
         self.op_type = "edit_distance"
         normalized = False
-        x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int32")
-        x2 = np.array([[0, 12, 4, 7, 8]]).astype("int32")
+        x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int64")
+        x2 = np.array([[0, 12, 4, 7, 8]]).astype("int64")
         x1 = np.transpose(x1)
         x2 = np.transpose(x2)
         x1_lod = [0, 1, 5]
@@ -66,8 +80,8 @@ class TestEditDistanceOpNormalized(OpTest):
     def setUp(self):
         self.op_type = "edit_distance"
         normalized = True
-        x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int32")
-        x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int32")
+        x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int64")
+        x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int64")
         x1 = np.transpose(x1)
         x2 = np.transpose(x2)
         x1_lod = [0, 1, 3, 6]
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_add_op.py b/python/paddle/v2/fluid/tests/test_elementwise_add_op.py
index 57daddd5698f77527bc5b78c436065a851867ae0..3564772fb52882e9e58ea88caeb12c5e91137525 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_add_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_add_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -27,6 +40,16 @@ class TestElementwiseOp(OpTest):
             ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
 
 
+class TestElementwiseAddOp_scalar(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_add"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 4).astype(np.float32),
+            'Y': np.random.rand(1).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
+
+
 class TestElementwiseAddOp_Vector(TestElementwiseOp):
     def setUp(self):
         self.op_type = "elementwise_add"
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_div_op.py b/python/paddle/v2/fluid/tests/test_elementwise_div_op.py
index 41cb2b7767eb8e01e46e770a5da21b609f4eb911..77b113af7693c4a71a5a13c791cfb3e0420f4ff8 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_div_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_div_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -32,6 +45,16 @@ class ElementwiseDivOp(OpTest):
             ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y'))
 
 
+class TestElementwiseDivOp_scalar(ElementwiseDivOp):
+    def setUp(self):
+        self.op_type = "elementwise_div"
+        self.inputs = {
+            'X': np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32),
+            'Y': np.random.uniform(0.1, 1, [1]).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']}
+
+
 class TestElementwiseDivOp_Vector(ElementwiseDivOp):
     def setUp(self):
         self.op_type = "elementwise_div"
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_max_op.py b/python/paddle/v2/fluid/tests/test_elementwise_max_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fc15693b1d2fbd89a1659dfbe5de0fff8d15762
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_elementwise_max_op.py
@@ -0,0 +1,130 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestElementwiseOp(OpTest):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        # If x and y have the same value, the max() is not differentiable.
+        # So we generate test data by the following method
+        # to avoid them being too close to each other.
+        x = np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+        sgn = np.random.choice([-1, 1], [13, 17]).astype("float32")
+        y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
+
+    def test_check_grad_ingore_x(self):
+        self.check_grad(
+            ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
+
+    def test_check_grad_ingore_y(self):
+        self.check_grad(
+            ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
+
+
+class TestElementwiseMaxOp_scalar(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        x = np.random.random_integers(-5, 5, [2, 3, 4]).astype("float32")
+        y = np.array([0.5]).astype("float32")
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
+
+
+class TestElementwiseMaxOp_Vector(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        x = np.random.random((32, )).astype("float32")
+        sgn = np.random.choice([-1, 1], (32, )).astype("float32")
+        y = x + sgn * np.random.uniform(0.1, 1, (32, )).astype("float32")
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
+
+
+class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        x = np.random.uniform(0.5, 1, (2, 3, 4)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (2, )).astype(np.float32)
+        y = x[:, 0, 0] + sgn * \
+            np.random.uniform(1, 2, (2, )).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.attrs = {'axis': 0}
+        self.outputs = {
+            'Out':
+            np.maximum(self.inputs['X'], self.inputs['Y'].reshape(2, 1, 1))
+        }
+
+
+class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        x = np.random.uniform(0.5, 1, (2, 3, 4)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (3, )).astype(np.float32)
+        y = x[0, :, 0] + sgn * \
+            np.random.uniform(1, 2, (3, )).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.attrs = {'axis': 1}
+        self.outputs = {
+            'Out':
+            np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 3, 1))
+        }
+
+
+class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        x = np.random.uniform(0.5, 1, (2, 3, 4)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (4, )).astype(np.float32)
+        y = x[0, 0, :] + sgn * \
+            np.random.uniform(1, 2, (4, )).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.outputs = {
+            'Out':
+            np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 4))
+        }
+
+
+class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_max"
+        x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (3, 4)).astype(np.float32)
+        y = x[0, :, :, 0] + sgn * \
+            np.random.uniform(1, 2, (3, 4)).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.attrs = {'axis': 1}
+        self.outputs = {
+            'Out':
+            np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 3, 4, 1))
+        }
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_min_op.py b/python/paddle/v2/fluid/tests/test_elementwise_min_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..51584d6980924f2f6dcaedf4eec7bc75de33564b
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_elementwise_min_op.py
@@ -0,0 +1,130 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestElementwiseOp(OpTest):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        # If x and y have the same value, the min() is not differentiable.
+        # So we generate test data by the following method
+        # to avoid them being too close to each other.
+        x = np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+        sgn = np.random.choice([-1, 1], [13, 17]).astype("float32")
+        y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
+
+    def test_check_grad_ingore_x(self):
+        self.check_grad(
+            ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
+
+    def test_check_grad_ingore_y(self):
+        self.check_grad(
+            ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
+
+
+class TestElementwiseMinOp_scalar(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        x = np.random.random_integers(-5, 5, [2, 3, 4]).astype("float32")
+        y = np.array([0.5]).astype("float32")
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])}
+
+
+class TestElementwiseMaxOp_Vector(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        x = np.random.random((32, )).astype("float32")
+        sgn = np.random.choice([-1, 1], (32, )).astype("float32")
+        y = x + sgn * np.random.uniform(0.1, 1, (32, )).astype("float32")
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])}
+
+
+class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        x = np.random.uniform(0.5, 1, (2, 3, 4)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (2, )).astype(np.float32)
+        y = x[:, 0, 0] + sgn * \
+            np.random.uniform(1, 2, (2, )).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.attrs = {'axis': 0}
+        self.outputs = {
+            'Out':
+            np.minimum(self.inputs['X'], self.inputs['Y'].reshape(2, 1, 1))
+        }
+
+
+class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        x = np.random.uniform(0.5, 1, (2, 3, 4)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (3, )).astype(np.float32)
+        y = x[0, :, 0] + sgn * \
+            np.random.uniform(1, 2, (3, )).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.attrs = {'axis': 1}
+        self.outputs = {
+            'Out':
+            np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 3, 1))
+        }
+
+
+class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        x = np.random.uniform(0.5, 1, (2, 3, 4)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (4, )).astype(np.float32)
+        y = x[0, 0, :] + sgn * \
+            np.random.uniform(1, 2, (4, )).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.outputs = {
+            'Out':
+            np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 4))
+        }
+
+
+class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_min"
+        x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(np.float32)
+        sgn = np.random.choice([-1, 1], (3, 4)).astype(np.float32)
+        y = x[0, :, :, 0] + sgn * \
+            np.random.uniform(1, 2, (3, 4)).astype(np.float32)
+        self.inputs = {'X': x, 'Y': y}
+
+        self.attrs = {'axis': 1}
+        self.outputs = {
+            'Out':
+            np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 3, 4, 1))
+        }
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py b/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py
index 261ca9cb3da90dee91b016fee98f67b4c19356a1..12dfa6599cd634e1d806980f89e7c013b8eb8754 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -25,6 +38,16 @@ class ElementwiseMulOp(OpTest):
         self.check_grad(['X'], 'Out', no_grad_set=set('Y'))
 
 
+class TestElementwiseMulOp_scalar(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 4).astype(np.float32),
+            'Y': np.random.rand(1).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
+
+
 class TestElementwiseMulOp_Vector(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py b/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py
index be982e8c57b30b91c2834bd5db38ea3c89f573ee..cf53d85bbad81f393a6263f8742fc942d357135f 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -27,6 +40,16 @@ class TestElementwiseOp(OpTest):
             ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
 
 
+class TestElementwiseSubOp_scalar(TestElementwiseOp):
+    def setUp(self):
+        self.op_type = "elementwise_sub"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 4).astype(np.float32),
+            'Y': np.random.rand(1).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
+
+
 class TestElementwiseSubOp_Vector(TestElementwiseOp):
     def setUp(self):
         self.op_type = "elementwise_sub"
diff --git a/python/paddle/v2/fluid/tests/test_exception.py b/python/paddle/v2/fluid/tests/test_exception.py
index b871f40c4a07ae2db7559e5a0f15664b21e94402..cd57ca586bfbc0773a2fd02d0c6d28182df2366b 100644
--- a/python/paddle/v2/fluid/tests/test_exception.py
+++ b/python/paddle/v2/fluid/tests/test_exception.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py
index b1ef87c5cb1711c419b401c5950839816f7f4160..44f93be6cb3778822b849118396dc51084e66f6b 100644
--- a/python/paddle/v2/fluid/tests/test_executor_and_mul.py
+++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_expand_op.py b/python/paddle/v2/fluid/tests/test_expand_op.py
index 0440f7a2bb159bab4923683b5d0980e59e0a69c9..b1a1cbc0fae15b3e6159c9ec850ebde7cf19c228 100644
--- a/python/paddle/v2/fluid/tests/test_expand_op.py
+++ b/python/paddle/v2/fluid/tests/test_expand_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_feed_fetch_method.py b/python/paddle/v2/fluid/tests/test_feed_fetch_method.py
index 178c85b0dd50df61b1fd35ef5d53ebbf39445cb4..827a7590ff3ca6b32fb37dda67468f3b1f95ee1b 100644
--- a/python/paddle/v2/fluid/tests/test_feed_fetch_method.py
+++ b/python/paddle/v2/fluid/tests/test_feed_fetch_method.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py b/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py
index 99de6b5d052b41499800afb6181a235da340bc15..f34a1ceb230208d6842f473636348e654154c2f4 100644
--- a/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_fill_constant_op.py b/python/paddle/v2/fluid/tests/test_fill_constant_op.py
index dff7b615aa378b0ef932df47241db07eace61a86..a05fa39729d4cd91c93cab935ba6f8cc68a57c52 100644
--- a/python/paddle/v2/fluid/tests/test_fill_constant_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_constant_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_fill_op.py b/python/paddle/v2/fluid/tests/test_fill_op.py
index 88337598c895a5a663ef45fd0800fa950fee1253..901546f6f8965e3ffb55c6686c229061e15eae8d 100644
--- a/python/paddle/v2/fluid/tests/test_fill_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py
index cd91769a22f8d6af193efabd8d997913676fbba6..b7f0b96647d5fe645157498f15a8a5d0e4430c67 100644
--- a/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_framework_debug_str.py b/python/paddle/v2/fluid/tests/test_framework_debug_str.py
index a4cbabdb36362c4ca14b76f366b648d6dbdbf7b3..f8fcfb2249bc635ce297381359db954bfd62df26 100644
--- a/python/paddle/v2/fluid/tests/test_framework_debug_str.py
+++ b/python/paddle/v2/fluid/tests/test_framework_debug_str.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 from paddle.v2.fluid.framework import Program
 
diff --git a/python/paddle/v2/fluid/tests/test_ftrl_op.py b/python/paddle/v2/fluid/tests/test_ftrl_op.py
index f77ac4659a9b877829f7ae52dd005d9dd11dac07..895337de0fb9885b3d78d0af23c03fd82a360497 100644
--- a/python/paddle/v2/fluid/tests/test_ftrl_op.py
+++ b/python/paddle/v2/fluid/tests/test_ftrl_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_gather_op.py b/python/paddle/v2/fluid/tests/test_gather_op.py
index b0ab429ef1b53640dfb696f6ea2f7b745564b874..76756367976c7056904d6d757ef9635261dc8985 100644
--- a/python/paddle/v2/fluid/tests/test_gather_op.py
+++ b/python/paddle/v2/fluid/tests/test_gather_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py
index 6f6a60ccb3ff17f6a12eec6974b8b2d73885c29f..82842534d4ac7ad8b0a8e0d877c6a638fb53cadc 100644
--- a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py
+++ b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy
 
diff --git a/python/paddle/v2/fluid/tests/test_get_places_op.py b/python/paddle/v2/fluid/tests/test_get_places_op.py
index c4346f6786c096026fa9cbd55fbd44c68f2f9981..68698c5f4a2354595dcc0cb271ad9f57a35386e1 100644
--- a/python/paddle/v2/fluid/tests/test_get_places_op.py
+++ b/python/paddle/v2/fluid/tests/test_get_places_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid as fluid
 import decorators
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_gru_op.py b/python/paddle/v2/fluid/tests/test_gru_op.py
index fa2c5a53ec4a01b6545e25f773c11277a4d24706..69cfd6c481cba30148b0f367711c7ee8c25acd3a 100644
--- a/python/paddle/v2/fluid/tests/test_gru_op.py
+++ b/python/paddle/v2/fluid/tests/test_gru_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import math
diff --git a/python/paddle/v2/fluid/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
index 501d5aa5797d6def708338692f0861657f951ef7..71f13c4513622579b5a8a718954e6b493b929ce1 100644
--- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py
+++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import math
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_hinge_loss_op.py b/python/paddle/v2/fluid/tests/test_hinge_loss_op.py
index a8757a891faa01413dc6858451f1a988a3e030b5..71ff47316eccd9a9f0a34942feaea6722a4b28f5 100644
--- a/python/paddle/v2/fluid/tests/test_hinge_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_hinge_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_huber_loss_op.py b/python/paddle/v2/fluid/tests/test_huber_loss_op.py
index a24fcbec6cc4801118ce4ef97eb4692cd2351c28..e4560af77826a7368b305eb58515e4e44b450a78 100644
--- a/python/paddle/v2/fluid/tests/test_huber_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_huber_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py
index b621d1525e33693869e24e2bb233bc8e257b077f..c64cfed5f583fb48588e479f862917a7b4d37c2c 100644
--- a/python/paddle/v2/fluid/tests/test_image_classification_layer.py
+++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/test_infer_shape.py b/python/paddle/v2/fluid/tests/test_infer_shape.py
index 9f6695ce02de749178046fbb613a58ba591b3dbc..521096388a338ceda0c71f6b7963c06f85263c69 100644
--- a/python/paddle/v2/fluid/tests/test_infer_shape.py
+++ b/python/paddle/v2/fluid/tests/test_infer_shape.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/test_inference_model_io.py
index 71ca3e6c105c4437470f8e9f596e723d879b65e4..adf428aa848268704c1749b360a4b499be2383e8 100644
--- a/python/paddle/v2/fluid/tests/test_inference_model_io.py
+++ b/python/paddle/v2/fluid/tests/test_inference_model_io.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_initializer.py b/python/paddle/v2/fluid/tests/test_initializer.py
index 3175010f48229d04421fc0068af0f0ed90e63af4..67746b4d7d96a6bb039e05795f11f3cb117cf85c 100644
--- a/python/paddle/v2/fluid/tests/test_initializer.py
+++ b/python/paddle/v2/fluid/tests/test_initializer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_is_empty_op.py b/python/paddle/v2/fluid/tests/test_is_empty_op.py
index 0a4dd0f4faf370161e5695d97f0ed4bf73b6ec26..7c17e3d57aa3eb93d0bc82ca81a96a7a1866f15c 100644
--- a/python/paddle/v2/fluid/tests/test_is_empty_op.py
+++ b/python/paddle/v2/fluid/tests/test_is_empty_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from paddle.v2.fluid.op import Operator
diff --git a/python/paddle/v2/fluid/tests/test_l1_norm_op.py b/python/paddle/v2/fluid/tests/test_l1_norm_op.py
index 3a1d1689fe6f941e95ca2df171a1e8e03278076d..bbc20878468371a547d2b9e275157dab51a79408 100644
--- a/python/paddle/v2/fluid/tests/test_l1_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_l1_norm_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import unittest
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index a56277d216c28ddeb752a0aad46daca305a685e4..709abd6c6a4e0c2aa1b38a135d7424cd6886c966 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import unittest
 
@@ -203,6 +217,14 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(x)
         print(str(program))
 
+    def test_sequence_reshape(self):
+        program = Program()
+        with program_guard(program):
+            x = layers.data(name='x', shape=[8], dtype='float32', lod_level=1)
+            out = layers.sequence_reshape(input=x, new_dim=16)
+            self.assertIsNotNone(out)
+        print(str(program))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
index c26634ff20c46e484d600c758be386ec8327d1c1..cbfd9d5e5b359b708e0e84c2c40ca0ef41cc35b6 100644
--- a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
+++ b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import random
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_lod_array_length_op.py b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py
index 8a4be545eda841dbda33b7c8cae9f91a4199f2f8..eff28368f1a6a78b8e7e9df7e281a1eb3b11288e 100644
--- a/python/paddle/v2/fluid/tests/test_lod_array_length_op.py
+++ b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/fluid/tests/test_lod_rank_table.py b/python/paddle/v2/fluid/tests/test_lod_rank_table.py
index 30d619fe318517345195281b17f88e9916b6afb3..eb0392e8bf7111f223a54430ab7a6646a4f35f71 100644
--- a/python/paddle/v2/fluid/tests/test_lod_rank_table.py
+++ b/python/paddle/v2/fluid/tests/test_lod_rank_table.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from paddle.v2.fluid.layers import lod_rank_table, data
 from paddle.v2.fluid.executor import Executor
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_lod_reset_op.py b/python/paddle/v2/fluid/tests/test_lod_reset_op.py
index 652ccecfa443fc95f08f52df766709cb550f4049..4ee360403e88c6a4f8d5fcc6d6ce8acc865e6277 100644
--- a/python/paddle/v2/fluid/tests/test_lod_reset_op.py
+++ b/python/paddle/v2/fluid/tests/test_lod_reset_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array.py
index d6d3e23fd8898a62528d63795d1bff1b72752477..0f3ac3c03dbe43c2e977a5e98faff31d6c231acd 100644
--- a/python/paddle/v2/fluid/tests/test_lod_tensor_array.py
+++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py
index c552cb033f1ec8f5843490083edee7b2762b5703..c2d04db99b969430f3a70e45c92967f548ba513b 100644
--- a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py
+++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_log_loss_op.py b/python/paddle/v2/fluid/tests/test_log_loss_op.py
index 2eeaa90758c57ef0d92a8ad7b0a4c1b1f2c38be3..338355d0c4d33a8927e689001bae3a236a0ecd3a 100644
--- a/python/paddle/v2/fluid/tests/test_log_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_log_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_logical_op.py b/python/paddle/v2/fluid/tests/test_logical_op.py
index ac90bf839cb96053387bb82c112692136707744c..dd67dc561b0e2740d60c3bc21fae0a10ba648c5e 100644
--- a/python/paddle/v2/fluid/tests/test_logical_op.py
+++ b/python/paddle/v2/fluid/tests/test_logical_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import op_test
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_lookup_table_op.py b/python/paddle/v2/fluid/tests/test_lookup_table_op.py
index a56a549e69eaf950df39853a63947a8abac930d7..d5255ba31f7c9e45cf29f412546146234f822026 100644
--- a/python/paddle/v2/fluid/tests/test_lookup_table_op.py
+++ b/python/paddle/v2/fluid/tests/test_lookup_table_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lrn_op.py b/python/paddle/v2/fluid/tests/test_lrn_op.py
index 9abb09e53a7af8eec69f9bd501c6883dd9df9930..a841dcf79f9abbd9e4a995a395ae9211f62832f2 100644
--- a/python/paddle/v2/fluid/tests/test_lrn_op.py
+++ b/python/paddle/v2/fluid/tests/test_lrn_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lstm_op.py b/python/paddle/v2/fluid/tests/test_lstm_op.py
index 77f062e8c8870ec9cc56c9566108abe74665ae30..d9fa01e247ae613fb2a7ed523a447e31a5bd5994 100644
--- a/python/paddle/v2/fluid/tests/test_lstm_op.py
+++ b/python/paddle/v2/fluid/tests/test_lstm_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lstm_unit_op.py b/python/paddle/v2/fluid/tests/test_lstm_unit_op.py
index 6bad2e1f7c34c51419424d88b41b809da997eb8f..d6348ea0ec9ddf6adb8b48f0d4aeb03dd9b0e895 100644
--- a/python/paddle/v2/fluid/tests/test_lstm_unit_op.py
+++ b/python/paddle/v2/fluid/tests/test_lstm_unit_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py b/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py
index 63378cbc4ec95d7d3c49a92f750b55a8dbc22414..694ce20712864effbd2d1e1c66c388b5a3c8ec49 100644
--- a/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_math_op_patch.py b/python/paddle/v2/fluid/tests/test_math_op_patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e77639a4c886327cc8dc7053fc6c0f6c6e9dcc9
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_math_op_patch.py
@@ -0,0 +1,181 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import decorators
+import paddle.v2.fluid as fluid
+import numpy
+
+
+class TestMathOpPatches(unittest.TestCase):
+    @decorators.prog_scope()
+    def test_add_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = a + 10
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(a_np + 10, b_np))
+
+    @decorators.prog_scope()
+    def test_radd_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = 10 + a
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(a_np + 10, b_np))
+
+    @decorators.prog_scope()
+    def test_sub_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = a - 10
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(a_np - 10, b_np))
+
+    @decorators.prog_scope()
+    def test_radd_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = 10 - a
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(10 - a_np, b_np))
+
+    @decorators.prog_scope()
+    def test_mul_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = a * 10
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(a_np * 10, b_np))
+
+    @decorators.prog_scope()
+    def test_rmul_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = 10 * a
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(10 * a_np, b_np))
+
+    @decorators.prog_scope()
+    def test_div_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = a / 10
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(a_np / 10, b_np))
+
+    @decorators.prog_scope()
+    def test_rdiv_scalar(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = 10 / a
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32') + 1e-2
+
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(10 / a_np, b_np))
+
+    @decorators.prog_scope()
+    def test_div_two_tensor(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = fluid.layers.data(name="b", shape=[1])
+        c = a / b
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = numpy.random.random(size=[10, 1]).astype('float32') + 1e-2
+        c_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np,
+                             'b': b_np},
+                       fetch_list=[c])
+        self.assertTrue(numpy.allclose(a_np / b_np, c_np))
+
+    @decorators.prog_scope()
+    def test_mul_two_tensor(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = fluid.layers.data(name="b", shape=[1])
+        c = a * b
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = numpy.random.random(size=[10, 1]).astype('float32')
+        c_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np,
+                             'b': b_np},
+                       fetch_list=[c])
+        self.assertTrue(numpy.allclose(a_np * b_np, c_np))
+
+    @decorators.prog_scope()
+    def test_add_two_tensor(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = fluid.layers.data(name="b", shape=[1])
+        c = a + b
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = numpy.random.random(size=[10, 1]).astype('float32')
+        c_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np,
+                             'b': b_np},
+                       fetch_list=[c])
+        self.assertTrue(numpy.allclose(a_np + b_np, c_np))
+
+    @decorators.prog_scope()
+    def test_sub_two_tensor(self):
+        a = fluid.layers.data(name="a", shape=[1])
+        b = fluid.layers.data(name="b", shape=[1])
+        c = a - b
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.random(size=[10, 1]).astype('float32')
+        b_np = numpy.random.random(size=[10, 1]).astype('float32')
+        c_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np,
+                             'b': b_np},
+                       fetch_list=[c])
+        self.assertTrue(numpy.allclose(a_np - b_np, c_np))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_matmul_op.py b/python/paddle/v2/fluid/tests/test_matmul_op.py
index d51572c8ab7c44fa0c6e83e50b56f05780530c61..5138af38f4dcc044d31622341b3d7fccc4f6bc90 100644
--- a/python/paddle/v2/fluid/tests/test_matmul_op.py
+++ b/python/paddle/v2/fluid/tests/test_matmul_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -46,19 +60,18 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
             X = X.reshape((X.size, 1))
         elif X.ndim == 2:
             X = X.T
-        elif X.ndim == 3:
-            X = np.transpose(X, (0, 2, 1))
         else:
-            raise ValueError('X must have between 1 and 3 dimensions')
+            dim = [i for i in range(len(X.shape))]
+            dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1]
+            X = np.transpose(X, tuple(dim))
     if transpose_Y:
         if Y.ndim == 1:
             Y = Y.reshape((1, Y.size))
-        elif Y.ndim == 2:
-            Y = Y.T
-        elif Y.ndim == 3:
-            Y = np.transpose(Y, (0, 2, 1))
         else:
-            raise ValueError('Y must have between 1 and 3 dimensions')
+            dim = [i for i in range(len(Y.shape))]
+            dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1]
+            Y = np.transpose(Y, tuple(dim))
+
     Out = np.matmul(X, Y)
     if not Out.shape:
         # We do not support 0-dimensional Tensors (scalars). So where
@@ -83,18 +96,18 @@ class Generator(object):
         self.outputs = {'Out': Out}
 
     def test_check_output(self):
-        self.check_output(atol=1e-2)
+        self.check_output(atol=1e-3)
 
     def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)
+        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
 
     def test_check_grad_ignore_x(self):
         self.check_grad(
-            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
+            ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X"))
 
     def test_check_grad_ignore_y(self):
         self.check_grad(
-            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
+            ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y'))
 
 
 # Generate test cases for all possibilities
@@ -107,13 +120,50 @@ for dim_X in [1, 2, 3]:
                         dim_X, dim_Y, transpose_X, transpose_Y))
                 shape_X, shape_Y = generate_compatible_shapes(
                     dim_X, dim_Y, transpose_X, transpose_Y)
-                test_class = type(test_name, (Generator, OpTest), {
+                globals()[test_name] = type(test_name, (Generator, OpTest), {
                     'shape_X': shape_X,
                     'shape_Y': shape_Y,
                     'transpose_X': transpose_X,
                     'transpose_Y': transpose_Y,
                 })
-                globals()[test_name] = test_class
+
+
+# Test case n-dim
+def generate_compatible_shapes(dim, transpose_X, transpose_Y):
+    M = 2
+    N = 4
+    K = 3
+    shape_X = [2 for _ in range(dim - 2)]
+    shape_Y = [2 for _ in range(dim - 2)]
+
+    if transpose_X:
+        shape_X += [K, M]
+    else:
+        shape_X += [M, K]
+
+    if transpose_Y:
+        shape_Y += [N, K]
+    else:
+        shape_Y += [K, N]
+
+    return shape_X, shape_Y
+
+
+# Test case n-dim
+for dim in [4]:
+    for transpose_X in [False, True]:
+        for transpose_Y in [False, True]:
+            test_name = (
+                'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
+                    dim, dim, transpose_X, transpose_Y))
+            shape_X, shape_Y = generate_compatible_shapes(dim, transpose_X,
+                                                          transpose_Y)
+            globals()[test_name] = type(test_name, (Generator, OpTest), {
+                'shape_X': shape_X,
+                'shape_Y': shape_Y,
+                'transpose_X': transpose_X,
+                'transpose_Y': transpose_Y,
+            })
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_maxout_op.py b/python/paddle/v2/fluid/tests/test_maxout_op.py
index 5fbed43e254b811d38e441e946a73c24f87373de..5cd7fbde84a3414829aa51f39283fed0499d39f1 100644
--- a/python/paddle/v2/fluid/tests/test_maxout_op.py
+++ b/python/paddle/v2/fluid/tests/test_maxout_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_mean_op.py b/python/paddle/v2/fluid/tests/test_mean_op.py
index 7823abd8f813aad6462c98a9ace9a13dc286a157..81e842163584456b33a0dbd95cfcb405ae857b75 100644
--- a/python/paddle/v2/fluid/tests/test_mean_op.py
+++ b/python/paddle/v2/fluid/tests/test_mean_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py b/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py
index 5cce75ddb8df50a35156fc2b8b411823711989c0..2e9ed78ffd8253d5f2286256097a2238a99d7ba5 100644
--- a/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py
+++ b/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_minus_op.py b/python/paddle/v2/fluid/tests/test_minus_op.py
index c56d7cb548706880dd482bad750f2989c0e9a710..aee909f56c4353dc8feb7bb82198d5944376f151 100644
--- a/python/paddle/v2/fluid/tests/test_minus_op.py
+++ b/python/paddle/v2/fluid/tests/test_minus_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
index 33558c6105442b169b2e26abc7f39e15b7fe7322..3288a0f007c8e593831692eb9a134e78804bdc2e 100644
--- a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
+++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.framework import Program, program_guard, default_main_program, default_startup_program
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py b/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py
index 33de8ff7219fafa1ddeb9ebd78d77ae4fa240c98..eb3873b9ea8b9cbf7b87c125559cc070c39946dc 100644
--- a/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_momentum_op.py b/python/paddle/v2/fluid/tests/test_momentum_op.py
index 638095f7564c8761151a7794f98f9ca797b0083b..048eaae06ba5323517d5a32174233faaa0fd8be9 100644
--- a/python/paddle/v2/fluid/tests/test_momentum_op.py
+++ b/python/paddle/v2/fluid/tests/test_momentum_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_mul_op.py b/python/paddle/v2/fluid/tests/test_mul_op.py
index 57d6d7e7e095cab2c3afb60d229fc09da98aed8b..83715f0e27b4e4599360356c02831c6814ef520f 100644
--- a/python/paddle/v2/fluid/tests/test_mul_op.py
+++ b/python/paddle/v2/fluid/tests/test_mul_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_multiplex_op.py b/python/paddle/v2/fluid/tests/test_multiplex_op.py
index 5937eb5aa4621556c9b8d59ea83a39d9738c7925..a06aef94a5d9dbee1bcad287c18540e308ce22fa 100644
--- a/python/paddle/v2/fluid/tests/test_multiplex_op.py
+++ b/python/paddle/v2/fluid/tests/test_multiplex_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_nce.py b/python/paddle/v2/fluid/tests/test_nce.py
index 8aeba69769525935c26576ec50035ed50d2ce44f..3ae727a573855b3cb618a8fab70404adf3d92f51 100644
--- a/python/paddle/v2/fluid/tests/test_nce.py
+++ b/python/paddle/v2/fluid/tests/test_nce.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_net.py b/python/paddle/v2/fluid/tests/test_net.py
index d9fe55a8af5c750c5c926e875ddbb645f8abb1a0..69d95d4f707d6ff3a66079802a312d0c847f410c 100644
--- a/python/paddle/v2/fluid/tests/test_net.py
+++ b/python/paddle/v2/fluid/tests/test_net.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 from paddle.v2.fluid.op import Operator
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_norm_op.py b/python/paddle/v2/fluid/tests/test_norm_op.py
index 7d56320489b24c5547e045cb51b778851ff94a32..dd1cd5a31c55f898239297f2815370316a6a8ccf 100644
--- a/python/paddle/v2/fluid/tests/test_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_norm_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_normalization_wrapper.py b/python/paddle/v2/fluid/tests/test_normalization_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..57f14f6b9cc9c7cf9ae93274cf3d7763350e6e10
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_normalization_wrapper.py
@@ -0,0 +1,96 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.core as core
+import numpy as np
+
+
+class TestNormalization(unittest.TestCase):
+    data_desc = {"name": "input", "shape": (2, 3, 7)}
+
+    def gen_random_input(self):
+        """Generate random input data.
+        """
+        self.data = np.random.random(
+            size=self.data_desc["shape"]).astype("float32")
+
+    def set_program(self, axis, epsilon):
+        """Build the test program.
+        """
+        data = fluid.layers.data(
+            name=self.data_desc["name"],
+            shape=self.data_desc["shape"],
+            dtype="float32",
+            append_batch_size=False)
+        data.stop_gradient = False
+        l2_norm = fluid.layers.l2_normalize(x=data, axis=axis, epsilon=epsilon)
+        out = fluid.layers.reduce_sum(l2_norm, dim=None)
+
+        fluid.backward.append_backward(loss=out)
+        self.fetch_list = [l2_norm]
+
+    def run_program(self):
+        """Run the test program.
+        """
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu():
+            places.append(core.CUDAPlace(0))
+
+        for place in places:
+            self.set_inputs(place)
+            exe = fluid.Executor(place)
+
+            output = exe.run(fluid.default_main_program(),
+                             feed=self.inputs,
+                             fetch_list=self.fetch_list,
+                             return_numpy=True)
+            self.op_output = output
+
+    def set_inputs(self, place):
+        """Set the randomly generated data to the test program.
+        """
+        self.inputs = {}
+        tensor = fluid.Tensor()
+        tensor.set(self.data, place)
+        self.inputs[self.data_desc["name"]] = tensor
+
+    def l2_normalize(self, data, axis, epsilon):
+        """ Compute the groundtruth.
+        """
+        output = data * np.reciprocal(
+            np.sum(np.square(data), axis=axis, keepdims=True))
+        return output
+
+    def test_l2_normalize(self):
+        """ Test the python wrapper for l2_normalize.
+        """
+        axis = 1
+        #TODO(caoying) epsilon is not supported due to lack of a maximum_op.
+        epsilon = 1e-6
+
+        self.gen_random_input()
+
+        self.set_program(axis, epsilon)
+        self.run_program()
+
+        expect_output = self.l2_normalize(self.data, axis, epsilon)
+
+        # check output
+        self.assertTrue(np.allclose(self.op_output, expect_output, atol=0.001))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_op_support_gpu.py b/python/paddle/v2/fluid/tests/test_op_support_gpu.py
index a0eb4bd5fd2cc178ffe0763efdee61524ad6d4bd..34939818126b1d747fb76861bbd691894fb3759b 100644
--- a/python/paddle/v2/fluid/tests/test_op_support_gpu.py
+++ b/python/paddle/v2/fluid/tests/test_op_support_gpu.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 
diff --git a/python/paddle/v2/fluid/tests/test_operator.py b/python/paddle/v2/fluid/tests/test_operator.py
index c059a2b88b1324935f871b6e9c11efd5652ddd65..b82cf580e8567a7d519a75f971bfa0de3ce90684 100644
--- a/python/paddle/v2/fluid/tests/test_operator.py
+++ b/python/paddle/v2/fluid/tests/test_operator.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid.op as op
diff --git a/python/paddle/v2/fluid/tests/test_operator_desc.py b/python/paddle/v2/fluid/tests/test_operator_desc.py
index ce34d95ac8cb2644dee9c551cd8e85b33609919a..2c8665ffa25549c57eed2934440be676260c1f31 100644
--- a/python/paddle/v2/fluid/tests/test_operator_desc.py
+++ b/python/paddle/v2/fluid/tests/test_operator_desc.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py
index 1eadb7d912629024ee21e30b0a5fa4910bb96e06..480ee7091579ba171ca957cb4d25f0034e0534c0 100644
--- a/python/paddle/v2/fluid/tests/test_optimizer.py
+++ b/python/paddle/v2/fluid/tests/test_optimizer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid.framework as framework
diff --git a/python/paddle/v2/fluid/tests/test_pad_op.py b/python/paddle/v2/fluid/tests/test_pad_op.py
index 55f1774e5755c846f60a2f1df3e705444a81192b..0bd48000555697be9822d4dfb1056cbc0414aa11 100644
--- a/python/paddle/v2/fluid/tests/test_pad_op.py
+++ b/python/paddle/v2/fluid/tests/test_pad_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_parallel_op.py b/python/paddle/v2/fluid/tests/test_parallel_op.py
index 2b51a1f50473d0728b8180772f42584797143b4e..dfde492c7cd930615c030bb0c8e5a2cf36ff59a8 100644
--- a/python/paddle/v2/fluid/tests/test_parallel_op.py
+++ b/python/paddle/v2/fluid/tests/test_parallel_op.py
@@ -1,4 +1,19 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
+
 import paddle.v2.fluid as fluid
 import numpy
 
@@ -13,13 +28,13 @@ class BaseParallelForTest(unittest.TestCase):
                 returns the data layers, and the second yield returns the loss. 
                 The modified data variables will be sent back during the first 
                 yield.
-            
+
             feed(dict): The executor feeding dictionary.
             fetch(list|basestr): The fetch name lists. 
 
         Returns:
             None
-            
+
         Raises:
             AssertionError when the computation of cpu, parallel.for in cpu, 
                 gpu, parallel.for in gpu are different.
@@ -134,23 +149,27 @@ class BaseParallelForTest(unittest.TestCase):
 
 
 class ParallelOpTest(BaseParallelForTest):
-    def test_simple_fc(self):
-        def __network__():
-            x = fluid.layers.data(shape=[784], dtype='float32', name='img')
-            # FIXME: This is a bug of parallel.do
-            x.stop_gradient = False
-            x = yield x
-            hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
-            loss = fluid.layers.mean(x=hidden)
-            yield loss
+    @staticmethod
+    def __network__():
+        x = fluid.layers.data(shape=[784], dtype='float32', name='img')
+        x = yield x
+        hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
+        loss = fluid.layers.mean(x=hidden)
+        yield loss
 
+    def test_simple_fc(self):
         self.run_test(
-            callback=__network__,
+            callback=ParallelOpTest.__network__,
             feed={
-                'img':
-                numpy.random.random(size=(128 * 3, 784)).astype('float32')
+                'img': numpy.random.random(size=(51, 784)).astype('float32')
             },
-            fetch='fc1.w@GRAD')
+            fetch=['fc1.w@GRAD'])
+
+    def test_fc_with_tiny_data(self):
+        self.run_test(
+            callback=ParallelOpTest.__network__,
+            feed={'img': numpy.random.random(size=(1, 784)).astype('float32')},
+            fetch=['fc1.w@GRAD'])
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py
index 694344acbbd3b7c80cb0ff48ada843f794061282..dfecdf939bcdb7957b55daea627cdd178b1a3947 100644
--- a/python/paddle/v2/fluid/tests/test_parameter.py
+++ b/python/paddle/v2/fluid/tests/test_parameter.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 from paddle.v2.fluid.framework import default_main_program
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py
index 5dff6270f455395ce6ca8ae2428236f630467095..2f43be8a0ff03731262039cd8eb060d89ae4be40 100644
--- a/python/paddle/v2/fluid/tests/test_pool2d_op.py
+++ b/python/paddle/v2/fluid/tests/test_pool2d_op.py
@@ -1,5 +1,21 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -44,6 +60,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
 
 class TestPool2d_Op(OpTest):
     def setUp(self):
+        self.use_cudnn = False
         self.init_test_case()
         self.init_global_pool()
         self.init_op_type()
@@ -62,15 +79,25 @@ class TestPool2d_Op(OpTest):
             'ksize': self.ksize,
             'pooling_type': self.pool_type,
             'global_pooling': self.global_pool,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         self.outputs = {'Out': output.astype('float32')}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        if self.pool_type != "max":
+        if self.use_cudnn and self.pool_type != "max":
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, set(['X']), 'Out', max_relative_error=0.07)
+        elif self.pool_type != "max":
             self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
 
     def init_test_case(self):
@@ -153,35 +180,41 @@ class TestCase5(TestCase2):
         self.pool2D_forward_naive = max_pool2D_forward_naive
 
 
-#--------------------test pool2d_cudnn--------------------
-class TestCudnnCase1(TestPool2d_Op):
+#--------------------test pool2d--------------------
+class TestCUDNNCase1(TestPool2d_Op):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase2(TestCase1):
+class TestCUDNNCase2(TestCase1):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase3(TestCase2):
+class TestCUDNNCase3(TestCase2):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase4(TestCase3):
+class TestCUDNNCase4(TestCase3):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase5(TestCase4):
+class TestCUDNNCase5(TestCase4):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase6(TestCase5):
+class TestCUDNNCase6(TestCase5):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py
index 2ba86665a7d207e61159c02643fa40daca3be080..c93711e051b9b12a2314f16fc822e1378bddce47 100644
--- a/python/paddle/v2/fluid/tests/test_pool3d_op.py
+++ b/python/paddle/v2/fluid/tests/test_pool3d_op.py
@@ -1,5 +1,21 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -52,6 +68,7 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
 
 class TestPool3d_Op(OpTest):
     def setUp(self):
+        self.use_cudnn = False
         self.init_test_case()
         self.init_global_pool()
         self.init_op_type()
@@ -71,15 +88,25 @@ class TestPool3d_Op(OpTest):
             'ksize': self.ksize,
             'pooling_type': self.pool_type,
             'global_pooling': self.global_pool,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         self.outputs = {'Out': output.astype('float32')}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        if self.pool_type != "max":
+        if self.use_cudnn and self.pool_type != "max":
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, set(['X']), 'Out', max_relative_error=0.07)
+        elif self.pool_type != "max":
             self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
 
     def init_test_case(self):
@@ -163,35 +190,41 @@ class TestCase5(TestCase2):
         self.pool3D_forward_naive = max_pool3D_forward_naive
 
 
-#--------------------test pool3d_cudnn--------------------
-class TestCudnnCase1(TestPool3d_Op):
+#--------------------test pool3d--------------------
+class TestCUDNNCase1(TestPool3d_Op):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase2(TestCase1):
+class TestCUDNNCase2(TestCase1):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase3(TestCase2):
+class TestCUDNNCase3(TestCase2):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase4(TestCase3):
+class TestCUDNNCase4(TestCase3):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase5(TestCase4):
+class TestCUDNNCase5(TestCase4):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase6(TestCase5):
+class TestCUDNNCase6(TestCase5):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_pool_max_op.py b/python/paddle/v2/fluid/tests/test_pool_max_op.py
index 9d2d61c43868701392e90542f3b7fb2c4ea07548..330ad24bd420401024571725a36191492bfae495 100644
--- a/python/paddle/v2/fluid/tests/test_pool_max_op.py
+++ b/python/paddle/v2/fluid/tests/test_pool_max_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py b/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py
index f6a6c428a26dece01fe2958991edd3edf3a8266e..9b5e54465559ece98387d1690ed29a582999715e 100644
--- a/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py
+++ b/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import itertools
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_precision_recall_op.py b/python/paddle/v2/fluid/tests/test_precision_recall_op.py
index d3dbdb6e2aba6dfe98440ad07083cf1ffda5b668..188b7af559376f8535d109c245f08d8e050201d5 100644
--- a/python/paddle/v2/fluid/tests/test_precision_recall_op.py
+++ b/python/paddle/v2/fluid/tests/test_precision_recall_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_prelu_op.py b/python/paddle/v2/fluid/tests/test_prelu_op.py
index 7be932ac8f6b82283fecd32ac4b3b7bb9aff0338..848036234c8cddd0cb3819322bd338eaf59dd360 100644
--- a/python/paddle/v2/fluid/tests/test_prelu_op.py
+++ b/python/paddle/v2/fluid/tests/test_prelu_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_print_op.py b/python/paddle/v2/fluid/tests/test_print_op.py
index 86a701a020fc197d69d113f82a4e5ac58f377179..3177700dfad6567d667b890d2a58a61c8355c631 100644
--- a/python/paddle/v2/fluid/tests/test_print_op.py
+++ b/python/paddle/v2/fluid/tests/test_print_op.py
@@ -1,20 +1,68 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
-import numpy as np
-from paddle.v2.fluid.executor import Executor
 import paddle.v2.fluid.core as core
-import paddle.v2.fluid.layers as pd
+from paddle.v2.fluid.executor import Executor
+import paddle.v2.fluid.layers as layers
+from paddle.v2.fluid.backward import append_backward
+from paddle.v2.fluid.framework import switch_main_program
+from paddle.v2.fluid.framework import Program
+import numpy as np
+
+
+class TestPrintOpCPU(unittest.TestCase):
+    def setUp(self):
+        self.place = core.CPUPlace()
+        self.x_tensor = core.LoDTensor()
+        tensor_np = np.random.random(size=(2, 3)).astype('float32')
+        self.x_tensor.set(tensor_np, self.place)
+        self.x_tensor.set_lod([[0, 1, 1]])
 
+    def build_network(self, only_forward, **kargs):
+        x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
+        x.stop_gradient = False
+        printed = layers.Print(input=x, **kargs)
+        if only_forward: return printed
+        loss = layers.mean(x=printed)
+        append_backward(loss=loss)
+        return loss
 
-class TestSumOp(unittest.TestCase):
-    def test_tensor(self):
-        i = pd.zeros(shape=[2, 10], dtype='float32')
+    def test_forward(self):
+        switch_main_program(Program())
+        printed = self.build_network(True, print_phase='forward')
+        exe = Executor(self.place)
+        outs = exe.run(feed={'x': self.x_tensor},
+                       fetch_list=[printed],
+                       return_numpy=False)
 
-        pd.Print(i, message="I am a message", summarize=10)
+    def test_backward(self):
+        switch_main_program(Program())
+        loss = self.build_network(False, print_phase='backward')
+        exe = Executor(self.place)
+        outs = exe.run(feed={'x': self.x_tensor},
+                       fetch_list=[loss],
+                       return_numpy=False)
 
-        cpu = core.CPUPlace()
-        exe = Executor(cpu)
 
-        exe.run()
+class TestPrintOpGPU(TestPrintOpCPU):
+    def setUp(self):
+        self.place = core.CUDAPlace(0)
+        self.x_tensor = core.LoDTensor()
+        tensor_np = np.random.random(size=(2, 3)).astype('float32')
+        self.x_tensor.set(tensor_np, self.place)
+        self.x_tensor.set_lod([[0, 1, 1]])
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_profiler.py b/python/paddle/v2/fluid/tests/test_profiler.py
index e3f3ac58ef9b30864849770510f7339749dab84f..abf8881b6786416f56f93e498761a4791b35d7c3 100644
--- a/python/paddle/v2/fluid/tests/test_profiler.py
+++ b/python/paddle/v2/fluid/tests/test_profiler.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py
index 447c746aacc1c9455d7a023bca625d548ab2638b..9967da15937a1d11adfbaa67b3aa163917a52843 100644
--- a/python/paddle/v2/fluid/tests/test_program.py
+++ b/python/paddle/v2/fluid/tests/test_program.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import print_function
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_protobuf.py b/python/paddle/v2/fluid/tests/test_protobuf.py
index e064374176fa221cfd042b7dbd2ddcb3b5ec41ec..48e6dedc5862ccf10751d8dd66a8230bdf85eca2 100644
--- a/python/paddle/v2/fluid/tests/test_protobuf.py
+++ b/python/paddle/v2/fluid/tests/test_protobuf.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_protobuf_descs.py b/python/paddle/v2/fluid/tests/test_protobuf_descs.py
index d8abe17606c4ddb2ff51d5f918b1e5d7e110f7fa..9034b2f4ef1c983ef224b14b8f602f87e6ce94b0 100644
--- a/python/paddle/v2/fluid/tests/test_protobuf_descs.py
+++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 
diff --git a/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py b/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py
index f89a493ab7a7a3d841088b7db37bff4dfbe63735..744d71bdcf438538e725e7d776b8423edd475639 100644
--- a/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py
+++ b/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_proximal_gd_op.py b/python/paddle/v2/fluid/tests/test_proximal_gd_op.py
index 9ca79ce6b3b710244e4f65db70b305231a9f3fcf..96540cf6cf4ed970c3e743df7621e443c49eef89 100644
--- a/python/paddle/v2/fluid/tests/test_proximal_gd_op.py
+++ b/python/paddle/v2/fluid/tests/test_proximal_gd_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_rank_loss_op.py b/python/paddle/v2/fluid/tests/test_rank_loss_op.py
index 0e41ab1b3fd8fa8b62c5f3b914b752918119a265..f31a2c2681871f63a272a1a658ed65cc8646b8a8 100644
--- a/python/paddle/v2/fluid/tests/test_rank_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_rank_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py
index 84f4e36fa7312fbcb96cc66ff26e234c3016df30..6d59e199e24d460791ab6273b20d30bfa733d3c1 100644
--- a/python/paddle/v2/fluid/tests/test_recurrent_op.py
+++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid.layers as layers
diff --git a/python/paddle/v2/fluid/tests/test_reduce_op.py b/python/paddle/v2/fluid/tests/test_reduce_op.py
index a021d4dd91bb9cc1e5d85411b3813b966ef5b296..1a4af39fb9dbc3de7d6746ee92a8e0c232e76c9f 100644
--- a/python/paddle/v2/fluid/tests/test_reduce_op.py
+++ b/python/paddle/v2/fluid/tests/test_reduce_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_registry.py b/python/paddle/v2/fluid/tests/test_registry.py
index f8328f31cf8203f5ea8af2c14417879616ccab71..6435e7e243d4e7fa10c99fda48a011523d8cc588 100644
--- a/python/paddle/v2/fluid/tests/test_registry.py
+++ b/python/paddle/v2/fluid/tests/test_registry.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import warnings
 
diff --git a/python/paddle/v2/fluid/tests/test_regularizer.py b/python/paddle/v2/fluid/tests/test_regularizer.py
index 890c881a126a32344128652691c6cad45e02e82d..b33817fa41636e7a62aa5907e63a9302b2149f66 100644
--- a/python/paddle/v2/fluid/tests/test_regularizer.py
+++ b/python/paddle/v2/fluid/tests/test_regularizer.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 import paddle.v2.fluid.framework as framework
diff --git a/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py b/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py
index 8b79d448e263d00849877c29158d7898bafe1937..74cd6de9e6fde70c001bb2189c4976cdd8e34633 100644
--- a/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py
+++ b/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.core as core
@@ -6,8 +20,8 @@ import numpy
 
 class TestReorderLoDTensor(unittest.TestCase):
     num_seq = 5
-    # [name, dim, lod_level] pair indicating data info of source and target
-    data_desc = (['input', 9, 0], ['ref', 5, 1])
+    # [name, shape, lod_level] pair indicating data info of source and target
+    data_desc = (['input', [9], 0], ['ref', [5], 1])
 
     @classmethod
     def setUpClass(cls):
@@ -16,10 +30,10 @@ class TestReorderLoDTensor(unittest.TestCase):
     @classmethod
     def set_program(cls):
         dat = fluid.layers.data(
-            name=cls.data_desc[0][0], shape=[cls.data_desc[0][1]])
+            name=cls.data_desc[0][0], shape=cls.data_desc[0][1])
         dat.stop_gradient = False
         rank_dat = fluid.layers.data(
-            name=cls.data_desc[1][0], shape=[cls.data_desc[1][1]])
+            name=cls.data_desc[1][0], shape=cls.data_desc[1][1])
         table = fluid.layers.lod_rank_table(rank_dat)
         new_dat = fluid.layers.reorder_lod_tensor_by_rank(
             x=dat, rank_table=table)
@@ -49,7 +63,7 @@ class TestReorderLoDTensor(unittest.TestCase):
         self.data = {}
         for desc in self.data_desc:
             data_name = desc[0]
-            data_dim = desc[1]
+            data_shape = desc[1]
             data_lod_level = desc[2]
             data_lod = []
             for i in range(data_lod_level):
@@ -59,9 +73,9 @@ class TestReorderLoDTensor(unittest.TestCase):
                     size=self.num_seq if i == 0 else lod_level_i[-1])
                 lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist()
                 data_lod.append(lod_level_i)
-            data_value = numpy.random.random(size=[
-                data_lod[-1][-1] if data_lod else self.num_seq, data_dim
-            ]).astype('float32')
+            data_value = numpy.random.random(
+                size=[data_lod[-1][-1] if data_lod else self.num_seq
+                      ] + data_shape).astype('float32')
             self.data[data_name] = (data_value, data_lod)
 
     def set_inputs(self, place):
@@ -163,8 +177,6 @@ class TestReorderLoDTensor(unittest.TestCase):
                 numpy.allclose(
                     numpy.array(actual_grad), expect_grad, atol=0.001))
             self.assertEqual(expect_grad_lod, actual_grad.lod())
-        global outputs_from_tensor_implicit_lod
-        outputs_from_tensor_implicit_lod = self.actual_outputs
 
         # compare outputs between LodTensors with explicit and implicit lod
         # use the same data but set the input lod explicitly
diff --git a/python/paddle/v2/fluid/tests/test_reshape_op.py b/python/paddle/v2/fluid/tests/test_reshape_op.py
index 18ee3aece656276fec9671df9baf298b7fd3c9b1..2cc0b36460994b021c6e6fd944c80dfec6c92f2f 100644
--- a/python/paddle/v2/fluid/tests/test_reshape_op.py
+++ b/python/paddle/v2/fluid/tests/test_reshape_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_rmsprop_op.py b/python/paddle/v2/fluid/tests/test_rmsprop_op.py
index 237bcfccceee89f62fc05e4c6c972a76d1875367..b6d7c698009d1fc35a5cdbcb72708ee74c181bf4 100644
--- a/python/paddle/v2/fluid/tests/test_rmsprop_op.py
+++ b/python/paddle/v2/fluid/tests/test_rmsprop_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py
index d1bb20f37a3785f70bee072b9df282bba4012c16..82b54bbd1a46be763c436f075b28e466f7fbd3fa 100644
--- a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py
+++ b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 
 from paddle.v2.fluid.framework import Program
diff --git a/python/paddle/v2/fluid/tests/test_roi_pool_op.py b/python/paddle/v2/fluid/tests/test_roi_pool_op.py
index a28d9c7f82d3735c410369eb61e350168c267cea..af48848dcd06d35aa404d4cb688d7fd55c6b048b 100644
--- a/python/paddle/v2/fluid/tests/test_roi_pool_op.py
+++ b/python/paddle/v2/fluid/tests/test_roi_pool_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import math
diff --git a/python/paddle/v2/fluid/tests/test_row_conv_op.py b/python/paddle/v2/fluid/tests/test_row_conv_op.py
index 1ed86e23ac28a575cdc3388e9da547918eb8a1be..580b08f75ebbdf0f79cc45becf84520503e9877c 100644
--- a/python/paddle/v2/fluid/tests/test_row_conv_op.py
+++ b/python/paddle/v2/fluid/tests/test_row_conv_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_scale_op.py b/python/paddle/v2/fluid/tests/test_scale_op.py
index 2ea1e185470280730ae8c8c0ea9568bbeb43eaf5..95cd935dda32bd5e1e8afc1e09c83affe8004cf7 100644
--- a/python/paddle/v2/fluid/tests/test_scale_op.py
+++ b/python/paddle/v2/fluid/tests/test_scale_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_scatter_op.py b/python/paddle/v2/fluid/tests/test_scatter_op.py
index 1032269d5dfb02e3518b9ef2820d5d0dcc8a51a0..f2936e19ae52b1b3c6d7daf02f68789af12d2b23 100644
--- a/python/paddle/v2/fluid/tests/test_scatter_op.py
+++ b/python/paddle/v2/fluid/tests/test_scatter_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_scope.py b/python/paddle/v2/fluid/tests/test_scope.py
index e4857b590aa6e09f1fa37c4a8a70a3ec9495b085..566a11abbe28c7fa79f07253609c1152879a0ad2 100644
--- a/python/paddle/v2/fluid/tests/test_scope.py
+++ b/python/paddle/v2/fluid/tests/test_scope.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_selected_rows.py b/python/paddle/v2/fluid/tests/test_selected_rows.py
index 93daf37aa2ceb8a599973f7b02874f23fe0763ff..65ddf1f8f5f565ab5f361b8ef00b0032d62a58f8 100644
--- a/python/paddle/v2/fluid/tests/test_selected_rows.py
+++ b/python/paddle/v2/fluid/tests/test_selected_rows.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_seq_concat_op.py b/python/paddle/v2/fluid/tests/test_seq_concat_op.py
index dccc6ed8afe2315da74f6886878b15d58b26b3c9..ba2bb075e6d76ec4e4d74291582512f1223b8813 100644
--- a/python/paddle/v2/fluid/tests/test_seq_concat_op.py
+++ b/python/paddle/v2/fluid/tests/test_seq_concat_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import sys
diff --git a/python/paddle/v2/fluid/tests/test_seq_conv.py b/python/paddle/v2/fluid/tests/test_seq_conv.py
index 14edc5f953022ca05f5620c28bd7276d961dd4d0..674a2e16940d1c8d40fcd6abdb33c25f3c2fd845 100644
--- a/python/paddle/v2/fluid/tests/test_seq_conv.py
+++ b/python/paddle/v2/fluid/tests/test_seq_conv.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import random
diff --git a/python/paddle/v2/fluid/tests/test_seq_pool.py b/python/paddle/v2/fluid/tests/test_seq_pool.py
index 512d8b315f29cecf79ae274dca491c240f3447a1..9dd6b2a0872f64a33df3aae1cd78bc021c1f44e7 100644
--- a/python/paddle/v2/fluid/tests/test_seq_pool.py
+++ b/python/paddle/v2/fluid/tests/test_seq_pool.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_sequence_erase_op.py b/python/paddle/v2/fluid/tests/test_sequence_erase_op.py
index bf257fefea0d98c6f4d9860dbac4ccedf59bcdd9..4823836ba97d50dcc455ee0b558fdf602aa9c5e5 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_erase_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_erase_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -16,7 +30,7 @@ def sequence_erase(in_seq, lod0, tokens):
     return np.array(out_seq).astype("int32"), new_lod0
 
 
-class TestSequenceEraseOp(OpTest):
+class TestSequenceEraseOpInt32(OpTest):
     def setUp(self):
         self.op_type = "sequence_erase"
         in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
@@ -31,5 +45,35 @@ class TestSequenceEraseOp(OpTest):
         self.check_output()
 
 
+class TestSequenceEraseOpInt64(OpTest):
+    def setUp(self):
+        self.op_type = "sequence_erase"
+        in_seq = np.random.randint(0, 10, (30, 1)).astype("int64")
+        lod = [[0, 9, 13, 24, 30]]
+        tokens = [2, 3, 5]
+        out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
+        self.attrs = {'tokens': tokens}
+        self.inputs = {'X': (in_seq, lod)}
+        self.outputs = {'Out': (out_seq, [new_lod0])}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestSequenceEraseOpEmpty(OpTest):
+    def setUp(self):
+        self.op_type = "sequence_erase"
+        in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
+        lod = [[0, 9, 13, 24, 30]]
+        tokens = []
+        out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
+        self.attrs = {'tokens': tokens}
+        self.inputs = {'X': (in_seq, lod)}
+        self.outputs = {'Out': (out_seq, [new_lod0])}
+
+    def test_check_output(self):
+        self.check_output()
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_sequence_expand.py b/python/paddle/v2/fluid/tests/test_sequence_expand.py
index 0f22612d3dbe483e4d5a8638636e44e172160156..6fc045125fabf6192abc362b6b3d85623308b325 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_expand.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_expand.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_sequence_reshape.py b/python/paddle/v2/fluid/tests/test_sequence_reshape.py
new file mode 100644
index 0000000000000000000000000000000000000000..06d5af8f5e7f5383561245bbbd57ecf5f65ceec4
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_sequence_reshape.py
@@ -0,0 +1,85 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import math
+from op_test import OpTest
+
+
+class TestSequenceReshape(OpTest):
+    def setUp(self):
+        self.op_type = 'sequence_reshape'
+        dimension = 12
+        x_lod = [[0, 4, 5, 8, 11]]
+        x = np.random.uniform(0.1, 1, [11, 24]).astype('float32')
+
+        self.inputs = {'X': (x, x_lod)}
+        self.attrs = {'new_dim': dimension}
+
+        out, out_lod = self.compute_output(x, x_lod, dimension)
+
+        self.outputs = {'Out': (out, out_lod)}
+
+    def compute_output(self, x, x_lod, dimension):
+        x_width = x.shape[1]
+        out_lod = [[0]]
+        for i in xrange(len(x_lod[0]) - 1):
+            seq_len = x_lod[0][i + 1] - x_lod[0][i]
+            offset = (seq_len * x_width) / dimension
+            assert int(offset) * dimension == seq_len * x_width
+            out_lod[0].append(out_lod[0][-1] + int(offset))
+        out = np.zeros(shape=(out_lod[0][-1], dimension)).astype('float32')
+        out.ravel()[:] = x.ravel()[:]
+        return out, out_lod
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out")
+
+
+class TestSequenceReshape_reduce(TestSequenceReshape):
+    def setUp(self):
+        self.op_type = 'sequence_reshape'
+        dimension = 24
+        x_lod = [[0, 4, 6, 8, 12]]
+        x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
+
+        self.inputs = {'X': (x, x_lod)}
+        self.attrs = {'new_dim': dimension}
+
+        out, out_lod = self.compute_output(x, x_lod, dimension)
+
+        self.outputs = {'Out': (out, out_lod)}
+
+
+class TestSequenceReshape_same(TestSequenceReshape):
+    def setUp(self):
+        self.op_type = 'sequence_reshape'
+        dimension = 12
+        x_lod = [[0, 4, 6, 8, 12]]
+        x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
+
+        self.inputs = {'X': (x, x_lod)}
+        self.attrs = {'new_dim': dimension}
+
+        out, out_lod = self.compute_output(x, x_lod, dimension)
+
+        self.outputs = {'Out': (out, out_lod)}
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
index ccd9a05343b0c4aa05b258959665c0662f271512..bf1f21bcde4e293c30c1863bb84725632a147101 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import sys
diff --git a/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py b/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py
index 8bffdd585699bfae2262bcfcd0387d22fa1e62db..5bd780f6b5b5d0a9deda8701625d33fbe7e87abf 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_sgd_op.py b/python/paddle/v2/fluid/tests/test_sgd_op.py
index 14d41e172a22c677235ab3fa997ef6f0b6e39778..f87927968b0fdb00ec207ff1d52be9e0d81af139 100644
--- a/python/paddle/v2/fluid/tests/test_sgd_op.py
+++ b/python/paddle/v2/fluid/tests/test_sgd_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
index a14721b9aacfa7437623024af41555fd26990499..4578211bac63fdf365f5e2d0de7181126c6fdaed 100644
--- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
+++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py
index c42f578f72cb121a24d6b852334cbd8a977f2730..f88fa62119cfc60df832ec0e14ef11930b8bcf21 100644
--- a/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py
+++ b/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 from op_test import OpTest
 from scipy.special import logit
diff --git a/python/paddle/v2/fluid/tests/test_sign_op.py b/python/paddle/v2/fluid/tests/test_sign_op.py
index c6b59bcfd8ba71e54d4c3a2b7a3dac1f2a346265..c1dfa7f45d4d22795bef97f1f937bdbcdb5f5a30 100644
--- a/python/paddle/v2/fluid/tests/test_sign_op.py
+++ b/python/paddle/v2/fluid/tests/test_sign_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py b/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py
index b7f13c5699918d4969300499bd03e1668b2a4bca..5a388bb7b37c2509f4af290479195dd20fdc63a0 100644
--- a/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_softmax_op.py b/python/paddle/v2/fluid/tests/test_softmax_op.py
index 136fc0283afd6acf1de4baae5e681789662295ce..cf43e676c5421ce9fca07fe36fb6e347585d6e03 100644
--- a/python/paddle/v2/fluid/tests/test_softmax_op.py
+++ b/python/paddle/v2/fluid/tests/test_softmax_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py
index c2f07f9096c69f3d4977f9444bdd5dcda8028973..626f34f0e07c372d6262e030c9686a73b63cb97d 100644
--- a/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py
+++ b/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 
diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py
index 2e4defd55d75c2012f39bea30a6c4de12528e77c..bc541298ed80fd8ae9db86aec384a8522a994749 100644
--- a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py
+++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.core as core
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_split_op.py b/python/paddle/v2/fluid/tests/test_split_op.py
index 37c6ebb89d1c3bcfc3c80a54a1e92c0326e046e3..b80b64c41be4a45e9d3725297a526e93b399f00d 100644
--- a/python/paddle/v2/fluid/tests/test_split_op.py
+++ b/python/paddle/v2/fluid/tests/test_split_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_split_selected_rows_op.py b/python/paddle/v2/fluid/tests/test_split_selected_rows_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..37c6587c4151a89563f93cab35d63b2419ef88ab
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_split_selected_rows_op.py
@@ -0,0 +1,130 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.v2.fluid.core as core
+import numpy as np
+from paddle.v2.fluid.op import Operator
+
+
+class TestSpliteSelectedRows(unittest.TestCase):
+    def get_places(self):
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu():
+            places.append(core.CUDAPlace(0))
+        return places
+
+    def test_check_output(self):
+        for place in self.get_places():
+            self.check_with_place(place)
+
+    def test_check_grad(self):
+        for place in self.get_places():
+            self.check_grad_with_place(place)
+
+    def check_with_place(self, place):
+        scope = core.Scope()
+        rows = [0, 5, 7, 4, 20]
+        height = 20
+        row_numel = 2
+
+        # initialize input variable X
+        x = scope.var('X').get_selected_rows()
+        x.set_rows(rows)
+        x.set_height(height)
+        np_array = np.ones((len(rows), row_numel)).astype("float32")
+        np_array[0, 0] = 2.0
+        np_array[2, 1] = 4.0
+        np_array[4, 1] = 8.0
+        x_tensor = x.get_tensor()
+        x_tensor.set(np_array, place)
+
+        height_sections = [5, 5, 5, 5, 3]
+
+        # initialize output variables [out0, out1]
+        outs_name = ["out%d" % i for i in xrange(len(height_sections))]
+        outs = [
+            scope.var(var_name).get_selected_rows() for var_name in outs_name
+        ]
+
+        # expected output selected rows
+        expected_out0_rows = [0, 4]
+        expected_out1_rows = [5, 7]
+        expected_out4_rows = [20]
+
+        op = Operator(
+            "split_selected_rows",
+            X="X",
+            Out=outs_name,
+            height_sections=height_sections)
+
+        op.run(scope, place)
+
+        self.assertEqual(outs[0].rows(), expected_out0_rows)
+        self.assertEqual(outs[1].rows(), expected_out1_rows)
+        self.assertEqual(outs[4].rows(), expected_out4_rows)
+
+        self.assertEqual(outs[0].height(), height_sections[0])
+        self.assertEqual(outs[4].height(), height_sections[4])
+
+        self.assertAlmostEqual(2.0, np.array(outs[0].get_tensor())[0, 0])
+        self.assertAlmostEqual(4.0, np.array(outs[1].get_tensor())[1, 1])
+        self.assertAlmostEqual(8.0, np.array(outs[4].get_tensor())[0, 1])
+
+    def check_grad_with_place(self, place):
+        scope = core.Scope()
+        height = 10
+        row_numel = 2
+
+        # attr
+        height_sections = [5, 5]
+
+        # initialize input variable X
+        out0_grad = scope.var("out0@GRAD").get_selected_rows()
+        rows0 = [0, 5]
+        out0_grad.set_rows(rows0)
+        out0_grad.set_height(height)
+        out0_grad_tensor = out0_grad.get_tensor()
+        np_array = np.ones((len(rows0), row_numel)).astype("float32")
+        np_array[0, 0] = 2.0
+        out0_grad_tensor.set(np_array, place)
+
+        out1_grad = scope.var("out1@GRAD").get_selected_rows()
+        rows1 = [7, 5]
+        out1_grad.set_rows(rows1)
+        out1_grad.set_height(height)
+        out1_grad_tensor = out1_grad.get_tensor()
+        np_array = np.ones((len(rows1), row_numel)).astype("float32")
+        np_array[0, 1] = 4.0
+        out1_grad_tensor.set(np_array, place)
+
+        x_grad = scope.var("X@GRAD").get_selected_rows()
+
+        grad_op = Operator(
+            "sum",
+            X=["out0@GRAD", "out1@GRAD"],
+            Out="X@GRAD",
+            height_sections=height_sections)
+
+        grad_op.run(scope, place)
+
+        self.assertEqual(x_grad.rows(), rows0 + rows1)
+        self.assertEqual(x_grad.height(), height)
+
+        self.assertAlmostEqual(2.0, np.array(x_grad.get_tensor())[0, 0])
+        self.assertAlmostEqual(4.0, np.array(x_grad.get_tensor())[2, 1])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_spp_op.py b/python/paddle/v2/fluid/tests/test_spp_op.py
index 007723f0e35ad194c427401337bc9b13756576de..e912b56de54c91a4c716e8056a6cf891de74bedf 100644
--- a/python/paddle/v2/fluid/tests/test_spp_op.py
+++ b/python/paddle/v2/fluid/tests/test_spp_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py b/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py
index dc6ebf5d30369231b4918a168bbdf25c7096c808..8171207cd95f6b5104b6c0da3c7424016c9569b6 100644
--- a/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py
+++ b/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py b/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py
index 5a52c6a66c781672a483324083b97a3c5894f508..b7575cb4d2cea1801cc194dd4b4dcda1aea2f545 100644
--- a/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import unittest
 from numpy import linalg as LA
diff --git a/python/paddle/v2/fluid/tests/test_sum_op.py b/python/paddle/v2/fluid/tests/test_sum_op.py
index 60254291e2ab9215e2bc37c12d5e2e1ca6d33d5d..0a15a9485d72629de6e2e8c7d76c8a5b2cdf0a14 100644
--- a/python/paddle/v2/fluid/tests/test_sum_op.py
+++ b/python/paddle/v2/fluid/tests/test_sum_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_tensor.py b/python/paddle/v2/fluid/tests/test_tensor.py
index 9f870d9eb3485aa0b54eb781b906f4232d12c49e..d5cc235f588ad37b0d1293dc9894952c97411757 100644
--- a/python/paddle/v2/fluid/tests/test_tensor.py
+++ b/python/paddle/v2/fluid/tests/test_tensor.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle.v2.fluid.core as core
 import unittest
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_top_k_op.py b/python/paddle/v2/fluid/tests/test_top_k_op.py
index 6e8fbefa6eafa391cdb5e17c882ee74b5bdc6507..a50faf0fffdcffab8b67e968450573d676d2d40a 100644
--- a/python/paddle/v2/fluid/tests/test_top_k_op.py
+++ b/python/paddle/v2/fluid/tests/test_top_k_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_transpose_op.py b/python/paddle/v2/fluid/tests/test_transpose_op.py
index 9409cbaa00f792b60d5950556b869108aa732478..a16de1416f4d8d1c6cf51c3c4b57dbe97e528b45 100644
--- a/python/paddle/v2/fluid/tests/test_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_transpose_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_uniform_random_op.py b/python/paddle/v2/fluid/tests/test_uniform_random_op.py
index dbe4d6bcd069d2088b3cc1b4efd575d14afd4198..b2a39f975eb461292dc2e7be332a26931684bf90 100644
--- a/python/paddle/v2/fluid/tests/test_uniform_random_op.py
+++ b/python/paddle/v2/fluid/tests/test_uniform_random_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy
 
diff --git a/python/paddle/v2/fluid/tests/test_unpool_op.py b/python/paddle/v2/fluid/tests/test_unpool_op.py
index e87f283042c081ed9f232d140ff8c303cd3d1858..3dd43f9ba4b0efe852ba1a7d509c6f81576b2206 100644
--- a/python/paddle/v2/fluid/tests/test_unpool_op.py
+++ b/python/paddle/v2/fluid/tests/test_unpool_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py
index f1e4c0ba21d5c4f10d2b5011bdb5abaebaec5431..9f9748ca4e4778bf963360fb13661d3469e344bf 100644
--- a/python/paddle/v2/fluid/tests/test_variable.py
+++ b/python/paddle/v2/fluid/tests/test_variable.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 from paddle.v2.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_warpctc_op.py b/python/paddle/v2/fluid/tests/test_warpctc_op.py
index 59390d5303b9642ede0d421e908a1b129c68a072..55d1c73262a4d846ae2280c1967eb00f4f6c4dd7 100644
--- a/python/paddle/v2/fluid/tests/test_warpctc_op.py
+++ b/python/paddle/v2/fluid/tests/test_warpctc_op.py
@@ -1,9 +1,25 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import sys
 import unittest
 import numpy as np
 from op_test import OpTest
 from test_softmax_op import stable_softmax
 
+CUDA_BLOCK_SIZE = 512
+
 
 class CTCForward(object):
     def __init__(self, softmax, softmax_lod, labels, labels_lod, blank,
@@ -154,47 +170,63 @@ class CTCForward(object):
 
 
 class TestWarpCTCOp(OpTest):
+    def config(self):
+        self.batch_size = 4
+        self.num_classes = 8
+        self.logits_lod = [[0, 4, 5, 8, 11]]
+        self.labels_lod = [[0, 3, 4, 8, 12]]
+        self.blank = self.num_classes - 1
+        self.norm_by_times = False
+
     def setUp(self):
         self.op_type = "warpctc"
+        self.config()
 
-        batch_size = 4
-        num_classes = 8
-        logits_lod = [[0, 4, 5, 8, 11]]
-        logits = np.random.uniform(0.1, 1.0,
-                                   [11, num_classes]).astype("float32")
+        logits = np.random.uniform(
+            0.1, 1.0,
+            [self.logits_lod[0][-1], self.num_classes]).astype("float32")
         softmax = np.apply_along_axis(stable_softmax, 1, logits)
-        labels_lod = [[0, 3, 4, 8, 12]]
         # labels should not be blank
-        labels = np.random.randint(0, num_classes - 1, [12, 1], dtype="int32")
-
-        blank = num_classes - 1
-        norm_by_times = False
+        labels = np.random.randint(
+            0, self.num_classes - 1, [self.labels_lod[0][-1], 1], dtype="int32")
 
-        ctc = CTCForward(softmax, logits_lod, labels, labels_lod, blank,
-                         norm_by_times)
+        ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
+                         self.blank, self.norm_by_times)
         loss = ctc.forward()
 
         max_sequence_length = 0
-        for i in range(batch_size):
-            max_sequence_length = max(max_sequence_length,
-                                      logits_lod[0][i + 1] - logits_lod[0][i])
-        gradient = np.zeros(
-            [max_sequence_length, batch_size, num_classes], dtype="float32")
+        for i in range(self.batch_size):
+            max_sequence_length = max(
+                max_sequence_length,
+                self.logits_lod[0][i + 1] - self.logits_lod[0][i])
+        self.gradient = np.zeros(
+            [max_sequence_length, self.batch_size, self.num_classes],
+            dtype="float32")
 
         self.inputs = {
-            "Logits": (logits, logits_lod),
-            "Label": (labels, labels_lod)
+            "Logits": (logits, self.logits_lod),
+            "Label": (labels, self.labels_lod)
         }
         self.outputs = {"Loss": loss}
-        self.attrs = {"blank": blank, "norm_by_times": norm_by_times}
+        self.attrs = {"blank": self.blank, "norm_by_times": self.norm_by_times}
 
     def test_check_output(self):
         self.check_output()
 
+    def test_check_grad(self):
+        self.outputs['WarpCTCGrad'] = self.gradient
+        self.check_grad(["Logits"], "Loss", max_relative_error=0.007)
+
+
+class TestWarpCTCOpCase1(TestWarpCTCOp):
+    def config(self):
+        self.batch_size = 4
+        self.num_classes = CUDA_BLOCK_SIZE + 2
+        self.logits_lod = [[0, 4, 5, 8, 11]]
+        self.labels_lod = [[0, 3, 4, 8, 12]]
+        self.blank = 0
+        self.norm_by_times = False
 
-#    def test_check_grad(self):
-#        self.outputs["WarpCTCGrad"] = None
-#        self.check_grad(["Logits"], "Loss", max_relative_error=0.01)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py
index 7c5593cc5e5a66d4ccb237e3706ff3e544adf033..9f5e1b668c0d01768138487d38f755d2832a8551 100644
--- a/python/paddle/v2/fluid/tests/test_while_op.py
+++ b/python/paddle/v2/fluid/tests/test_while_op.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/image.py b/python/paddle/v2/image.py
index 7408ea8ef611ddfa74dc5bb6ef45d4e0ccb9d141..1429d6b1e08fe4ab2d1c5a0f19f1cedbcbc85abd 100644
--- a/python/paddle/v2/image.py
+++ b/python/paddle/v2/image.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 This file contains some common interfaces for image preprocess.
 Many users are confused about the image layout. We introduce
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 9148cb56cf78e1ebb994f4a4a34d4a1b6e2e6ef4..78bf9807da3586fd2899cd914cd64c824814e7f3 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy
 import collections
 import topology
diff --git a/python/paddle/v2/master/__init__.py b/python/paddle/v2/master/__init__.py
index c8975b5d4a33cbecb4fa5a144bc610c36591d629..494e4baf20e6d1c5171aa54c93a30c67c02bb02a 100644
--- a/python/paddle/v2/master/__init__.py
+++ b/python/paddle/v2/master/__init__.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from client import *
 
 __all__ = ['client']
diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py
index fc718f031e2267e737adbc340226e145bf614bf2..b3c790e39d7fa9b6595a92b6df94474b9e111cb0 100644
--- a/python/paddle/v2/master/client.py
+++ b/python/paddle/v2/master/client.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import ctypes
 import os
 
diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/v2/reader/tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b94a21a7e406b833797f8f521c62a2351c2bc30a 100644
--- a/python/paddle/v2/reader/tests/__init__.py
+++ b/python/paddle/v2/reader/tests/__init__.py
@@ -0,0 +1,13 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py
index cf190aa6645f9a5bed891a3a47c03efa03813d65..ac6cd4e9b6e19efb8cddbede0ff91438e5dbf531 100644
--- a/python/paddle/v2/reader/tests/creator_test.py
+++ b/python/paddle/v2/reader/tests/creator_test.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Copyright PaddlePaddle contributors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py
index 4ba71969dffe7447b6c5b70aeb752a4e5469fb36..e41e9c78a070feab85f3d221c3f1a96b83e6dccf 100644
--- a/python/paddle/v2/reader/tests/decorator_test.py
+++ b/python/paddle/v2/reader/tests/decorator_test.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import time
 import unittest
 
diff --git a/python/paddle/v2/tests/test_image.py b/python/paddle/v2/tests/test_image.py
index b2d773510de28ca2614e95b465c73b82aa7b0463..2b0444bb03fcdc051d0d2324c728584f01ca7ba4 100644
--- a/python/paddle/v2/tests/test_image.py
+++ b/python/paddle/v2/tests/test_image.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import unittest
 import numpy as np
 
diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
index de932ad715bea8db158393c3c192ef67502e2fa3..710e8135f2de2549e653e952533924e582d938d0 100644
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import unittest
 
 import paddle.v2.activation as activation
diff --git a/python/paddle/v2/tests/test_op.py b/python/paddle/v2/tests/test_op.py
index 69acccddf42bb22ab54e0cf9e2a5eaef34e47b50..dd04cc4ab6698f7f99209a49a67733c75789f1d9 100644
--- a/python/paddle/v2/tests/test_op.py
+++ b/python/paddle/v2/tests/test_op.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import unittest
 
 import paddle.v2.data_type as data_type
diff --git a/python/paddle/v2/tests/test_paramconf_order.py b/python/paddle/v2/tests/test_paramconf_order.py
index 41fea64122b81948d57cce07f00d764e4889da66..33c240b8f5a54a679289c71167551f71288d2305 100644
--- a/python/paddle/v2/tests/test_paramconf_order.py
+++ b/python/paddle/v2/tests/test_paramconf_order.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Copyright PaddlePaddle contributors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py
index 7ba8a939fbd1a949d61a007b40c054e7543c0cbc..1fe1f09b9d38b35d0b36088f577d175342a9d9b1 100644
--- a/python/paddle/v2/tests/test_parameters.py
+++ b/python/paddle/v2/tests/test_parameters.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import unittest
 import sys
 
diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py
index 192b0ee678bcee752327b8c4d41fba29ea361bb6..7920e342e1406e6b7eba5037558a87050b4ee49d 100644
--- a/python/paddle/v2/tests/test_rnn_layer.py
+++ b/python/paddle/v2/tests/test_rnn_layer.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import difflib
 import unittest
 
diff --git a/python/paddle/v2/tests/test_topology.py b/python/paddle/v2/tests/test_topology.py
index 7fd2ee82fde21d90be541a28f23742e51a9a1665..11b4154eedc629d703a90ece3768f2e26e981665 100644
--- a/python/paddle/v2/tests/test_topology.py
+++ b/python/paddle/v2/tests/test_topology.py
@@ -1,4 +1,4 @@
-# Copyright PaddlePaddle contributors. All Rights Reserved
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import unittest
 import paddle.v2.layer as layer
 import paddle.v2.topology as topology
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index db01ab7374eca18b6063dc634da5ef83c4bc9adc..a0060bf227f01389093a106acb5f087a39969e74 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -1,3 +1,16 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 Module Trainer
 """
diff --git a/tools/manylinux1/build_scripts/manylinux1-check.py b/tools/manylinux1/build_scripts/manylinux1-check.py
index 47fd3d673be662d2229480ee650dc3799301c31e..a27eab1c77c3b8e2be02d5bd492e5c2514b6d3b1 100644
--- a/tools/manylinux1/build_scripts/manylinux1-check.py
+++ b/tools/manylinux1/build_scripts/manylinux1-check.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Logic copied from PEP 513
 
 
diff --git a/tools/manylinux1/build_scripts/python-tag-abi-tag.py b/tools/manylinux1/build_scripts/python-tag-abi-tag.py
index 301fbf07a47fef03c91d9dd5f49c2894a5971319..cd2573314c5b250e00584152f693a70682c17f39 100644
--- a/tools/manylinux1/build_scripts/python-tag-abi-tag.py
+++ b/tools/manylinux1/build_scripts/python-tag-abi-tag.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Utility script to print the python tag + the abi tag for a Python
 # See PEP 425 for exactly what these are, but an example would be:
 #   cp27-cp27mu
diff --git a/tools/manylinux1/build_scripts/ssl-check.py b/tools/manylinux1/build_scripts/ssl-check.py
index a85d91978c510cccd366c174c317e6a3bdb589bd..34a3116207f9e69d436227fce9961e099e22d34c 100644
--- a/tools/manylinux1/build_scripts/ssl-check.py
+++ b/tools/manylinux1/build_scripts/ssl-check.py
@@ -1,3 +1,17 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # cf. https://github.com/pypa/manylinux/issues/53
 
 GOOD_SSL = "https://google.com"
diff --git a/v1_api_demo/README.md b/v1_api_demo/README.md
deleted file mode 100644
index 0460a85fae078800332982751a5d4a9644c50bd6..0000000000000000000000000000000000000000
--- a/v1_api_demo/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-The examples in v1_api_demo are using v1_api currently, and will be upgraded to v2_api later.
-Thus, v1_api_demo is a temporary directory. We decide not to maintain it and will delete it in future.
-
-Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and 
-[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
diff --git a/v1_api_demo/gan/.gitignore b/v1_api_demo/gan/.gitignore
deleted file mode 100644
index 93a6f5080a16a601cffb0bff51af9aef3ba3bae7..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-output/
-uniform_params/
-cifar_params/
-mnist_params/
-*.png
-.pydevproject
-.project
-*.log
-*.pyc
-data/mnist_data/
-data/cifar-10-batches-py/
diff --git a/v1_api_demo/gan/README.md b/v1_api_demo/gan/README.md
deleted file mode 100644
index 1908b534b0c1f63904d5503399b961d74ce0037c..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Generative Adversarial Networks (GAN) 
-
-This demo implements GAN training described in the original GAN paper (https://arxiv.org/abs/1406.2661) and DCGAN (https://arxiv.org/abs/1511.06434).
-
-The general training procedures are implemented in gan_trainer.py. The neural network configurations are specified in gan_conf.py (for synthetic data) and gan_conf_image.py (for image data).
-
-In order to run the model, first download the corresponding data by running the shell script in ./data.
-Then you can run the command below. The flag -d specifies the training data (cifar, mnist or uniform) and flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).  
-
-$python gan_trainer.py -d cifar --use_gpu 1
-
-The generated images will be stored in ./cifar_samples/
-The corresponding models will be stored in ./cifar_params/
diff --git a/v1_api_demo/gan/data/download_cifar.sh b/v1_api_demo/gan/data/download_cifar.sh
deleted file mode 100755
index bbadc7c10c73e45a0948018b8812f79040d14bc4..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/data/download_cifar.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
-tar zxf cifar-10-python.tar.gz
-rm cifar-10-python.tar.gz
diff --git a/v1_api_demo/gan/data/get_mnist_data.sh b/v1_api_demo/gan/data/get_mnist_data.sh
deleted file mode 100755
index a77c81bf5af9ddb6634ff89460797ca543c5e517..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/data/get_mnist_data.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env sh
-# This script downloads the mnist data and unzips it.
-set -e
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-rm -rf "$DIR/mnist_data"
-mkdir "$DIR/mnist_data"
-cd "$DIR/mnist_data"
-
-echo "Downloading..."
-
-for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
-do
-    if [ ! -e $fname ]; then
-        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
-        gunzip ${fname}.gz
-    fi
-done
diff --git a/v1_api_demo/gan/gan_conf.py b/v1_api_demo/gan/gan_conf.py
deleted file mode 100644
index 86ac2dffe5f4490a88e12d1fa5e8cd9fa61a69f4..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/gan_conf.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-
-mode = get_config_arg("mode", str, "generator")
-assert mode in set([
-    "generator", "discriminator", "generator_training", "discriminator_training"
-])
-
-is_generator_training = mode == "generator_training"
-is_discriminator_training = mode == "discriminator_training"
-is_generator = mode == "generator"
-is_discriminator = mode == "discriminator"
-
-# The network structure below follows the ref https://arxiv.org/abs/1406.2661
-# Here we used two hidden layers and batch_norm
-
-print('mode=%s' % mode)
-# the dim of the noise (z) as the input of the generator network
-noise_dim = 10
-# the dim of the hidden layer
-hidden_dim = 10
-# the dim of the generated sample
-sample_dim = 2
-
-settings(
-    batch_size=128,
-    learning_rate=1e-4,
-    learning_method=AdamOptimizer(beta1=0.5))
-
-
-def discriminator(sample):
-    """
-    discriminator ouputs the probablity of a sample is from generator
-    or real data.
-    The output has two dimenstional: dimension 0 is the probablity
-    of the sample is from generator and dimension 1 is the probabblity
-    of the sample is from real data.
-    """
-    param_attr = ParamAttr(is_static=is_generator_training)
-    bias_attr = ParamAttr(
-        is_static=is_generator_training, initial_mean=1.0, initial_std=0)
-
-    hidden = fc_layer(
-        input=sample,
-        name="dis_hidden",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=ReluActivation())
-
-    hidden2 = fc_layer(
-        input=hidden,
-        name="dis_hidden2",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-    hidden_bn = batch_norm_layer(
-        hidden2,
-        act=ReluActivation(),
-        name="dis_hidden_bn",
-        bias_attr=bias_attr,
-        param_attr=ParamAttr(
-            is_static=is_generator_training, initial_mean=1.0,
-            initial_std=0.02),
-        use_global_stats=False)
-
-    return fc_layer(
-        input=hidden_bn,
-        name="dis_prob",
-        size=2,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=SoftmaxActivation())
-
-
-def generator(noise):
-    """
-    generator generates a sample given noise
-    """
-    param_attr = ParamAttr(is_static=is_discriminator_training)
-    bias_attr = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
-
-    hidden = fc_layer(
-        input=noise,
-        name="gen_layer_hidden",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=ReluActivation())
-
-    hidden2 = fc_layer(
-        input=hidden,
-        name="gen_hidden2",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-    hidden_bn = batch_norm_layer(
-        hidden2,
-        act=ReluActivation(),
-        name="gen_layer_hidden_bn",
-        bias_attr=bias_attr,
-        param_attr=ParamAttr(
-            is_static=is_discriminator_training,
-            initial_mean=1.0,
-            initial_std=0.02),
-        use_global_stats=False)
-
-    return fc_layer(
-        input=hidden_bn,
-        name="gen_layer1",
-        size=sample_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-
-if is_generator_training:
-    noise = data_layer(name="noise", size=noise_dim)
-    sample = generator(noise)
-
-if is_discriminator_training:
-    sample = data_layer(name="sample", size=sample_dim)
-
-if is_generator_training or is_discriminator_training:
-    label = data_layer(name="label", size=1)
-    prob = discriminator(sample)
-    cost = cross_entropy(input=prob, label=label)
-    classification_error_evaluator(
-        input=prob, label=label, name=mode + '_error')
-    outputs(cost)
-
-if is_generator:
-    noise = data_layer(name="noise", size=noise_dim)
-    outputs(generator(noise))
diff --git a/v1_api_demo/gan/gan_conf_image.py b/v1_api_demo/gan/gan_conf_image.py
deleted file mode 100644
index c469227994c1a84d1aa73e03bbc74ebeac41d30e..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/gan_conf_image.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-
-mode = get_config_arg("mode", str, "generator")
-dataSource = get_config_arg("data", str, "mnist")
-assert mode in set([
-    "generator", "discriminator", "generator_training", "discriminator_training"
-])
-
-is_generator_training = mode == "generator_training"
-is_discriminator_training = mode == "discriminator_training"
-is_generator = mode == "generator"
-is_discriminator = mode == "discriminator"
-
-# The network structure below follows the dcgan paper 
-# (https://arxiv.org/abs/1511.06434)
-
-print('mode=%s' % mode)
-# the dim of the noise (z) as the input of the generator network
-noise_dim = 100
-# the number of filters in the layer in generator/discriminator that is 
-# closet to the image
-gf_dim = 64
-df_dim = 64
-if dataSource == "mnist":
-    sample_dim = 28  # image dim
-    c_dim = 1  # image color
-else:
-    sample_dim = 32
-    c_dim = 3
-s2, s4 = int(sample_dim / 2), int(sample_dim / 4),
-s8, s16 = int(sample_dim / 8), int(sample_dim / 16)
-
-settings(
-    batch_size=128,
-    learning_rate=2e-4,
-    learning_method=AdamOptimizer(beta1=0.5))
-
-
-def conv_bn(input,
-            channels,
-            imgSize,
-            num_filters,
-            output_x,
-            stride,
-            name,
-            param_attr,
-            bias_attr,
-            param_attr_bn,
-            bn,
-            trans=False,
-            act=ReluActivation()):
-    """
-    conv_bn is a utility function that constructs a convolution/deconv layer 
-    with an optional batch_norm layer
-
-    :param bn: whether to use batch_norm_layer
-    :type bn: bool
-    :param trans: whether to use conv (False) or deconv (True)
-    :type trans: bool
-    """
-
-    # calculate the filter_size and padding size based on the given
-    # imgSize and ouput size
-    tmp = imgSize - (output_x - 1) * stride
-    if tmp <= 1 or tmp > 5:
-        raise ValueError("conv input-output dimension does not fit")
-    elif tmp <= 3:
-        filter_size = tmp + 2
-        padding = 1
-    else:
-        filter_size = tmp
-        padding = 0
-
-    print(imgSize, output_x, stride, filter_size, padding)
-
-    if trans:
-        nameApx = "_convt"
-    else:
-        nameApx = "_conv"
-
-    if bn:
-        conv = img_conv_layer(
-            input,
-            filter_size=filter_size,
-            num_filters=num_filters,
-            name=name + nameApx,
-            num_channels=channels,
-            act=LinearActivation(),
-            groups=1,
-            stride=stride,
-            padding=padding,
-            bias_attr=bias_attr,
-            param_attr=param_attr,
-            shared_biases=True,
-            layer_attr=None,
-            filter_size_y=None,
-            stride_y=None,
-            padding_y=None,
-            trans=trans)
-
-        conv_bn = batch_norm_layer(
-            conv,
-            act=act,
-            name=name + nameApx + "_bn",
-            bias_attr=bias_attr,
-            param_attr=param_attr_bn,
-            use_global_stats=False)
-
-        return conv_bn
-    else:
-        conv = img_conv_layer(
-            input,
-            filter_size=filter_size,
-            num_filters=num_filters,
-            name=name + nameApx,
-            num_channels=channels,
-            act=act,
-            groups=1,
-            stride=stride,
-            padding=padding,
-            bias_attr=bias_attr,
-            param_attr=param_attr,
-            shared_biases=True,
-            layer_attr=None,
-            filter_size_y=None,
-            stride_y=None,
-            padding_y=None,
-            trans=trans)
-        return conv
-
-
-def generator(noise):
-    """
-    generator generates a sample given noise
-    """
-    param_attr = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.02)
-    bias_attr = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.0)
-
-    param_attr_bn = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=1.0, initial_std=0.02)
-
-    h1 = fc_layer(
-        input=noise,
-        name="gen_layer_h1",
-        size=s8 * s8 * gf_dim * 4,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-    h1_bn = batch_norm_layer(
-        h1,
-        act=ReluActivation(),
-        name="gen_layer_h1_bn",
-        bias_attr=bias_attr,
-        param_attr=param_attr_bn,
-        use_global_stats=False)
-
-    h2_bn = conv_bn(
-        h1_bn,
-        channels=gf_dim * 4,
-        output_x=s8,
-        num_filters=gf_dim * 2,
-        imgSize=s4,
-        stride=2,
-        name="gen_layer_h2",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True,
-        trans=True)
-
-    h3_bn = conv_bn(
-        h2_bn,
-        channels=gf_dim * 2,
-        output_x=s4,
-        num_filters=gf_dim,
-        imgSize=s2,
-        stride=2,
-        name="gen_layer_h3",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True,
-        trans=True)
-
-    return conv_bn(
-        h3_bn,
-        channels=gf_dim,
-        output_x=s2,
-        num_filters=c_dim,
-        imgSize=sample_dim,
-        stride=2,
-        name="gen_layer_h4",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=False,
-        trans=True,
-        act=TanhActivation())
-
-
-def discriminator(sample):
-    """
-    discriminator ouputs the probablity of a sample is from generator
-    or real data.
-    The output has two dimenstional: dimension 0 is the probablity
-    of the sample is from generator and dimension 1 is the probabblity
-    of the sample is from real data.
-    """
-    param_attr = ParamAttr(
-        is_static=is_generator_training, initial_mean=0.0, initial_std=0.02)
-    bias_attr = ParamAttr(
-        is_static=is_generator_training, initial_mean=0.0, initial_std=0.0)
-
-    param_attr_bn = ParamAttr(
-        is_static=is_generator_training, initial_mean=1.0, initial_std=0.02)
-
-    h0 = conv_bn(
-        sample,
-        channels=c_dim,
-        imgSize=sample_dim,
-        num_filters=df_dim,
-        output_x=s2,
-        stride=2,
-        name="dis_h0",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=False)
-
-    h1_bn = conv_bn(
-        h0,
-        channels=df_dim,
-        imgSize=s2,
-        num_filters=df_dim * 2,
-        output_x=s4,
-        stride=2,
-        name="dis_h1",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True)
-
-    h2_bn = conv_bn(
-        h1_bn,
-        channels=df_dim * 2,
-        imgSize=s4,
-        num_filters=df_dim * 4,
-        output_x=s8,
-        stride=2,
-        name="dis_h2",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True)
-
-    return fc_layer(
-        input=h2_bn,
-        name="dis_prob",
-        size=2,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=SoftmaxActivation())
-
-
-if is_generator_training:
-    noise = data_layer(name="noise", size=noise_dim)
-    sample = generator(noise)
-
-if is_discriminator_training:
-    sample = data_layer(name="sample", size=sample_dim * sample_dim * c_dim)
-
-if is_generator_training or is_discriminator_training:
-    label = data_layer(name="label", size=1)
-    prob = discriminator(sample)
-    cost = cross_entropy(input=prob, label=label)
-    classification_error_evaluator(
-        input=prob, label=label, name=mode + '_error')
-    outputs(cost)
-
-if is_generator:
-    noise = data_layer(name="noise", size=noise_dim)
-    outputs(generator(noise))
diff --git a/v1_api_demo/gan/gan_trainer.py b/v1_api_demo/gan/gan_trainer.py
deleted file mode 100644
index 4a26c230f7a21cc6dd4a3cdb52e32730b1ce73ca..0000000000000000000000000000000000000000
--- a/v1_api_demo/gan/gan_trainer.py
+++ /dev/null
@@ -1,349 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import random
-import numpy
-import cPickle
-import sys, os
-from PIL import Image
-
-from paddle.trainer.config_parser import parse_config
-from paddle.trainer.config_parser import logger
-import py_paddle.swig_paddle as api
-import matplotlib.pyplot as plt
-
-
-def plot2DScatter(data, outputfile):
-    '''
-    Plot the data as a 2D scatter plot and save to outputfile
-    data needs to be two dimensinoal
-    '''
-    x = data[:, 0]
-    y = data[:, 1]
-    logger.info("The mean vector is %s" % numpy.mean(data, 0))
-    logger.info("The std vector is %s" % numpy.std(data, 0))
-
-    heatmap, xedges, yedges = numpy.histogram2d(x, y, bins=50)
-    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
-
-    plt.clf()
-    plt.scatter(x, y)
-    plt.savefig(outputfile, bbox_inches='tight')
-
-
-def CHECK_EQ(a, b):
-    assert a == b, "a=%s, b=%s" % (a, b)
-
-
-def copy_shared_parameters(src, dst):
-    '''
-    copy the parameters from src to dst
-    :param src: the source of the parameters
-    :type src: GradientMachine
-    :param dst: the destination of the parameters
-    :type dst: GradientMachine
-    '''
-    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
-    src_params = dict([(p.getName(), p) for p in src_params])
-
-    for i in xrange(dst.getParameterSize()):
-        dst_param = dst.getParameter(i)
-        src_param = src_params.get(dst_param.getName(), None)
-        if src_param is None:
-            continue
-        src_value = src_param.getBuf(api.PARAMETER_VALUE)
-        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
-        CHECK_EQ(len(src_value), len(dst_value))
-        dst_value.copyFrom(src_value)
-        dst_param.setValueUpdated()
-
-
-def print_parameters(src):
-    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
-
-    print "***************"
-    for p in src_params:
-        print "Name is %s" % p.getName()
-        print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray(
-        )
-
-
-def load_mnist_data(imageFile):
-    f = open(imageFile, "rb")
-    f.read(16)
-
-    # Define number of samples for train/test
-    if "train" in imageFile:
-        n = 60000
-    else:
-        n = 10000
-
-    data = numpy.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
-    data = data / 255.0 * 2.0 - 1.0
-
-    f.close()
-    return data.astype('float32')
-
-
-def load_cifar_data(cifar_path):
-    batch_size = 10000
-    data = numpy.zeros((5 * batch_size, 32 * 32 * 3), dtype="float32")
-    for i in range(1, 6):
-        file = cifar_path + "/data_batch_" + str(i)
-        fo = open(file, 'rb')
-        dict = cPickle.load(fo)
-        fo.close()
-        data[(i - 1) * batch_size:(i * batch_size), :] = dict["data"]
-
-    data = data / 255.0 * 2.0 - 1.0
-    return data
-
-
-# synthesize 2-D uniform data
-def load_uniform_data():
-    data = numpy.random.rand(1000000, 2).astype('float32')
-    return data
-
-
-def merge(images, size):
-    if images.shape[1] == 28 * 28:
-        h, w, c = 28, 28, 1
-    else:
-        h, w, c = 32, 32, 3
-    img = numpy.zeros((h * size[0], w * size[1], c))
-    for idx in xrange(size[0] * size[1]):
-        i = idx % size[1]
-        j = idx // size[1]
-        img[j*h:j*h+h, i*w:i*w+w, :] = \
-          ((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
-    return img.astype('uint8')
-
-
-def save_images(images, path):
-    merged_img = merge(images, [8, 8])
-    if merged_img.shape[2] == 1:
-        im = Image.fromarray(numpy.squeeze(merged_img)).convert('RGB')
-    else:
-        im = Image.fromarray(merged_img, mode="RGB")
-    im.save(path)
-
-
-def get_real_samples(batch_size, data_np):
-    return data_np[numpy.random.choice(
-        data_np.shape[0], batch_size, replace=False), :]
-
-
-def get_noise(batch_size, noise_dim):
-    return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
-
-
-def get_fake_samples(generator_machine, batch_size, noise):
-    gen_inputs = api.Arguments.createArguments(1)
-    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
-    gen_outputs = api.Arguments.createArguments(0)
-    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
-    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
-    return fake_samples
-
-
-def get_training_loss(training_machine, inputs):
-    outputs = api.Arguments.createArguments(0)
-    training_machine.forward(inputs, outputs, api.PASS_TEST)
-    loss = outputs.getSlotValue(0).copyToNumpyMat()
-    return numpy.mean(loss)
-
-
-def prepare_discriminator_data_batch_pos(batch_size, data_np):
-    real_samples = get_real_samples(batch_size, data_np)
-    labels = numpy.ones(batch_size, dtype='int32')
-    inputs = api.Arguments.createArguments(2)
-    inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(real_samples))
-    inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
-    return inputs
-
-
-def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
-    fake_samples = get_fake_samples(generator_machine, batch_size, noise)
-    labels = numpy.zeros(batch_size, dtype='int32')
-    inputs = api.Arguments.createArguments(2)
-    inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(fake_samples))
-    inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
-    return inputs
-
-
-def prepare_generator_data_batch(batch_size, noise):
-    label = numpy.ones(batch_size, dtype='int32')
-    inputs = api.Arguments.createArguments(2)
-    inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
-    inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(label))
-    return inputs
-
-
-def find(iterable, cond):
-    for item in iterable:
-        if cond(item):
-            return item
-    return None
-
-
-def get_layer_size(model_conf, layer_name):
-    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
-    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
-    return layer_conf.size
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
-    parser.add_argument(
-        "--use_gpu", default="1", help="1 means use gpu for training")
-    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
-    args = parser.parse_args()
-    data_source = args.data_source
-    use_gpu = args.use_gpu
-    assert data_source in ["mnist", "cifar", "uniform"]
-    assert use_gpu in ["0", "1"]
-
-    if not os.path.exists("./%s_samples/" % data_source):
-        os.makedirs("./%s_samples/" % data_source)
-
-    if not os.path.exists("./%s_params/" % data_source):
-        os.makedirs("./%s_params/" % data_source)
-
-    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
-                   '--log_period=100', '--gpu_id=' + args.gpu_id,
-                   '--save_dir=' + "./%s_params/" % data_source)
-
-    if data_source == "uniform":
-        conf = "gan_conf.py"
-        num_iter = 10000
-    else:
-        conf = "gan_conf_image.py"
-        num_iter = 1000
-
-    gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
-    dis_conf = parse_config(conf,
-                            "mode=discriminator_training,data=" + data_source)
-    generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
-    batch_size = dis_conf.opt_config.batch_size
-    noise_dim = get_layer_size(gen_conf.model_config, "noise")
-
-    if data_source == "mnist":
-        data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
-    elif data_source == "cifar":
-        data_np = load_cifar_data("./data/cifar-10-batches-py/")
-    else:
-        data_np = load_uniform_data()
-
-    # this creates a gradient machine for discriminator
-    dis_training_machine = api.GradientMachine.createFromConfigProto(
-        dis_conf.model_config)
-    # this create a gradient machine for generator    
-    gen_training_machine = api.GradientMachine.createFromConfigProto(
-        gen_conf.model_config)
-
-    # generator_machine is used to generate data only, which is used for
-    # training discriminator
-    logger.info(str(generator_conf.model_config))
-    generator_machine = api.GradientMachine.createFromConfigProto(
-        generator_conf.model_config)
-
-    dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
-
-    gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
-
-    dis_trainer.startTrain()
-    gen_trainer.startTrain()
-
-    # Sync parameters between networks (GradientMachine) at the beginning
-    copy_shared_parameters(gen_training_machine, dis_training_machine)
-    copy_shared_parameters(gen_training_machine, generator_machine)
-
-    # constrain that either discriminator or generator can not be trained
-    # consecutively more than MAX_strike times
-    curr_train = "dis"
-    curr_strike = 0
-    MAX_strike = 5
-
-    for train_pass in xrange(100):
-        dis_trainer.startTrainPass()
-        gen_trainer.startTrainPass()
-        for i in xrange(num_iter):
-            # Do forward pass in discriminator to get the dis_loss
-            noise = get_noise(batch_size, noise_dim)
-            data_batch_dis_pos = prepare_discriminator_data_batch_pos(
-                batch_size, data_np)
-            dis_loss_pos = get_training_loss(dis_training_machine,
-                                             data_batch_dis_pos)
-
-            data_batch_dis_neg = prepare_discriminator_data_batch_neg(
-                generator_machine, batch_size, noise)
-            dis_loss_neg = get_training_loss(dis_training_machine,
-                                             data_batch_dis_neg)
-
-            dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
-
-            # Do forward pass in generator to get the gen_loss
-            data_batch_gen = prepare_generator_data_batch(batch_size, noise)
-            gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
-
-            if i % 100 == 0:
-                print "d_pos_loss is %s     d_neg_loss is %s" % (dis_loss_pos,
-                                                                 dis_loss_neg)
-                print "d_loss is %s    g_loss is %s" % (dis_loss, gen_loss)
-
-            # Decide which network to train based on the training history
-            # And the relative size of the loss        
-            if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
-               ((curr_train == "gen" and curr_strike == MAX_strike) or dis_loss > gen_loss):
-                if curr_train == "dis":
-                    curr_strike += 1
-                else:
-                    curr_train = "dis"
-                    curr_strike = 1
-                dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
-                dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
-                copy_shared_parameters(dis_training_machine,
-                                       gen_training_machine)
-
-            else:
-                if curr_train == "gen":
-                    curr_strike += 1
-                else:
-                    curr_train = "gen"
-                    curr_strike = 1
-                gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
-                # TODO: add API for paddle to allow true parameter sharing between different GradientMachines 
-                # so that we do not need to copy shared parameters. 
-                copy_shared_parameters(gen_training_machine,
-                                       dis_training_machine)
-                copy_shared_parameters(gen_training_machine, generator_machine)
-
-        dis_trainer.finishTrainPass()
-        gen_trainer.finishTrainPass()
-        # At the end of each pass, save the generated samples/images
-        fake_samples = get_fake_samples(generator_machine, batch_size, noise)
-        if data_source == "uniform":
-            plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" %
-                          (data_source, train_pass))
-        else:
-            save_images(fake_samples, "./%s_samples/train_pass%s.png" %
-                        (data_source, train_pass))
-    dis_trainer.finishTrain()
-    gen_trainer.finishTrain()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/mnist/.gitignore b/v1_api_demo/mnist/.gitignore
deleted file mode 100644
index 7e61d5e3a0cabd46d4185454d46610ac2ee2e63f..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/.gitignore
+++ /dev/null
@@ -1,10 +0,0 @@
-data/raw_data
-data/*.list
-mnist_vgg_model
-plot.png
-train.log
-*pyc
-.ipynb_checkpoints
-params.pkl
-params.tar
-params.tar.gz
diff --git a/v1_api_demo/mnist/api_train.py b/v1_api_demo/mnist/api_train.py
deleted file mode 100644
index ea1caa7dd9653a2cc2860ace736fe3d25a3767e0..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/api_train.py
+++ /dev/null
@@ -1,196 +0,0 @@
-"""
-A very basic example for how to use current Raw SWIG API to train mnist network.
-
-Current implementation uses Raw SWIG, which means the API call is directly \
-passed to C++ side of Paddle.
-
-The user api could be simpler and carefully designed.
-"""
-import random
-
-import numpy as np
-import paddle.v2 as paddle_v2
-import py_paddle.swig_paddle as api
-from paddle.trainer_config_helpers import *
-from py_paddle import DataProviderConverter
-
-from mnist_util import read_from_mnist
-
-
-def init_parameter(network):
-    assert isinstance(network, api.GradientMachine)
-    for each_param in network.getParameters():
-        assert isinstance(each_param, api.Parameter)
-        array_size = len(each_param)
-        array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
-        each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)
-
-
-def generator_to_batch(generator, batch_size):
-    ret_val = list()
-    for each_item in generator:
-        ret_val.append(each_item)
-        if len(ret_val) == batch_size:
-            yield ret_val
-            ret_val = list()
-    if len(ret_val) != 0:
-        yield ret_val
-
-
-class BatchPool(object):
-    def __init__(self, generator, batch_size):
-        self.data = list(generator)
-        self.batch_size = batch_size
-
-    def __call__(self):
-        random.shuffle(self.data)
-        for offset in xrange(0, len(self.data), self.batch_size):
-            limit = min(offset + self.batch_size, len(self.data))
-            yield self.data[offset:limit]
-
-
-def input_order_converter(generator):
-    for each_item in generator:
-        yield each_item['pixel'], each_item['label']
-
-
-def main():
-    api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 cpu cores
-
-    optimizer = paddle_v2.optimizer.Adam(
-        learning_rate=1e-4,
-        batch_size=1000,
-        model_average=ModelAverage(average_window=0.5),
-        regularization=L2Regularization(rate=0.5))
-
-    # Create Local Updater. Local means not run in cluster.
-    # For a cluster training, here we can change to createRemoteUpdater
-    # in future.
-    updater = optimizer.create_local_updater()
-    assert isinstance(updater, api.ParameterUpdater)
-
-    # define network
-    images = paddle_v2.layer.data(
-        name='pixel', type=paddle_v2.data_type.dense_vector(784))
-    label = paddle_v2.layer.data(
-        name='label', type=paddle_v2.data_type.integer_value(10))
-    hidden1 = paddle_v2.layer.fc(input=images, size=200)
-    hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
-    inference = paddle_v2.layer.fc(input=hidden2,
-                                   size=10,
-                                   act=paddle_v2.activation.Softmax())
-    cost = paddle_v2.layer.classification_cost(input=inference, label=label)
-
-    # Create Simple Gradient Machine.
-    model_config = paddle_v2.layer.parse_network(cost)
-    m = api.GradientMachine.createFromConfigProto(model_config,
-                                                  api.CREATE_MODE_NORMAL,
-                                                  optimizer.enable_types())
-
-    # This type check is not useful. Only enable type hint in IDE.
-    # Such as PyCharm
-    assert isinstance(m, api.GradientMachine)
-
-    # Initialize Parameter by numpy.
-    init_parameter(network=m)
-
-    # Initialize ParameterUpdater.
-    updater.init(m)
-
-    # DataProvider Converter is a utility convert Python Object to Paddle C++
-    # Input. The input format is as same as Paddle's DataProvider.
-    converter = DataProviderConverter(input_types=[images.type, label.type])
-
-    train_file = './data/raw_data/train'
-    test_file = './data/raw_data/t10k'
-
-    # start gradient machine.
-    # the gradient machine must be started before invoke forward/backward.
-    # not just for training, but also for inference.
-    m.start()
-
-    # evaluator can print error rate, etc. It is a C++ class.
-    batch_evaluator = m.makeEvaluator()
-    test_evaluator = m.makeEvaluator()
-
-    # Get Train Data.
-    # TrainData will stored in a data pool. Currently implementation is not care
-    # about memory, speed. Just a very naive implementation.
-    train_data_generator = input_order_converter(read_from_mnist(train_file))
-    train_data = BatchPool(train_data_generator, 512)
-
-    # outArgs is Neural Network forward result. Here is not useful, just passed
-    # to gradient_machine.forward
-    outArgs = api.Arguments.createArguments(0)
-
-    for pass_id in xrange(2):  # we train 2 passes.
-        updater.startPass()
-
-        for batch_id, data_batch in enumerate(train_data()):
-            # data_batch is input images.
-            # here, for online learning, we could get data_batch from network.
-
-            # Start update one batch.
-            pass_type = updater.startBatch(len(data_batch))
-
-            # Start BatchEvaluator.
-            # batch_evaluator can be used between start/finish.
-            batch_evaluator.start()
-
-            # forwardBackward is a shortcut for forward and backward.
-            # It is sometimes faster than invoke forward/backward separately,
-            # because in GradientMachine, it may be async.
-            m.forwardBackward(converter(data_batch), outArgs, pass_type)
-
-            for each_param in m.getParameters():
-                updater.update(each_param)
-
-            # Get cost. We use numpy to calculate total cost for this batch.
-            cost_vec = outArgs.getSlotValue(0)
-            cost_vec = cost_vec.copyToNumpyMat()
-            cost = cost_vec.sum() / len(data_batch)
-
-            # Make evaluator works.
-            m.eval(batch_evaluator)
-
-            # Print logs.
-            print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
-                cost, batch_evaluator
-
-            batch_evaluator.finish()
-            # Finish batch.
-            #  * will clear gradient.
-            #  * ensure all values should be updated.
-            updater.finishBatch(cost)
-
-        # testing stage. use test data set to test current network.
-        updater.apply()
-        test_evaluator.start()
-        test_data_generator = input_order_converter(read_from_mnist(test_file))
-        for data_batch in generator_to_batch(test_data_generator, 512):
-            # in testing stage, only forward is needed.
-            m.forward(converter(data_batch), outArgs, api.PASS_TEST)
-            m.eval(test_evaluator)
-
-        # print error rate for test data set
-        print 'Pass', pass_id, ' test evaluator: ', test_evaluator
-        test_evaluator.finish()
-        updater.restore()
-
-        updater.catchUpWith()
-        params = m.getParameters()
-        for each_param in params:
-            assert isinstance(each_param, api.Parameter)
-            value = each_param.getBuf(api.PARAMETER_VALUE)
-            value = value.copyToNumpyArray()
-
-            # Here, we could save parameter to every where you want
-            print each_param.getName(), value
-
-        updater.finishPass()
-
-    m.finish()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/mnist/data/generate_list.py b/v1_api_demo/mnist/data/generate_list.py
deleted file mode 100644
index 49981cc7a93308bc96ad5097eba749440e958525..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/data/generate_list.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" + "\n")
-o.close()
-
-o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" + "\n")
-o.close()
diff --git a/v1_api_demo/mnist/data/get_mnist_data.sh b/v1_api_demo/mnist/data/get_mnist_data.sh
deleted file mode 100755
index 5a2e34026d4fe7f8315d4f5453bec7c4ee4f6885..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/data/get_mnist_data.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env sh
-# This scripts downloads the mnist data and unzips it.
-set -e
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-rm -rf "$DIR/raw_data"
-mkdir "$DIR/raw_data"
-cd "$DIR/raw_data"
-
-echo "Downloading..."
-
-for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
-do
-    if [ ! -e $fname ]; then
-        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
-        gunzip ${fname}.gz
-    fi
-done
-
-cd $DIR
-rm -f *.list
-python generate_list.py
diff --git a/v1_api_demo/mnist/light_mnist.py b/v1_api_demo/mnist/light_mnist.py
deleted file mode 100644
index 33409054357d2f0c6a765b3ab3164eb2e584467e..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/light_mnist.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-is_predict = get_config_arg("is_predict", bool, False)
-
-####################Data Configuration ##################
-
-if not is_predict:
-    data_dir = './data/'
-    define_py_data_sources2(
-        train_list=data_dir + 'train.list',
-        test_list=data_dir + 'test.list',
-        module='mnist_provider',
-        obj='process')
-
-######################Algorithm Configuration #############
-settings(batch_size=50, learning_rate=0.001, learning_method=AdamOptimizer())
-
-#######################Network Configuration #############
-
-data_size = 1 * 28 * 28
-label_size = 10
-img = data_layer(name='pixel', size=data_size)
-
-
-# light cnn
-# A shallower cnn model: [CNN, BN, ReLU, Max-Pooling] x4 + FC x1
-# Easier to train for mnist dataset and quite efficient
-# Final performance is close to deeper ones on tasks such as digital and character classification 
-def light_cnn(input_image, num_channels, num_classes):
-    def __light__(ipt,
-                  num_filter=128,
-                  times=1,
-                  conv_filter_size=3,
-                  dropouts=0,
-                  num_channels_=None):
-        return img_conv_group(
-            input=ipt,
-            num_channels=num_channels_,
-            pool_size=2,
-            pool_stride=2,
-            conv_padding=0,
-            conv_num_filter=[num_filter] * times,
-            conv_filter_size=conv_filter_size,
-            conv_act=ReluActivation(),
-            conv_with_batchnorm=True,
-            conv_batchnorm_drop_rate=dropouts,
-            pool_type=MaxPooling())
-
-    tmp = __light__(input_image, num_filter=128, num_channels_=num_channels)
-    tmp = __light__(tmp, num_filter=128)
-    tmp = __light__(tmp, num_filter=128)
-    tmp = __light__(tmp, num_filter=128, conv_filter_size=1)
-
-    tmp = fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
-    return tmp
-
-
-predict = light_cnn(input_image=img, num_channels=1, num_classes=label_size)
-
-if not is_predict:
-    lbl = data_layer(name="label", size=label_size)
-    inputs(img, lbl)
-    outputs(classification_cost(input=predict, label=lbl))
-else:
-    outputs(predict)
diff --git a/v1_api_demo/mnist/mnist_provider.py b/v1_api_demo/mnist/mnist_provider.py
deleted file mode 100644
index 888cfef1e7e3e1b4f556756c003eeb23e741cabe..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/mnist_provider.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from paddle.trainer.PyDataProvider2 import *
-from mnist_util import read_from_mnist
-
-
-# Define a py data provider
-@provider(
-    input_types={'pixel': dense_vector(28 * 28),
-                 'label': integer_value(10)},
-    cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, filename):  # settings is not used currently.
-    for each in read_from_mnist(filename):
-        yield each
diff --git a/v1_api_demo/mnist/mnist_util.py b/v1_api_demo/mnist/mnist_util.py
deleted file mode 100644
index 3fd88ae7edc821296ca0accbf6dedc083e411744..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/mnist_util.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import numpy
-
-__all__ = ['read_from_mnist']
-
-
-def read_from_mnist(filename):
-    imgf = filename + "-images-idx3-ubyte"
-    labelf = filename + "-labels-idx1-ubyte"
-    f = open(imgf, "rb")
-    l = open(labelf, "rb")
-
-    f.read(16)
-    l.read(8)
-
-    # Define number of samples for train/test
-    if "train" in filename:
-        n = 60000
-    else:
-        n = 10000
-
-    images = numpy.fromfile(
-        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
-    images = images / 255.0 * 2.0 - 1.0
-    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
-
-    for i in xrange(n):
-        yield {"pixel": images[i, :], 'label': labels[i]}
-
-    f.close()
-    l.close()
diff --git a/v1_api_demo/mnist/train.sh b/v1_api_demo/mnist/train.sh
deleted file mode 100755
index ca2b1ad9eb960685b95b0f294a9b929e1a4acab1..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/train.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-config=vgg_16_mnist.py
-output=./mnist_vgg_model
-log=train.log
-
-paddle train \
---config=$config \
---dot_period=10 \
---log_period=100 \
---test_all_data_in_one_period=1 \
---use_gpu=0 \
---trainer_count=1 \
---num_passes=100 \
---save_dir=$output \
-2>&1 | tee $log
-paddle usage -l $log -e $? -n "mnist_train" >/dev/null 2>&1
-
-python -m paddle.utils.plotcurve -i $log > plot.png
diff --git a/v1_api_demo/mnist/vgg_16_mnist.py b/v1_api_demo/mnist/vgg_16_mnist.py
deleted file mode 100644
index a819b391c690fb473801eb2e7ba3161cc31b5b4b..0000000000000000000000000000000000000000
--- a/v1_api_demo/mnist/vgg_16_mnist.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-is_predict = get_config_arg("is_predict", bool, False)
-
-####################Data Configuration ##################
-
-if not is_predict:
-    data_dir = './data/'
-    define_py_data_sources2(
-        train_list=data_dir + 'train.list',
-        test_list=data_dir + 'test.list',
-        module='mnist_provider',
-        obj='process')
-
-######################Algorithm Configuration #############
-settings(
-    batch_size=128,
-    learning_rate=0.1 / 128.0,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * 128))
-
-#######################Network Configuration #############
-
-data_size = 1 * 28 * 28
-label_size = 10
-img = data_layer(name='pixel', size=data_size)
-
-# small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
-
-if not is_predict:
-    lbl = data_layer(name="label", size=label_size)
-    inputs(img, lbl)
-    outputs(classification_cost(input=predict, label=lbl))
-else:
-    outputs(predict)
diff --git a/v1_api_demo/model_zoo/embedding/.gitignore b/v1_api_demo/model_zoo/embedding/.gitignore
deleted file mode 100644
index 908f5a3fb2f7c34368ea24d0fc3ac9cac29a4fdb..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/embedding/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-baidu.dict
-model_*.emb
diff --git a/v1_api_demo/model_zoo/embedding/extract_para.py b/v1_api_demo/model_zoo/embedding/extract_para.py
deleted file mode 100755
index 570b90c1f772c8f6abfc6cda02560fd3471ef0b6..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/embedding/extract_para.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python extract_para.py --preModel PREMODEL --preDict PREDICT \
-                            --usrModel USRMODEL --usrDict USRDICT -d DIM
-
-Options:
-    -h, --help          show this help message and exit
-    --preModel PREMODEL the name of pretrained embedding model
-    --preDict PREDICT   the name of pretrained dictionary
-    --usrModel usrModel the name of output usr embedding model
-    --usrDict usrDict   the name of user specified dictionary
-    -d DIM              dimension of parameter
-"""
-from optparse import OptionParser
-import struct
-
-
-def get_row_index(preDict, usrDict):
-    """
-    Get the row positions for all words in user dictionary from pre-trained dictionary.
-    return: a list of row positions
-    Example: preDict='a\nb\nc\n', usrDict='a\nc\n', then return [0,2]
-    """
-    pos = []
-    index = dict()
-    with open(preDict, "r") as f:
-        for line_index, line in enumerate(f):
-            word = line.strip().split()[0]
-            index[word] = line_index
-    with open(usrDict, "r") as f:
-        for line in f:
-            word = line.strip().split()[0]
-            pos.append(index[word])
-    return pos
-
-
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
-                                  paraDim):
-    """
-    Extract desired parameters from a pretrained embedding model based on user dictionary
-    """
-    if paraDim not in [32, 64, 128, 256]:
-        raise RuntimeError("We only support 32, 64, 128, 256 dimensions now")
-
-    fi = open(preModel, "rb")
-    fo = open(usrModel, "wb")
-
-    # write filehead
-    rowIndex = get_row_index(preDict, usrDict)
-    newHead = struct.pack("iil", 0, 4, len(rowIndex) * paraDim)
-    fo.write(newHead)
-    bytes = 4 * paraDim
-    for i in range(0, len(rowIndex)):
-        # find the absolute position of input file
-        fi.seek(rowIndex[i] * bytes + 16, 0)
-        fo.write(fi.read(bytes))
-
-    print "extract parameters finish, total", len(rowIndex), "lines"
-    fi.close()
-
-
-def main():
-    """
-    Main entry for running paraconvert.py 
-    """
-    usage = "usage: \n" \
-            "python %prog --preModel PREMODEL --preDict PREDICT" \
-            " --usrModel USRMODEL --usrDict USRDICT -d DIM"
-    parser = OptionParser(usage)
-    parser.add_option(
-        "--preModel",
-        action="store",
-        dest="preModel",
-        help="the name of pretrained embedding model")
-    parser.add_option(
-        "--preDict",
-        action="store",
-        dest="preDict",
-        help="the name of pretrained dictionary")
-    parser.add_option(
-        "--usrModel",
-        action="store",
-        dest="usrModel",
-        help="the name of output usr embedding model")
-    parser.add_option(
-        "--usrDict",
-        action="store",
-        dest="usrDict",
-        help="the name of user specified dictionary")
-    parser.add_option(
-        "-d", action="store", dest="dim", help="dimension of parameter")
-    (options, args) = parser.parse_args()
-    extract_parameters_by_usrDict(options.preModel, options.preDict,
-                                  options.usrModel, options.usrDict,
-                                  int(options.dim))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/model_zoo/embedding/paraconvert.py b/v1_api_demo/model_zoo/embedding/paraconvert.py
deleted file mode 100755
index ce7a70efc43d7f85708f1e12bb94739f3588370c..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/embedding/paraconvert.py
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
-    python paraconvert.py --t2b -i INPUT -o OUTPUT
-
-Options:
-    -h, --help  show this help message and exit
-    --b2t       convert parameter file of embedding model from binary to text
-    --t2b       convert parameter file of embedding model from text to binary
-    -i INPUT    input parameter file name
-    -o OUTPUT   output parameter file name
-    -d DIM      dimension of parameter
-"""
-from optparse import OptionParser
-import struct
-
-
-def binary2text(input, output, paraDim):
-    """
-    Convert a binary parameter file of embedding model to be a text file.  
-    input: the name of input binary parameter file, the format is:
-           1) the first 16 bytes is filehead:
-                version(4 bytes): version of paddle, default = 0
-                floatSize(4 bytes): sizeof(float) = 4
-                paraCount(8 bytes): total number of parameter
-           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes 
-    output: the name of output text parameter file, for example:
-           0,4,32156096
-           -0.7845433,1.1937413,-0.1704215,...
-           0.0000909,0.0009465,-0.0008813,...
-           ...
-           the format is:
-           1) the first line is filehead: 
-              version=0, floatSize=4, paraCount=32156096
-           2) other lines print the paramters
-              a) each line prints paraDim paramters splitted by ','
-              b) there is paraCount/paraDim lines (embedding words)
-    paraDim: dimension of parameters 
-    """
-    fi = open(input, "rb")
-    fo = open(output, "w")
-    """
-    """
-    version, floatSize, paraCount = struct.unpack("iil", fi.read(16))
-    newHead = ','.join([str(version), str(floatSize), str(paraCount)])
-    print >> fo, newHead
-
-    bytes = 4 * int(paraDim)
-    format = "%df" % int(paraDim)
-    context = fi.read(bytes)
-    line = 0
-
-    while context:
-        numbers = struct.unpack(format, context)
-        lst = []
-        for i in numbers:
-            lst.append('%8.7f' % i)
-        print >> fo, ','.join(lst)
-        context = fi.read(bytes)
-        line += 1
-    fi.close()
-    fo.close()
-    print "binary2text finish, total", line, "lines"
-
-
-def get_para_count(input):
-    """
-    Compute the total number of embedding parameters in input text file. 
-    input: the name of input text file
-    """
-    numRows = 1
-    paraDim = 0
-    with open(input) as f:
-        line = f.readline()
-        paraDim = len(line.split(","))
-        for line in f:
-            numRows += 1
-    return numRows * paraDim
-
-
-def text2binary(input, output, paddle_head=True):
-    """
-    Convert a text parameter file of embedding model to be a binary file.
-    input: the name of input text parameter file, for example:
-           -0.7845433,1.1937413,-0.1704215,...
-           0.0000909,0.0009465,-0.0008813,... 
-           ...
-           the format is:
-           1) it doesn't have filehead
-           2) each line stores the same dimension of parameters, 
-              the separator is commas ','
-    output: the name of output binary parameter file, the format is:
-           1) the first 16 bytes is filehead: 
-             version(4 bytes), floatSize(4 bytes), paraCount(8 bytes)
-           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
-    """
-    fi = open(input, "r")
-    fo = open(output, "wb")
-
-    newHead = struct.pack("iil", 0, 4, get_para_count(input))
-    fo.write(newHead)
-
-    count = 0
-    for line in fi:
-        line = line.strip().split(",")
-        for i in range(0, len(line)):
-            binary_data = struct.pack("f", float(line[i]))
-            fo.write(binary_data)
-        count += 1
-    fi.close()
-    fo.close()
-    print "text2binary finish, total", count, "lines"
-
-
-def main():
-    """
-    Main entry for running paraconvert.py 
-    """
-    usage = "usage: \n" \
-            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
-            "python %prog --t2b -i INPUT -o OUTPUT"
-    parser = OptionParser(usage)
-    parser.add_option(
-        "--b2t",
-        action="store_true",
-        help="convert parameter file of embedding model from binary to text")
-    parser.add_option(
-        "--t2b",
-        action="store_true",
-        help="convert parameter file of embedding model from text to binary")
-    parser.add_option(
-        "-i", action="store", dest="input", help="input parameter file name")
-    parser.add_option(
-        "-o", action="store", dest="output", help="output parameter file name")
-    parser.add_option(
-        "-d", action="store", dest="dim", help="dimension of parameter")
-    (options, args) = parser.parse_args()
-    if options.b2t:
-        binary2text(options.input, options.output, options.dim)
-    if options.t2b:
-        text2binary(options.input, options.output)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh b/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh
deleted file mode 100755
index f61c65a935c76032a06613cfe0b50f1c90bc50d9..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-set -x
-BASE_URL='http://paddlepaddle.cdn.bcebos.com/model_zoo/embedding'
-
-DOWNLOAD_ITEMS=(baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb)
-ITEM_MD5=(fa03a12321eaab6c30a8fcc9442eaea3
-          f88c8325ee6da6187f1080e8fe66c1cd
-          927cf70f27f860aff1a5703ebf7f1584
-	  a52e43655cd25d279777ed509a1ae27b
-	  b92c67fe9ff70fea53596080e351ac80)
-
-for ((i=0; i<${#ITEM_MD5[@]}; i++))
-do
-  FILENAME=${DOWNLOAD_ITEMS[${i}]}
-  REAL_MD5=`wget ${BASE_URL}/${FILENAME} -O - | tee ${FILENAME} | md5sum | cut -d ' ' -f 1`
-  EXPECTED_MD5=${ITEM_MD5[${i}]}
-  [ "${EXPECTED_MD5}" = "${REAL_MD5}" ]
-done
diff --git a/v1_api_demo/model_zoo/resnet/.gitignore b/v1_api_demo/model_zoo/resnet/.gitignore
deleted file mode 100644
index 7a64209b62340a5c5a51626821028e63ed5e588e..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-fea_output/
-features/
-model.list
-ResNet_50.dot
-ResNet_50.png
diff --git a/v1_api_demo/model_zoo/resnet/classify.py b/v1_api_demo/model_zoo/resnet/classify.py
deleted file mode 100755
index 6074cc1d3a85e13e3e8d336d81e22104f9d8e7cf..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/classify.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import cPickle
-import logging
-from PIL import Image
-import numpy as np
-from optparse import OptionParser
-
-import paddle.utils.image_util as image_util
-
-from py_paddle import swig_paddle, DataProviderConverter
-from paddle.trainer.PyDataProvider2 import dense_vector
-from paddle.trainer.config_parser import parse_config
-
-logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
-logging.getLogger().setLevel(logging.INFO)
-
-
-class ImageClassifier():
-    def __init__(self,
-                 train_conf,
-                 model_dir=None,
-                 resize_dim=256,
-                 crop_dim=224,
-                 use_gpu=True,
-                 mean_file=None,
-                 output_layer=None,
-                 oversample=False,
-                 is_color=True):
-        """
-        train_conf: network configure.
-        model_dir: string, directory of model.
-        resize_dim: int, resized image size.
-        crop_dim: int, crop size.
-        mean_file: string, image mean file.
-        oversample: bool, oversample means multiple crops, namely five
-                    patches (the four corner patches and the center
-                    patch) as well as their horizontal reflections,
-                    ten crops in all.
-        """
-        self.train_conf = train_conf
-        self.model_dir = model_dir
-        if model_dir is None:
-            self.model_dir = os.path.dirname(train_conf)
-
-        self.resize_dim = resize_dim
-        self.crop_dims = [crop_dim, crop_dim]
-        self.oversample = oversample
-        self.is_color = is_color
-
-        self.output_layer = output_layer
-        if self.output_layer:
-            assert isinstance(self.output_layer, basestring)
-            self.output_layer = self.output_layer.split(",")
-
-        self.transformer = image_util.ImageTransformer(is_color=is_color)
-        self.transformer.set_transpose((2, 0, 1))
-        self.transformer.set_channel_swap((2, 1, 0))
-
-        self.mean_file = mean_file
-        if self.mean_file is not None:
-            mean = np.load(self.mean_file)['data_mean']
-            mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-            self.transformer.set_mean(mean)  # mean pixel
-        else:
-            # if you use three mean value, set like:
-            # this three mean value is calculated from ImageNet.
-            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
-
-        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
-        conf = parse_config(train_conf, conf_args)
-        swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(
-            conf.model_config)
-        assert isinstance(self.network, swig_paddle.GradientMachine)
-        self.network.loadParameters(self.model_dir)
-
-        data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
-        slots = [dense_vector(data_size)]
-        self.converter = DataProviderConverter(slots)
-
-    def get_data(self, img_path):
-        """
-        1. load image from img_path.
-        2. resize or oversampling.
-        3. transformer data: transpose, channel swap, sub mean.
-        return K x H x W ndarray.
-
-        img_path: image path.
-        """
-        image = image_util.load_image(img_path, self.is_color)
-        # Another way to extract oversampled features is that
-        # cropping and averaging from large feature map which is
-        # calculated by large size of image.
-        # This way reduces the computation.
-        if self.oversample:
-            # image_util.resize_image: short side is self.resize_dim
-            image = image_util.resize_image(image, self.resize_dim)
-            image = np.array(image)
-            input = np.zeros(
-                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
-            input[0] = image.astype(np.float32)
-            input = image_util.oversample(input, self.crop_dims)
-        else:
-            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros(
-                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
-            input[0] = np.array(image).astype(np.float32)
-
-        data_in = []
-        for img in input:
-            img = self.transformer.transformer(img).flatten()
-            data_in.append([img.tolist()])
-        # paddle input: [[[]],[[]],...], [[]] is one sample.
-        return data_in
-
-    def forward(self, input_data):
-        """
-        return output arguments which are the Outputs() in network configure.
-
-        input_data: py_paddle input data.
-        call forward.
-        """
-        in_arg = self.converter(input_data)
-        return self.network.forwardTest(in_arg)
-
-    def forward(self, data, output_layer):
-        """
-        return output arguments which are the Outputs() in network configure.
-
-        input_data: py_paddle input data.
-        call forward.
-        """
-        input = self.converter(data)
-        self.network.forwardTest(input)
-        output = self.network.getLayerOutputs(output_layer)
-        res = {}
-        if isinstance(output_layer, basestring):
-            output_layer = [output_layer]
-        for name in output_layer:
-            # For oversampling, average predictions across crops.
-            # If not, the shape of output[name]: (1, class_number),
-            # the mean is also applicable.
-            res[name] = output[name]['value'].mean(0)
-
-        return res
-
-    def predict(self, data_file):
-        """
-        call forward and predicting.
-
-        data_file: input image list.
-        """
-        image_files = open(data_file, 'rb').readlines()
-        results = {}
-        if self.output_layer is None:
-            self.output_layer = ["output"]
-        for line in image_files:
-            image = line.split()[0]
-            data = self.get_data(image)
-            prob = self.forward(data, self.output_layer)
-            lab = np.argsort(-prob[self.output_layer[0]])
-            results[image] = lab[0]
-            logging.info("Label of %s is: %d", image, lab[0])
-        return results
-
-    def extract(self, data_file, output_dir, batch_size=10000):
-        """
-        extract and save features of output layers, which are
-        specify in Outputs() in network configure.
-
-        data_file: file name of input data.
-        output_dir: saved directory of extracted features.
-        batch_size: sample number of one batch file.
-        """
-        if not os.path.exists(output_dir):
-            os.mkdir(output_dir)
-
-        sample_num = 0
-        batch_num = 0
-        image_feature = {}
-        image_files = open(data_file, 'rb').readlines()
-        for idx, line in enumerate(image_files):
-            image = line.split()[0]
-            data = self.get_data(image)
-            feature = self.forward(data, self.output_layer)
-            # save extracted features
-            file_name = image.split("/")[-1]
-            image_feature[file_name] = feature
-            sample_num += 1
-            if sample_num == batch_size:
-                batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
-                self.save_file(image_feature, batch_name)
-                logging.info('Finish batch %d', batch_num)
-                batch_num += 1
-                sample_num = 0
-                image_feature = {}
-            if idx % 1000 == 0:
-                logging.info('%d/%d, %s', idx, len(image_files), file_name)
-        if sample_num > 0:
-            batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
-            self.save_file(image_feature, batch_name)
-            logging.info('Finish batch %d', batch_num)
-        logging.info('Done: make image feature batch')
-
-    def save_file(self, data, file):
-        of = open(file, 'wb')
-        cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
-
-
-def option_parser():
-    """
-    Main entry for predciting
-    """
-    usage = "%prog -c config -i data_list -w model_dir [options]"
-    parser = OptionParser(usage="usage: %s" % usage)
-    parser.add_option(
-        "-j",
-        "--job",
-        action="store",
-        dest="job_type",
-        help="job type: predict, extract\
-                            predict: predicting,\
-                            extract: extract features")
-    parser.add_option(
-        "-c",
-        "--conf",
-        action="store",
-        dest="train_conf",
-        help="network config")
-    parser.add_option(
-        "-i", "--data", action="store", dest="data_file", help="image list")
-    parser.add_option(
-        "-w",
-        "--model",
-        action="store",
-        dest="model_path",
-        default=None,
-        help="model path")
-    parser.add_option(
-        "-g",
-        "--use_gpu",
-        action="store",
-        dest="use_gpu",
-        default=True,
-        help="Whether to use gpu mode.")
-    parser.add_option(
-        "-o",
-        "--output_dir",
-        action="store",
-        dest="output_dir",
-        default="output",
-        help="output path")
-    parser.add_option(
-        "-m",
-        "--mean",
-        action="store",
-        dest="mean",
-        default=None,
-        help="mean file.")
-    parser.add_option(
-        "-p",
-        "--multi_crop",
-        action="store_true",
-        dest="multi_crop",
-        default=False,
-        help="Wether to use multiple crops on image.")
-    parser.add_option("-l", "--output_layer", action="store",
-                      dest="output_layer", default=None,
-                      help="--job=extract, specify layers to extract "\
-                           "features, --job=predict, specify layer of "
-                           "classification probability, output in resnet.py.")
-    return parser.parse_args()
-
-
-def main():
-    """
-    1. parse input arguments.
-    2. predicting or extract features according job type.
-    """
-    options, args = option_parser()
-    obj = ImageClassifier(
-        options.train_conf,
-        options.model_path,
-        use_gpu=options.use_gpu,
-        mean_file=options.mean,
-        output_layer=options.output_layer,
-        oversample=options.multi_crop)
-    if options.job_type == "predict":
-        obj.predict(options.data_file)
-
-    elif options.job_type == "extract":
-        obj.extract(options.data_file, options.output_dir)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/model_zoo/resnet/example/.gitignore b/v1_api_demo/model_zoo/resnet/example/.gitignore
deleted file mode 100644
index 4a2b5962a6800f251cba655c026331f14648c86e..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/example/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*image_list_provider_copy_1.py
diff --git a/v1_api_demo/model_zoo/resnet/example/__init__.py b/v1_api_demo/model_zoo/resnet/example/__init__.py
deleted file mode 100644
index f662d6826321eb840739382558f76327d27b5847..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/example/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/v1_api_demo/model_zoo/resnet/example/cat.jpg b/v1_api_demo/model_zoo/resnet/example/cat.jpg
deleted file mode 100644
index 47b01db90eddc46ff845f10bc2accaf2364c272d..0000000000000000000000000000000000000000
Binary files a/v1_api_demo/model_zoo/resnet/example/cat.jpg and /dev/null differ
diff --git a/v1_api_demo/model_zoo/resnet/example/dog.jpg b/v1_api_demo/model_zoo/resnet/example/dog.jpg
deleted file mode 100644
index b9cc33cf069da5c453b97dbb7383838edd07c199..0000000000000000000000000000000000000000
Binary files a/v1_api_demo/model_zoo/resnet/example/dog.jpg and /dev/null differ
diff --git a/v1_api_demo/model_zoo/resnet/example/image_list_provider.py b/v1_api_demo/model_zoo/resnet/example/image_list_provider.py
deleted file mode 100644
index 2cd8eb8bf850f41282ed5db2885dc0b7218c79f7..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/example/image_list_provider.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.utils.image_util import *
-from paddle.trainer.PyDataProvider2 import *
-
-
-def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
-    """
-    Description: Init with a list of data file
-    file_list is the name list of input files.
-    kwargs["load_data_args"] is the value of 'load_data_args'
-    which can be set in config.
-    Each args is separated by a column.
-    image_size: the crop image size.
-    mean_meta: the path of the meta file to store the mean image.
-    mean_value: can be mean value, not a file.
-                can not set mean_meta and mean_value at the same time.
-    color: 'color' means a color image. Otherwise, it means a gray image.
-    is_train: whether the data provider is used for training.
-              Data argumentation might be different for training and testing.
-    """
-    settings.img_size = image_size
-    settings.crop_size = crop_size
-    settings.mean_img_size = settings.crop_size
-    settings.color = color  # default is color
-    settings.is_train = is_train
-
-    settings.is_swap_channel = kwargs.get('swap_channel', None)
-    if settings.is_swap_channel is not None:
-        settings.swap_channel = settings.is_swap_channel
-        settings.is_swap_channel = True
-
-    if settings.color:
-        settings.img_input_size = settings.crop_size * settings.crop_size * 3
-    else:
-        settings.img_input_size = settings.crop_size * settings.crop_size
-
-    settings.file_list = file_list
-    settings.mean_meta = kwargs.get('mean_meta', None)
-    settings.mean_value = kwargs.get('mean_value', None)
-    # can not specify both mean_meta and mean_value.
-    assert not (settings.mean_meta and settings.mean_value)
-    if not settings.mean_meta:
-        settings.mean_value = kwargs.get('mean_value')
-        sz = settings.crop_size * settings.crop_size
-        settings.img_mean = np.zeros(sz * 3, dtype=np.single)
-        for idx, value in enumerate(settings.mean_value):
-            settings.img_mean[idx * sz:(idx + 1) * sz] = value
-        settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
-                                                      settings.crop_size)
-
-    else:
-        settings.img_mean = load_meta(settings.mean_meta,
-                                      settings.mean_img_size,
-                                      settings.crop_size, settings.color)
-
-    settings.input_types = [
-        dense_vector(settings.img_input_size),  # image feature
-        integer_value(1)
-    ]  # labels
-
-    settings.logger.info('Image short side: %s', settings.img_size)
-    settings.logger.info('Crop size: %s', settings.crop_size)
-    settings.logger.info('Meta path: %s', settings.mean_meta)
-    if settings.is_swap_channel:
-        settings.logger.info('swap channel: %s', settings.swap_channel)
-    settings.logger.info('DataProvider Initialization finished')
-
-
-@provider(init_hook=hook, should_shuffle=False)
-def processData(settings, file_list):
-    """
-    The main function for loading data.
-    Load the batch, iterate all the images and labels in this batch.
-    file_name: the batch file name.
-    """
-    img_path, lab = file_list.strip().split(' ')
-    img = Image.open(img_path)
-    img.load()
-    img = img.resize((settings.img_size, settings.img_size), Image.ANTIALIAS)
-    img = np.array(img).astype(np.float32)
-    if len(img.shape) == 3:
-        img = np.swapaxes(img, 1, 2)
-        img = np.swapaxes(img, 1, 0)
-    # swap channel
-    if settings.is_swap_channel:
-        img = img[settings.swap_channel, :, :]
-    img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
-                              settings.is_train, settings.color)
-    yield img_feat.tolist(), int(lab.strip())
diff --git a/v1_api_demo/model_zoo/resnet/example/test.list b/v1_api_demo/model_zoo/resnet/example/test.list
deleted file mode 100644
index 30bbf630b640a26239fc104c9c08f6ebc9dfaa82..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/example/test.list
+++ /dev/null
@@ -1,2 +0,0 @@
-example/dog.jpg 0
-example/cat.jpg 0
diff --git a/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh b/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh
deleted file mode 100755
index 5447aa92dfb5facd3433eb4a1893e96e3c786c73..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-#set names of layer which you want to extract feature
-#in Outputs() of resnet.py 
-#like: Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
-layer_num=50
-configure=./resnet.py
-model_path=./model/resnet_$layer_num
-fea_dir=fea_output
-#Output is text file.
-#Each line is one sample's features.
-#If you set N layer names in Outputs()
-#each line contains N features sperated by ";". 
-
-# create model list file.
-model_list=./model.list
-touch $model_list | echo $model_path > $model_list
-
-paddle train \
-  --local=true \
-  --job=test \
-  --config=$configure \
-  --model_list=$model_list \
-  --use_gpu=1 \
-  --predict_output_dir=$fea_dir \
-  --config_args=is_test=1,layer_num=$layer_num
diff --git a/v1_api_demo/model_zoo/resnet/extract_fea_py.sh b/v1_api_demo/model_zoo/resnet/extract_fea_py.sh
deleted file mode 100755
index 2e87152f7f8598f487870291271cdee646105044..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/extract_fea_py.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-#Note if you use CPU mode, you need to set use_gpu=0 in classify.py. like this:
-#conf_args = "is_test=0,use_gpu=1,is_predict=1"
-#conf = parse_config(train_conf, conf_args)
-#swig_paddle.initPaddle("--use_gpu=0")
-python classify.py \
-     --job=extract \
-     --conf=resnet.py \
-     --use_gpu=1 \
-     --mean=model/mean_meta_224/mean.meta \
-     --model=model/resnet_50 \
-     --data=./example/test.list \
-     --output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
-     --output_dir=features
diff --git a/v1_api_demo/model_zoo/resnet/get_model.sh b/v1_api_demo/model_zoo/resnet/get_model.sh
deleted file mode 100755
index b33d8178ab7859fc0b0d514fb19bec2c28a77c3d..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/get_model.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-mkdir model
-cd model
-
-echo "Downloading ResNet models..."
-
-for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz 
-do 
-  wget http://paddlepaddle.bj.bcebos.com/model_zoo/imagenet/$file
-  tar -xvf $file 
-  rm $file
-done
-
-echo "Done."
diff --git a/v1_api_demo/model_zoo/resnet/load_feature.py b/v1_api_demo/model_zoo/resnet/load_feature.py
deleted file mode 100644
index 5d3d0c0d30ef710c37c98e93a51b2f813d636b59..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/load_feature.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import cPickle
-import logging
-
-logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
-logging.getLogger().setLevel(logging.INFO)
-
-
-def load_feature_c(file):
-    """
-    Load feature extracted by C++ interface.
-    Return a list.
-    file: feature file.
-    """
-    features = []
-    f = open(file, 'r')
-    for line in f:
-        sample = []
-        for slot in line.strip().split(";"):
-            fea = [float(val) for val in slot.strip().split()]
-            if fea:
-                sample.append(fea)
-        features.append(sample)
-    f.close()
-    return features
-
-
-def load_feature_py(feature_dir):
-    """
-    Load feature extracted by python interface.
-    Return a dictionary.
-    feature_dir: directory of feature file.
-    """
-    file_list = os.listdir(feature_dir)
-    file_list = [os.path.join(feature_dir, f) for f in file_list]
-    features = {}
-    for file_name in file_list:
-        with open(file_name, 'rb') as f:
-            feature = cPickle.load(f)
-            features.update(feature)
-            logging.info('Load feature file %s', file_name)
-    return features
-
-
-if __name__ == '__main__':
-    print load_feature_py(sys.argv[1])
-    #print load_feature_c(sys.argv[1]) 
diff --git a/v1_api_demo/model_zoo/resnet/net_diagram.sh b/v1_api_demo/model_zoo/resnet/net_diagram.sh
deleted file mode 100755
index 1b06ffa44eec8a0f312420c35699d3902f9a6400..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/net_diagram.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-:'
-Visual deep residual network
-1. Using make_model_diagram.py to generate dot file.
-2. Using graphviz to convert dot file.
-
-Usage:
-./net_diagram.sh
-'
-
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-img_type=png
-img_fileprefix=ResNet_50
-conf_filename=resnet.py
-dot_filename=ResNet_50.dot
-config_str="layer_num=50,data_provider=0"
-
-python -m paddle.utils.make_model_diagram $conf_filename $dot_filename $config_str
-
-# If you have installed graphviz, running like this:
-# dot -Tpng -o ResNet.png ResNet.dot
diff --git a/v1_api_demo/model_zoo/resnet/predict.sh b/v1_api_demo/model_zoo/resnet/predict.sh
deleted file mode 100755
index 2b67b17c48c60cc8a7b7c46a1c80a3f2bf281870..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/predict.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-python classify.py \
-     --job=predict \
-     --conf=resnet.py\
-     --model=model/resnet_50 \
-     --multi_crop \
-     --use_gpu=1 \
-     --data=./example/test.list
diff --git a/v1_api_demo/model_zoo/resnet/resnet.py b/v1_api_demo/model_zoo/resnet/resnet.py
deleted file mode 100644
index 6fdd97fefc62392c93ecffae0fc918e8dc4b18c5..0000000000000000000000000000000000000000
--- a/v1_api_demo/model_zoo/resnet/resnet.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-"""
-paper: https://arxiv.org/abs/1512.03385
-"""
-is_test = get_config_arg("is_test", bool, False)
-is_predict = get_config_arg("is_predict", bool, False)
-data_provider = get_config_arg("data_provider", bool, True)
-layer_num = get_config_arg("layer_num", int, 50)
-
-if not is_predict and data_provider:
-    train_list = 'train.list' if not is_test else None
-    # mean.meta is mean file of ImageNet dataset.
-    # mean.meta size : 3 x 224 x 224.
-    # If you use three mean value, set like:
-    # "mean_value:103.939,116.779,123.68;"
-    args = {
-        'mean_meta': "model/mean_meta_224/mean.meta",
-        'image_size': 224,
-        'crop_size': 224,
-        'color': True,
-        'swap_channel:': [2, 1, 0]
-    }
-    define_py_data_sources2(
-        train_list,
-        'example/test.list',
-        module="example.image_list_provider",
-        obj="processData",
-        args=args)
-
-batch_size = 1
-learning_rate = 0.1 / batch_size
-momentum = 0.9
-weight_decay = 0.0001 * batch_size
-default_momentum(momentum)
-default_decay_rate(weight_decay)
-
-Settings(
-    algorithm='sgd',
-    batch_size=batch_size,
-    learning_rate=learning_rate,
-
-    # set the appropriate parameters according your schedule
-    learning_method='momentum',
-    learning_rate_decay_a=0.5,
-    learning_rate_decay_b=1200000 * 10,
-    learning_rate_schedule="discexp", )
-
-
-def conv_bn_layer(name,
-                  input,
-                  filter_size,
-                  num_filters,
-                  stride,
-                  padding,
-                  channels=None,
-                  active_type=ReluActivation()):
-    """
-    A wrapper for conv layer with batch normalization layers.
-    Note:
-    conv layer has no activation.
-    """
-
-    tmp = img_conv_layer(
-        name=name + "_conv",
-        input=input,
-        filter_size=filter_size,
-        num_channels=channels,
-        num_filters=num_filters,
-        stride=stride,
-        padding=padding,
-        act=LinearActivation(),
-        bias_attr=False)
-    return batch_norm_layer(
-        name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
-
-
-def bottleneck_block(name, input, num_filters1, num_filters2):
-    """
-    A wrapper for bottlenect building block in ResNet.
-    Last conv_bn_layer has no activation.
-    Addto layer has activation of relu.
-    """
-    last_name = conv_bn_layer(
-        name=name + '_branch2a',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters1,
-        stride=1,
-        padding=0)
-    last_name = conv_bn_layer(
-        name=name + '_branch2b',
-        input=last_name,
-        filter_size=3,
-        num_filters=num_filters1,
-        stride=1,
-        padding=1)
-    last_name = conv_bn_layer(
-        name=name + '_branch2c',
-        input=last_name,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=1,
-        padding=0,
-        active_type=LinearActivation())
-
-    return addto_layer(
-        name=name + "_addto", input=[input, last_name], act=ReluActivation())
-
-
-def mid_projection(name, input, num_filters1, num_filters2, stride=2):
-    """
-    A wrapper for middile projection in ResNet.
-    projection shortcuts are used for increasing dimensions,
-    and other shortcuts are identity
-    branch1: projection shortcuts are used for increasing
-    dimensions, has no activation.
-    branch2x: bottleneck building block, shortcuts are identity.
-    """
-    # stride = 2
-    branch1 = conv_bn_layer(
-        name=name + '_branch1',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=stride,
-        padding=0,
-        active_type=LinearActivation())
-
-    last_name = conv_bn_layer(
-        name=name + '_branch2a',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters1,
-        stride=stride,
-        padding=0)
-    last_name = conv_bn_layer(
-        name=name + '_branch2b',
-        input=last_name,
-        filter_size=3,
-        num_filters=num_filters1,
-        stride=1,
-        padding=1)
-
-    last_name = conv_bn_layer(
-        name=name + '_branch2c',
-        input=last_name,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=1,
-        padding=0,
-        active_type=LinearActivation())
-
-    return addto_layer(
-        name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
-
-
-def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
-    """
-    A wrapper for 50,101,152 layers of ResNet.
-    res2_num: number of blocks stacked in conv2_x
-    res3_num: number of blocks stacked in conv3_x
-    res4_num: number of blocks stacked in conv4_x
-    res5_num: number of blocks stacked in conv5_x
-    """
-    # For ImageNet
-    # conv1: 112x112
-    img = data_layer(name='input', size=224 * 224 * 3)
-    tmp = conv_bn_layer(
-        "conv1",
-        img,
-        filter_size=7,
-        channels=3,
-        num_filters=64,
-        stride=2,
-        padding=3)
-    tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
-
-    # conv2_x: 56x56
-    tmp = mid_projection(
-        name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
-    for i in xrange(2, res2_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
-
-    # conv3_x: 28x28
-    tmp = mid_projection(
-        name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
-    for i in xrange(2, res3_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res3_" + str(i),
-            input=tmp,
-            num_filters1=128,
-            num_filters2=512)
-
-    # conv4_x: 14x14
-    tmp = mid_projection(
-        name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
-    for i in xrange(2, res4_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res4_" + str(i),
-            input=tmp,
-            num_filters1=256,
-            num_filters2=1024)
-
-    # conv5_x: 7x7
-    tmp = mid_projection(
-        name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
-    for i in xrange(2, res5_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res5_" + str(i),
-            input=tmp,
-            num_filters1=512,
-            num_filters2=2048)
-
-    tmp = img_pool_layer(
-        name='avgpool',
-        input=tmp,
-        pool_size=7,
-        stride=1,
-        pool_type=AvgPooling())
-
-    output = fc_layer(
-        name='output', input=tmp, size=1000, act=SoftmaxActivation())
-
-    if not is_predict:
-        classification_cost(
-            input=output, label=data_layer(
-                name='label', size=1))
-
-
-def res_net_50():
-    deep_res_net(3, 4, 6, 3)
-
-
-def res_net_101():
-    deep_res_net(3, 4, 23, 3)
-
-
-def res_net_152():
-    deep_res_net(3, 8, 36, 3)
-
-
-if not is_predict:
-    Inputs("input", "label")
-else:
-    Inputs("input")
-# Outputs("cost-softmax" if not is_predict else "output")
-Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
-
-if layer_num == 50:
-    res_net_50()
-elif layer_num == 101:
-    res_net_101()
-elif layer_num == 152:
-    res_net_152()
-else:
-    print("Wrong layer number.")
diff --git a/v1_api_demo/quick_start/.gitignore b/v1_api_demo/quick_start/.gitignore
deleted file mode 100644
index f71662563ff96d6227dd568d9951a90b0d09456e..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-*.pyc
-data/dict.txt
-data/dict_all.txt
-data/labels.list
-data/mosesdecoder-master/
-data/reviews_Electronics_5.json.gz
-data/test.list
-data/test.txt
-data/train.list
-data/train.txt
-data/pred.list
-data/pred.txt
-dataprovider_copy_1.py
-train.log
-output
diff --git a/v1_api_demo/quick_start/api_predict.py b/v1_api_demo/quick_start/api_predict.py
deleted file mode 100755
index 9bdffe1006281c58a595e2771561ba62e4c2d6bd..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/api_predict.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os, sys
-import numpy as np
-from optparse import OptionParser
-from py_paddle import swig_paddle, DataProviderConverter
-from paddle.trainer.PyDataProvider2 import sparse_binary_vector
-from paddle.trainer.config_parser import parse_config
-"""
-Usage: run following command to show help message.
-  python api_predict.py -h
-"""
-
-
-class QuickStartPrediction():
-    def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
-        """
-        train_conf: trainer configure.
-        dict_file: word dictionary file name.
-        model_dir: directory of model.
-        """
-        self.train_conf = train_conf
-        self.dict_file = dict_file
-        self.word_dict = {}
-        self.dict_dim = self.load_dict()
-        self.model_dir = model_dir
-        if model_dir is None:
-            self.model_dir = os.path.dirname(train_conf)
-
-        self.label = None
-        if label_file is not None:
-            self.load_label(label_file)
-
-        conf = parse_config(train_conf, "is_predict=1")
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(
-            conf.model_config)
-        self.network.loadParameters(self.model_dir)
-        input_types = [sparse_binary_vector(self.dict_dim)]
-        self.converter = DataProviderConverter(input_types)
-
-    def load_dict(self):
-        """
-        Load dictionary from self.dict_file.
-        """
-        for line_count, line in enumerate(open(self.dict_file, 'r')):
-            self.word_dict[line.strip().split('\t')[0]] = line_count
-        return len(self.word_dict)
-
-    def load_label(self, label_file):
-        """
-        Load label.
-        """
-        self.label = {}
-        for v in open(label_file, 'r'):
-            self.label[int(v.split('\t')[1])] = v.split('\t')[0]
-
-    def get_index(self, data):
-        """
-        transform word into integer index according to the dictionary.
-        """
-        words = data.strip().split()
-        word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
-        return word_slot
-
-    def batch_predict(self, data_batch):
-        input = self.converter(data_batch)
-        output = self.network.forwardTest(input)
-        prob = output[0]["id"].tolist()
-        print("predicting labels is:")
-        print prob
-
-
-def option_parser():
-    usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
-    parser = OptionParser(usage="usage: %s [options]" % usage)
-    parser.add_option(
-        "-n",
-        "--tconf",
-        action="store",
-        dest="train_conf",
-        help="network config")
-    parser.add_option(
-        "-d",
-        "--dict",
-        action="store",
-        dest="dict_file",
-        help="dictionary file")
-    parser.add_option(
-        "-b",
-        "--label",
-        action="store",
-        dest="label",
-        default=None,
-        help="dictionary file")
-    parser.add_option(
-        "-c",
-        "--batch_size",
-        type="int",
-        action="store",
-        dest="batch_size",
-        default=1,
-        help="the batch size for prediction")
-    parser.add_option(
-        "-w",
-        "--model",
-        action="store",
-        dest="model_path",
-        default=None,
-        help="model path")
-    return parser.parse_args()
-
-
-def main():
-    options, args = option_parser()
-    train_conf = options.train_conf
-    batch_size = options.batch_size
-    dict_file = options.dict_file
-    model_path = options.model_path
-    label = options.label
-    swig_paddle.initPaddle("--use_gpu=0")
-    predict = QuickStartPrediction(train_conf, dict_file, model_path, label)
-
-    batch = []
-    labels = []
-    for line in sys.stdin:
-        [label, text] = line.split("\t")
-        labels.append(int(label))
-        batch.append([predict.get_index(text)])
-    print("labels is:")
-    print labels
-    predict.batch_predict(batch)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/quick_start/api_predict.sh b/v1_api_demo/quick_start/api_predict.sh
deleted file mode 100755
index 4d9aa9e8854ed79446a47dbc593f419cdda077b4..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/api_predict.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-#Note the default model is pass-00002, you shold make sure the model path
-#exists or change the mode path.
-#only test on trainer_config.lr.py
-model=output/model/pass-00001/
-config=trainer_config.lr.py
-label=data/labels.list
-dict=data/dict.txt
-batch_size=20
-head -n$batch_size data/test.txt | python api_predict.py \
-     --tconf=$config\
-     --model=$model \
-     --label=$label \
-     --dict=$dict \
-     --batch_size=$batch_size
diff --git a/v1_api_demo/quick_start/api_train.py b/v1_api_demo/quick_start/api_train.py
deleted file mode 100644
index 5699789daa4051661b0a72c69f4668f2d8bb9cb2..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/api_train.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import itertools
-import random
-
-from paddle.trainer.config_parser import parse_config
-from py_paddle import swig_paddle as api
-from py_paddle import DataProviderConverter
-from paddle.trainer.PyDataProvider2 \
-    import integer_value, integer_value_sequence, sparse_binary_vector
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--train_data", type=str, required=False, help="train data file")
-    parser.add_argument("--test_data", type=str, help="test data file")
-    parser.add_argument(
-        "--config", type=str, required=True, help="config file name")
-    parser.add_argument("--dict_file", required=True, help="dictionary file")
-    parser.add_argument(
-        "--seq", default=1, type=int, help="whether use sequence training")
-    parser.add_argument(
-        "--use_gpu", default=0, type=int, help="whether use GPU for training")
-    parser.add_argument(
-        "--trainer_count",
-        default=1,
-        type=int,
-        help="Number of threads for training")
-    parser.add_argument(
-        "--num_passes", default=5, type=int, help="Number of training passes")
-    return parser.parse_args()
-
-
-UNK_IDX = 0
-
-
-def load_data(file_name, word_dict):
-    with open(file_name, 'r') as f:
-        for line in f:
-            label, comment = line.strip().split('\t')
-            words = comment.split()
-            word_slot = [word_dict.get(w, UNK_IDX) for w in words]
-            yield word_slot, int(label)
-
-
-def load_dict(dict_file):
-    word_dict = dict()
-    with open(dict_file, 'r') as f:
-        for i, line in enumerate(f):
-            w = line.strip().split()[0]
-            word_dict[w] = i
-    return word_dict
-
-
-def main():
-    options = parse_arguments()
-    api.initPaddle("--use_gpu=%s" % options.use_gpu,
-                   "--trainer_count=%s" % options.trainer_count)
-
-    word_dict = load_dict(options.dict_file)
-    train_dataset = list(load_data(options.train_data, word_dict))
-    if options.test_data:
-        test_dataset = list(load_data(options.test_data, word_dict))
-    else:
-        test_dataset = None
-
-    trainer_config = parse_config(options.config,
-                                  "dict_file=%s" % options.dict_file)
-    # No need to have data provider for trainer
-    trainer_config.ClearField('data_config')
-    trainer_config.ClearField('test_data_config')
-
-    # create a GradientMachine from the model configuratin
-    model = api.GradientMachine.createFromConfigProto(
-        trainer_config.model_config)
-    # create a trainer for the gradient machine
-    trainer = api.Trainer.create(trainer_config, model)
-
-    # create a data converter which converts data to PaddlePaddle
-    # internal format
-    input_types = [
-        integer_value_sequence(len(word_dict)) if options.seq else
-        sparse_binary_vector(len(word_dict)), integer_value(2)
-    ]
-    converter = DataProviderConverter(input_types)
-
-    batch_size = trainer_config.opt_config.batch_size
-    trainer.startTrain()
-    for train_pass in xrange(options.num_passes):
-        trainer.startTrainPass()
-        random.shuffle(train_dataset)
-        for pos in xrange(0, len(train_dataset), batch_size):
-            batch = itertools.islice(train_dataset, pos, pos + batch_size)
-            size = min(batch_size, len(train_dataset) - pos)
-            trainer.trainOneDataBatch(size, converter(batch))
-        trainer.finishTrainPass()
-        if test_dataset:
-            trainer.startTestPeriod()
-            for pos in xrange(0, len(test_dataset), batch_size):
-                batch = itertools.islice(test_dataset, pos, pos + batch_size)
-                size = min(batch_size, len(test_dataset) - pos)
-                trainer.testOneDataBatch(size, converter(batch))
-            trainer.finishTestPeriod()
-    trainer.finishTrain()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/quick_start/api_train.sh b/v1_api_demo/quick_start/api_train.sh
deleted file mode 100755
index 9b2a4e2f224b1677c458ede66a6a3bac09d8ad61..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/api_train.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-# Note: if using trainer_config.emb.py, trainer_config.cnn.py
-# or trainer_config.lstm.py, you need to change --seq to --seq=1
-# because they are sequence models.
-python api_train.py \
-  --config=trainer_config.lr.py \
-  --trainer_count=2 \
-  --num_passes=15 \
-  --use_gpu=0 \
-  --seq=0 \
-  --train_data=data/train.txt \
-  --test_data=data/test.txt \
-  --dict_file=data/dict.txt \
-  2>&1 | tee 'train.log'
diff --git a/v1_api_demo/quick_start/cluster/cluster_train.sh b/v1_api_demo/quick_start/cluster/cluster_train.sh
deleted file mode 100755
index a7b1f01064b29cf6abc4cd6b706ee466a6d6da36..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/cluster/cluster_train.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-# Should run pserver.sh before run this script.
-bin_dir=$(cd `dirname $0`; pwd)
-home_dir=$(cd "${bin_dir}/.."; pwd)
-source "$bin_dir/env.sh"
-
-model_dir="$bin_dir/output"
-log_file="$bin_dir/train.log"
-
-pushd "$home_dir"
-cfg=trainer_config.lr.py
-paddle train \
-  --start_pserver=false \
-  --config=$cfg \
-  --save_dir=${model_dir} \
-  --trainer_count=4 \
-  --local=0 \
-  --log_period=100 \
-  --num_passes=15 \
-  --use_gpu=false \
-  --show_parameter_stats_period=100 \
-  --test_all_data_in_one_period=1 \
-  --num_gradient_servers=1 \
-  --nics=`get_nics` \
-  --port=7164 \
-  --ports_num=1 \
-  --pservers="127.0.0.1" \
-  --comment="paddle_trainer" \
-  2>&1 | tee "$log_file"
-popd
diff --git a/v1_api_demo/quick_start/cluster/env.sh b/v1_api_demo/quick_start/cluster/env.sh
deleted file mode 100644
index a404993835d0e479f65c89c5561855293b7b66f0..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/cluster/env.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-function get_nics() {
-  machine=`uname -s`
-  local nics=""
-  if [ "$machine" == "Linux" ]; then
-    nics="lo"
-  elif [ "$machine" == "Darwin" ]; then
-    nics="lo0"
-  else
-    nics="unsupport"
-  fi
-  echo $nics
-}
diff --git a/v1_api_demo/quick_start/cluster/pserver.sh b/v1_api_demo/quick_start/cluster/pserver.sh
deleted file mode 100755
index b187c1d9b9108a607ed310253d54ecc096f0e792..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/cluster/pserver.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-bin_dir=$(cd `dirname $0`; pwd)
-source "$bin_dir/env.sh"
-
-paddle pserver \
-  --nics=`get_nics` \
-  --port=7164 \
-  --ports_num=1 \
-  --ports_num_for_sparse=1 \
-  --num_gradient_servers=1 \
-  --comment="paddle_pserver" \
-  2>&1 | tee 'pserver.log'
diff --git a/v1_api_demo/quick_start/data/README.md b/v1_api_demo/quick_start/data/README.md
deleted file mode 100644
index 63abcf7ebf31903213e44cf492b93e09f61db14e..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/data/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-This dataset consists of electronics product reviews associated with
-binary labels (positive/negative) for sentiment classification.
-
-The preprocessed data can be downloaded by script `get_data.sh`.
-The data was derived from reviews_Electronics_5.json.gz at
-
-http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
-
-If you want to process the raw data, you can use the script `proc_from_raw_data/get_data.sh`.
diff --git a/v1_api_demo/quick_start/data/get_data.sh b/v1_api_demo/quick_start/data/get_data.sh
deleted file mode 100755
index a09a18f919e5a84f1f7c889a43f0a5fbf4a60a77..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/data/get_data.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-# Download the preprocessed data
-wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
-
-# Extract package
-tar zxvf preprocessed_data.tar.gz
-
-# Remove compressed package
-rm preprocessed_data.tar.gz
diff --git a/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh b/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh
deleted file mode 100755
index d976eaebfaa600778e0ab6bb0adbd7159f1cce2f..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# 1. size of pos : neg = 1:1.
-# 2. size of testing set = min(25k, len(all_data) * 0.1), others is traning set.
-# 3. distinct train set and test set.
-
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-# Download data
-echo "Downloading Amazon Electronics reviews data..."
-# http://jmcauley.ucsd.edu/data/amazon/
-wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
-echo "Downloading mosesdecoder..."
-# https://github.com/moses-smt/mosesdecoder
-wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
-
-unzip master.zip
-rm master.zip
-
-##################
-# Preprocess data 
-echo "Preprocess data..."
-export LC_ALL=C
-UNAME_STR=`uname`
-
-if [ ${UNAME_STR} == 'Linux' ]; then
-  SHUF_PROG='shuf'
-else
-  SHUF_PROG='gshuf'
-fi
-
-mkdir -p tmp
-python preprocess.py -i reviews_Electronics_5.json.gz
-# uniq and shuffle
-cd tmp
-echo 'Uniq and shuffle...'
-cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
-cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
-
-min_len=`sed -n '$=' neg.shuffed`
-test_num=$((min_len/10))
-if [ $test_num -gt 12500 ];then
- test_num=12500
-fi
-train_num=$((min_len-test_num))
-
-head -n$train_num pos.shuffed >train.pos
-head -n$train_num neg.shuffed >train.neg
-tail -n$test_num pos.shuffed >test.pos
-tail -n$test_num neg.shuffed >test.neg
-
-cat train.pos train.neg | ${SHUF_PROG} >../train.txt
-cat test.pos test.neg | ${SHUF_PROG} >../test.txt
-
-cd -
-echo 'train.txt' > train.list
-echo 'test.txt' > test.list
-
-# use 30k dict
-rm -rf tmp
-mv dict.txt dict_all.txt
-cat dict_all.txt | head -n 30001 > dict.txt
-echo 'Done.'
diff --git a/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py b/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py
deleted file mode 100755
index 72bd95f21d8bde8b3d1962ea10ecf6fc7d0ea478..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py
+++ /dev/null
@@ -1,223 +0,0 @@
-# -*- coding: UTF-8 -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-1. Tokenize the words and punctuation 
-2. pos sample : rating score 5; neg sample: rating score 1-2.
-
-Usage:
-    python preprocess.py -i data_file [random seed]
-"""
-
-import sys
-import os
-import operator
-import gzip
-from subprocess import Popen, PIPE
-from optparse import OptionParser
-import json
-from multiprocessing import Queue
-from multiprocessing import Pool
-import multiprocessing
-
-batch_size = 5000
-word_count = {}
-num_tokenize = max(1,
-                   multiprocessing.cpu_count() - 2)  # parse + tokenize + save
-max_queue_size = 8
-parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
-tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
-
-
-def create_dict(data):
-    """
-    Create dictionary based on data, and saved in data_dir/dict.txt.
-    The first line is unk \t -1.
-    data: list, input data by batch.
-    """
-    for seq in data:
-        try:
-            for w in seq.lower().split():
-                if w not in word_count:
-                    word_count[w] = 1
-                else:
-                    word_count[w] += 1
-        except:
-            sys.stderr.write(seq + "\tERROR\n")
-
-
-def parse(path):
-    """
-    Open .gz file.
-    """
-    sys.stderr.write(path)
-    g = gzip.open(path, 'r')
-    for l in g:
-        yield json.loads(l)
-    g.close()
-
-
-def tokenize(sentences):
-    """
-    Use tokenizer.perl to tokenize input sentences.
-    tokenizer.perl is tool of Moses.
-    sentences : a list of input sentences.
-    return: a list of processed text.
-    """
-    dir = './mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
-    if not os.path.exists(dir):
-        sys.exit(
-            "The ./mosesdecoder-master/scripts/tokenizer/tokenizer.perl does not exists."
-        )
-    tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
-    assert isinstance(sentences, list)
-    text = "\n".join(sentences)
-    tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
-    tok_text, _ = tokenizer.communicate(text)
-    toks = tok_text.split('\n')[:-1]
-    return toks
-
-
-def save_data(instance, data_dir, pre_fix, batch_num):
-    """
-    save data by batch
-    """
-    label = ['1' if pre_fix == 'pos' else '0' for i in range(len(instance))]
-    lines = ['%s\t%s' % (label[i], instance[i]) for i in range(len(label))]
-    file_name = os.path.join(data_dir, "%s_%s.txt" % (pre_fix, batch_num))
-    file(file_name, 'w').write('\n'.join(lines) + '\n')
-
-
-def tokenize_batch(id):
-    """
-    tokenize data by batch
-    """
-    while True:
-        num_batch, instance, pre_fix = parse_queue.get()
-        if num_batch == -1:  ### parse_queue finished
-            tokenize_queue.put((-1, None, None))
-            sys.stderr.write("Thread %s finish\n" % (id))
-            break
-        tokenize_instance = tokenize(instance)
-        tokenize_queue.put((num_batch, tokenize_instance, pre_fix))
-        sys.stderr.write('.')
-
-
-def save_batch(data_dir, num_tokenize, data_dir_dict):
-    """
-        save data by batch
-        build dict.txt
-    """
-    token_count = 0
-    while True:
-        num_batch, instance, pre_fix = tokenize_queue.get()
-        if num_batch == -1:
-            token_count += 1
-            if token_count == num_tokenize:  #### tokenize finished.
-                break
-            else:
-                continue
-        save_data(instance, data_dir, pre_fix, num_batch)
-        create_dict(instance)  ## update dict
-
-    sys.stderr.write("save file finish\n")
-    f = open(data_dir_dict, 'w')
-    f.write('%s\t%s\n' % ('unk', '-1'))
-    for k, v in sorted(word_count.items(), key=operator.itemgetter(1), \
-                       reverse=True):
-        f.write('%s\t%s\n' % (k, v))
-    f.close()
-    sys.stderr.write("build dict finish\n")
-
-
-def parse_batch(data, num_tokenize):
-    """
-    parse data by batch
-    parse -> tokenize -> save
-    """
-    raw_txt = parse(data)
-    neg, pos = [], []
-    count = 0
-    sys.stderr.write("extract raw data\n")
-    for l in raw_txt:
-        rating = l["overall"]
-        text = l["reviewText"].lower()  # # convert words to lower case
-        if rating == 5.0 and text:
-            pos.append(text)
-        if rating < 3.0 and text:
-            neg.append(text)
-        if len(pos) == batch_size or len(neg) == batch_size:
-            if len(pos) == batch_size:
-                batch = pos
-                pre_fix = 'pos'
-            else:
-                batch = neg
-                pre_fix = 'neg'
-
-            parse_queue.put((count, batch, pre_fix))
-            count += 1
-            if pre_fix == 'pos':
-                pos = []
-            else:
-                neg = []
-
-    if len(pos) > 0:
-        parse_queue.put((count, pos, 'pos'))
-        count += 1
-    if len(neg) > 0:
-        parse_queue.put((count, neg, 'neg'))
-        count += 1
-    for i in range(num_tokenize):
-        parse_queue.put((-1, None, None))  #### for tokenize's input finished
-    sys.stderr.write("parsing finish\n")
-
-
-def option_parser():
-    parser = OptionParser(usage="usage: python preprcoess.py "\
-                                "-i data_path [options]")
-    parser.add_option(
-        "-i", "--data", action="store", dest="input", help="Input data path.")
-    parser.add_option(
-        "-s",
-        "--seed",
-        action="store",
-        dest="seed",
-        default=1024,
-        help="Set random seed.")
-    return parser.parse_args()
-
-
-def main():
-    reload(sys)
-    sys.setdefaultencoding('utf-8')
-    options, args = option_parser()
-    data = options.input
-    seed = options.seed
-    data_dir_dict = os.path.join(os.path.dirname(data), 'dict.txt')
-    data_dir = os.path.join(os.path.dirname(data), 'tmp')
-    pool = Pool(processes=num_tokenize + 2)
-    pool.apply_async(parse_batch, args=(data, num_tokenize))
-    for i in range(num_tokenize):
-        pool.apply_async(tokenize_batch, args=(str(i), ))
-    pool.apply_async(save_batch, args=(data_dir, num_tokenize, data_dir_dict))
-    pool.close()
-    pool.join()
-
-    file(os.path.join(os.path.dirname(data), 'labels.list'),
-         'w').write('neg\t0\npos\t1\n')
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/quick_start/dataprovider_bow.py b/v1_api_demo/quick_start/dataprovider_bow.py
deleted file mode 100644
index 2745495586449b5d1eb64ae570f73eb6b14dbdfe..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/dataprovider_bow.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-
-# id of the word not in dictionary
-UNK_IDX = 0
-
-
-# initializer is called by the framework during initialization.
-# It allows the user to describe the data types and setup the
-# necessary data structure for later use.
-# `settings` is an object. initializer need to properly fill settings.input_types.
-# initializer can also store other data structures needed to be used at process().
-# In this example, dictionary is stored in settings.
-# `dictionay` and `kwargs` are arguments passed from trainer_config.lr.py
-def initializer(settings, dictionary, **kwargs):
-    # Put the word dictionary into settings
-    settings.word_dict = dictionary
-
-    # setting.input_types specifies what the data types the data provider
-    # generates.
-    settings.input_types = {
-        # The first input is a sparse_binary_vector,
-        # which means each dimension of the vector is either 0 or 1. It is the
-        # bag-of-words (BOW) representation of the texts.
-        'word': sparse_binary_vector(len(dictionary)),
-        # The second input is an integer. It represents the category id of the
-        # sample. 2 means there are two labels in the dataset.
-        # (1 for positive and 0 for negative)
-        'label': integer_value(2)
-    }
-
-
-# Delaring a data provider. It has an initializer 'data_initialzer'.
-# It will cache the generated data of the first pass in memory, so that
-# during later pass, no on-the-fly data generation will be needed.
-# `setting` is the same object used by initializer()
-# `file_name` is the name of a file listed train_list or test_list file given
-# to define_py_data_sources2(). See trainer_config.lr.py.
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_name):
-    # Open the input data file.
-    with open(file_name, 'r') as f:
-        # Read each line.
-        for line in f:
-            # Each line contains the label and text of the comment, separated by \t.
-            label, comment = line.strip().split('\t')
-
-            # Split the words into a list.
-            words = comment.split()
-
-            # convert the words into a list of ids by looking them up in word_dict.
-            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
-
-            # Return the features for the current comment. The first is a list
-            # of ids representing a 0-1 binary sparse vector of the text,
-            # the second is the integer id of the label.
-            yield {'word': word_vector, 'label': int(label)}
-
-
-def predict_initializer(settings, dictionary, **kwargs):
-    settings.word_dict = dictionary
-    settings.input_types = {'word': sparse_binary_vector(len(dictionary))}
-
-
-# Declaring a data provider for prediction. The difference with process
-# is that label is not generated.
-@provider(init_hook=predict_initializer, should_shuffle=False)
-def process_predict(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line in f:
-            comment = line.strip().split()
-            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment]
-            yield {'word': word_vector}
diff --git a/v1_api_demo/quick_start/dataprovider_emb.py b/v1_api_demo/quick_start/dataprovider_emb.py
deleted file mode 100755
index ddfa3ce9b73555cb3b7f5a44314ca35b12d41ede..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/dataprovider_emb.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-
-UNK_IDX = 0
-
-
-def initializer(settings, dictionary, **kwargs):
-    settings.word_dict = dictionary
-    settings.input_types = {
-        # Define the type of the first input as sequence of integer.
-        # The value of the integers range from 0 to len(dictrionary)-1
-        'word': integer_value_sequence(len(dictionary)),
-        # Define the second input for label id
-        'label': integer_value(2)
-    }
-
-
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line in f:
-            label, comment = line.strip().split('\t')
-            words = comment.split()
-            word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
-            yield {'word': word_slot, 'label': int(label)}
-
-
-def predict_initializer(settings, dictionary, **kwargs):
-    settings.word_dict = dictionary
-    settings.input_types = {'word': integer_value_sequence(len(dictionary))}
-
-
-@provider(init_hook=predict_initializer, should_shuffle=False)
-def process_predict(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line in f:
-            comment = line.strip().split()
-            word_slot = [settings.word_dict.get(w, UNK_IDX) for w in comment]
-            yield {'word': word_slot}
diff --git a/v1_api_demo/quick_start/predict.sh b/v1_api_demo/quick_start/predict.sh
deleted file mode 100755
index e47c2dd01fb5c919203964e298018e6dc2bd366e..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/predict.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.lr.py
-#cfg=trainer_config.emb.py
-#cfg=trainer_config.cnn.py
-#cfg=trainer_config.lstm.py
-model="output/pass-00003"
-paddle train \
-    --config=$cfg \
-    --use_gpu=false \
-    --job=test \
-    --init_model_path=$model \
-    --config_args=is_predict=1 \
-    --predict_output_dir=. \
-2>&1 | tee 'predict.log'
-paddle usage -l 'predict.log' -e $? -n "quick_start_predict_${cfg}" >/dev/null 2>&1
-
-mv rank-00000 result.txt
diff --git a/v1_api_demo/quick_start/train.sh b/v1_api_demo/quick_start/train.sh
deleted file mode 100755
index 01697fed48054be8ad98a01d4cbb5029e6a1ead0..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/train.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.lr.py
-#cfg=trainer_config.emb.py
-#cfg=trainer_config.cnn.py
-#cfg=trainer_config.lstm.py
-#cfg=trainer_config.bidi-lstm.py
-#cfg=trainer_config.db-lstm.py
-#cfg=trainer_config.resnet-lstm.py
-paddle train \
-  --config=$cfg \
-  --save_dir=./output \
-  --trainer_count=4 \
-  --log_period=100 \
-  --num_passes=15 \
-  --use_gpu=false \
-  --show_parameter_stats_period=100 \
-  --test_all_data_in_one_period=1 \
-  2>&1 | tee 'train.log'
-paddle usage -l "train.log" -e $? -n "quick_start_${cfg}" >/dev/null 2>&1
diff --git a/v1_api_demo/quick_start/trainer_config.bidi-lstm.py b/v1_api_demo/quick_start/trainer_config.bidi-lstm.py
deleted file mode 100644
index ca1d1f8d099b5a3f5276c108855c5e890e7214fe..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.bidi-lstm.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-
-bi_lstm = bidirectional_lstm(input=emb, size=128)
-dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-
-output = fc_layer(
-    input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.cnn.py b/v1_api_demo/quick_start/trainer_config.cnn.py
deleted file mode 100644
index f8c3d511f323ed9ec96be0a1951014c6db639003..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.cnn.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-data = data_layer(name="word", size=len(word_dict))
-embedding = embedding_layer(input=data, size=128)
-conv = sequence_conv_pool(input=embedding, context_len=3, hidden_size=512)
-output = fc_layer(input=conv, size=2, act=SoftmaxActivation())
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.db-lstm.py b/v1_api_demo/quick_start/trainer_config.db-lstm.py
deleted file mode 100644
index fba802b4600b33cfbfd0820cce1f47e4d0f948ae..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.db-lstm.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-
-hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
-lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
-
-input_layers = [hidden_0, lstm_0]
-
-for i in range(1, 8):
-    fc = fc_layer(input=input_layers, size=128)
-    lstm = lstmemory(
-        input=fc,
-        layer_attr=ExtraAttr(drop_rate=0.1),
-        reverse=(i % 2) == 1, )
-    input_layers = [fc, lstm]
-
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-
-output = fc_layer(
-    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.emb.py b/v1_api_demo/quick_start/trainer_config.emb.py
deleted file mode 100644
index 7410397ef656e363b232787995d3a869cd11b655..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.emb.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
-
-data = data_layer(name="word", size=len(word_dict))
-embedding = embedding_layer(input=data, size=128)
-avg = pooling_layer(input=embedding, pooling_type=AvgPooling())
-output = fc_layer(input=avg, size=2, act=SoftmaxActivation())
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.lr.py b/v1_api_demo/quick_start/trainer_config.lr.py
deleted file mode 100644
index e5105aa89532d71c80c8ec77ca98ac6a8e9c8c58..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.lr.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = get_config_arg('dict_file', str, "./data/dict.txt")
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-
-# define the data sources for the model.
-# We need to use different process for training and prediction.
-# For training, the input data includes both word IDs and labels.
-# For prediction, the input data only includs word Ids.
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_bow",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-# Define the data for text features. The size of the data layer is the number
-# of words in the dictionary.
-data = data_layer(name="word", size=len(word_dict))
-
-# Define a fully connected layer with logistic activation.
-# (also called softmax activation).
-output = fc_layer(input=data, size=2, act=SoftmaxActivation())
-
-if not is_predict:
-    # For training, we need label and cost
-
-    # define the category id for each example.
-    # The size of the data layer is the number of labels.
-    label = data_layer(name="label", size=2)
-
-    # Define cross-entropy classification loss and error.
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
-else:
-    # For prediction, no label is needed. We need to output
-    # We need to output classification result, and class probabilities.
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
diff --git a/v1_api_demo/quick_start/trainer_config.lstm.py b/v1_api_demo/quick_start/trainer_config.lstm.py
deleted file mode 100644
index 43b4ddac2dca5f6b9aa28f055e843abf12e92312..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.lstm.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(
-    input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
-lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.resnet-lstm.py b/v1_api_demo/quick_start/trainer_config.resnet-lstm.py
deleted file mode 100644
index 89a837abb7cdeaaa249160123e1f2001d23d7aa1..0000000000000000000000000000000000000000
--- a/v1_api_demo/quick_start/trainer_config.resnet-lstm.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-This configuration is a demonstration of how to implement the stacked LSTM
-with residual connections, i.e. an LSTM layer takes the sum of the hidden states
-and inputs of the previous LSTM layer instead of only the hidden states.
-This architecture is from:
-Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
-Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
-Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
-Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
-George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
-Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
-Google's Neural Machine Translation System: Bridging the Gap between Human and
-Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
-Different from the architecture described in the paper, we use a stack single
-direction LSTM layers as the first layer instead of bi-directional LSTM. Also,
-since this is a demo code, to reduce computation time, we stacked 4 layers
-instead of 8 layers.
-"""
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
-
-previous_input, previous_hidden_state = emb, lstm
-
-for i in range(3):
-    # The input to the current layer is the sum of the hidden state
-    # and input of the previous layer.
-    current_input = addto_layer(input=[previous_input, previous_hidden_state])
-    hidden_state = simple_lstm(
-        input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
-    previous_input, previous_hidden_state = current_input, hidden_state
-
-lstm = previous_hidden_state
-
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(
-    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/sequence_tagging/data/get_data.sh b/v1_api_demo/sequence_tagging/data/get_data.sh
deleted file mode 100755
index 0cdb394035e782b3a647f7f13e79d55b5d3dff48..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/data/get_data.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-wget http://www.cnts.ua.ac.be/conll2000/chunking/train.txt.gz
-wget http://www.cnts.ua.ac.be/conll2000/chunking/test.txt.gz
diff --git a/v1_api_demo/sequence_tagging/data/test.list b/v1_api_demo/sequence_tagging/data/test.list
deleted file mode 100644
index 073c0a0c9063ac55f762ac261746aa73057d70e8..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/data/test.list
+++ /dev/null
@@ -1 +0,0 @@
-data/test.txt.gz
diff --git a/v1_api_demo/sequence_tagging/data/train.list b/v1_api_demo/sequence_tagging/data/train.list
deleted file mode 100644
index 43c24d5f6484a90fe883ad5516fe100d27c9ce47..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/data/train.list
+++ /dev/null
@@ -1 +0,0 @@
-data/train.txt.gz
diff --git a/v1_api_demo/sequence_tagging/dataprovider.py b/v1_api_demo/sequence_tagging/dataprovider.py
deleted file mode 100644
index bb4b4465bc7e032c50c1d21263651e2578af67be..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/dataprovider.py
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-import gzip
-import logging
-
-logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
-logger = logging.getLogger('paddle')
-logger.setLevel(logging.INFO)
-
-OOV_POLICY_IGNORE = 0
-OOV_POLICY_USE = 1
-OOV_POLICY_ERROR = 2
-
-num_original_columns = 3
-
-# Feature combination patterns.
-# [[-1,0], [0,0]]  means previous token at column 0 and current token at 
-# column 0 are combined as one feature.
-patterns = [
-    [[-2, 0]],
-    [[-1, 0]],
-    [[0, 0]],
-    [[1, 0]],
-    [[2, 0]],
-    [[-1, 0], [0, 0]],
-    [[0, 0], [1, 0]],
-    [[-2, 1]],
-    [[-1, 1]],
-    [[0, 1]],
-    [[1, 1]],
-    [[2, 1]],
-    [[-2, 1], [-1, 1]],
-    [[-1, 1], [0, 1]],
-    [[0, 1], [1, 1]],
-    [[1, 1], [2, 1]],
-    [[-2, 1], [-1, 1], [0, 1]],
-    [[-1, 1], [0, 1], [1, 1]],
-    [[0, 1], [1, 1], [2, 1]],
-]
-
-dict_label = {
-    'B-ADJP': 0,
-    'I-ADJP': 1,
-    'B-ADVP': 2,
-    'I-ADVP': 3,
-    'B-CONJP': 4,
-    'I-CONJP': 5,
-    'B-INTJ': 6,
-    'I-INTJ': 7,
-    'B-LST': 8,
-    'I-LST': 9,
-    'B-NP': 10,
-    'I-NP': 11,
-    'B-PP': 12,
-    'I-PP': 13,
-    'B-PRT': 14,
-    'I-PRT': 15,
-    'B-SBAR': 16,
-    'I-SBAR': 17,
-    'B-UCP': 18,
-    'I-UCP': 19,
-    'B-VP': 20,
-    'I-VP': 21,
-    'O': 22
-}
-
-
-def make_features(sequence):
-    length = len(sequence)
-    num_features = len(sequence[0])
-
-    def get_features(pos):
-        if pos < 0:
-            return ['#B%s' % -pos] * num_features
-        if pos >= length:
-            return ['#E%s' % (pos - length + 1)] * num_features
-        return sequence[pos]
-
-    for i in xrange(length):
-        for pattern in patterns:
-            fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
-            sequence[i].append(fname)
-
-
-'''
-Source file format:
-Each line is for one timestep. The features are separated by space.
-An empty line indicates end of a sequence.
-
-cutoff: a list of numbers. If count of a feature is smaller than this,
- it will be ignored.
-if oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of
-i-th column.
-
-return a list of dict for each column
-'''
-
-
-def create_dictionaries(filename, cutoff, oov_policy):
-    def add_to_dict(sequence, dicts):
-        num_features = len(dicts)
-        for features in sequence:
-            l = len(features)
-            assert l == num_features, "Wrong number of features " + line
-            for i in xrange(l):
-                if features[i] in dicts[i]:
-                    dicts[i][features[i]] += 1
-                else:
-                    dicts[i][features[i]] = 1
-
-    num_features = len(cutoff)
-    dicts = []
-    for i in xrange(num_features):
-        dicts.append(dict())
-
-    f = gzip.open(filename, 'rb')
-
-    sequence = []
-
-    for line in f:
-        line = line.strip()
-        if not line:
-            make_features(sequence)
-            add_to_dict(sequence, dicts)
-            sequence = []
-            continue
-        features = line.split(' ')
-        sequence.append(features)
-
-    for i in xrange(num_features):
-        dct = dicts[i]
-        n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
-        todo = []
-        for k, v in dct.iteritems():
-            if v < cutoff[i]:
-                todo.append(k)
-            else:
-                dct[k] = n
-                n += 1
-
-        if oov_policy[i] == OOV_POLICY_USE:
-            # placeholder so that len(dct) will be the number of features
-            # including OOV
-            dct['#OOV#'] = 0
-
-        logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo)))
-        for k in todo:
-            del dct[k]
-
-    f.close()
-    return dicts
-
-
-def initializer(settings, **xargs):
-    cutoff = [3, 1, 0]
-    cutoff += [3] * len(patterns)
-    oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR]
-    oov_policy += [OOV_POLICY_IGNORE] * len(patterns)
-    dicts = create_dictionaries('data/train.txt.gz', cutoff, oov_policy)
-    dicts[2] = dict_label
-    settings.dicts = dicts
-    settings.oov_policy = oov_policy
-    input_types = []
-    num_features = len(dicts)
-    for i in xrange(num_original_columns):
-        input_types.append(integer_sequence(len(dicts[i])))
-        logger.info("slot %s size=%s" % (i, len(dicts[i])))
-    if patterns:
-        dim = 0
-        for i in xrange(num_original_columns, num_features):
-            dim += len(dicts[i])
-        input_types.append(sparse_binary_vector_sequence(dim))
-        logger.info("feature size=%s" % dim)
-    settings.input_types = input_types
-
-
-'''
-if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not
-existed in dicts[i] will be assigned to id 0.
-if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
-in dicts[i].
-'''
-
-
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, filename):
-    input_file = filename
-    dicts = settings.dicts
-    oov_policy = settings.oov_policy
-
-    def gen_sample(sequence):
-        num_features = len(dicts)
-        sample = [list() for i in xrange(num_original_columns)]
-        if patterns:
-            sample.append([])
-        for features in sequence:
-            assert len(features) == num_features, \
-                "Wrong number of features: " + line
-            for i in xrange(num_original_columns):
-                id = dicts[i].get(features[i], -1)
-                if id != -1:
-                    sample[i].append(id)
-                elif oov_policy[i] == OOV_POLICY_IGNORE:
-                    sample[i].append(0xffffffff)
-                elif oov_policy[i] == OOV_POLICY_ERROR:
-                    logger.fatal("Unknown token: %s" % features[i])
-                else:
-                    sample[i].append(0)
-
-            if patterns:
-                dim = 0
-                vec = []
-                for i in xrange(num_original_columns, num_features):
-                    id = dicts[i].get(features[i], -1)
-                    if id != -1:
-                        vec.append(dim + id)
-                    elif oov_policy[i] == OOV_POLICY_IGNORE:
-                        pass
-                    elif oov_policy[i] == OOV_POLICY_ERROR:
-                        logger.fatal("Unknown token: %s" % features[i])
-                    else:
-                        vec.ids.append(dim + 0)
-
-                    dim += len(dicts[i])
-                sample[-1].append(vec)
-        return sample
-
-    num_features = len(dicts)
-    f = gzip.open(input_file, 'rb')
-
-    num_sequences = 0
-    sequence = []
-    for line in f:
-        line = line.strip()
-        if not line:
-            make_features(sequence)
-            yield gen_sample(sequence)
-            sequence = []
-            num_sequences += 1
-            continue
-        features = line.split(' ')
-        sequence.append(features)
-
-    f.close()
-
-    logger.info("num_sequences=%s" % num_sequences)
diff --git a/v1_api_demo/sequence_tagging/linear_crf.py b/v1_api_demo/sequence_tagging/linear_crf.py
deleted file mode 100644
index ea012ba1ae9c790ccefd3dd5f066aa92202128a2..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/linear_crf.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-import math
-
-define_py_data_sources2(
-    train_list="data/train.list",
-    test_list="data/test.list",
-    module="dataprovider",
-    obj="process")
-
-batch_size = 1
-settings(
-    learning_method=MomentumOptimizer(),
-    batch_size=batch_size,
-    regularization=L2Regularization(batch_size * 1e-4),
-    model_average=ModelAverage(0.5),
-    learning_rate=1e-1,
-    learning_rate_decay_a=1e-5,
-    learning_rate_decay_b=0.25, )
-
-num_label_types = 23
-
-
-def get_simd_size(size):
-    return int(math.ceil(float(size) / 8)) * 8
-
-
-# Currently, in order to use sparse_update=True,
-# the size has to be aligned.
-num_label_types = get_simd_size(num_label_types)
-
-features = data_layer(name="features", size=76328)
-word = data_layer(name="word", size=6778)
-pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk", size=num_label_types)
-
-crf_input = fc_layer(
-    input=features,
-    size=num_label_types,
-    act=LinearActivation(),
-    bias_attr=False,
-    param_attr=ParamAttr(
-        initial_std=0, sparse_update=True))
-
-crf = crf_layer(
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(
-        name="crfw", initial_std=0), )
-
-crf_decoding = crf_decoding_layer(
-    size=num_label_types,
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(name="crfw"), )
-
-sum_evaluator(
-    name="error",
-    input=crf_decoding, )
-
-chunk_evaluator(
-    name="chunk_f1",
-    input=crf_decoding,
-    label=chunk,
-    chunk_scheme="IOB",
-    num_chunk_types=11, )
-
-inputs(word, pos, chunk, features)
-outputs(crf)
diff --git a/v1_api_demo/sequence_tagging/readme.md b/v1_api_demo/sequence_tagging/readme.md
deleted file mode 100644
index 2e17fffb83c532f5e5fec1227f169c97c1f20e22..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/readme.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Sequence Tagging
-
-This demo is a sequence model for assigning tags to each token in a sentence. The task is described at <a href = "http://www.cnts.ua.ac.be/conll2000/chunking">CONLL2000 Text Chunking</a> task.
-
-## Download data
-```bash
-cd demo/sequence_tagging
-./data/get_data.sh
-```
-
-## Train model
-```bash
-cd demo/sequence_tagging
-./train.sh
-```
-
-## Model description
-
-We provide two models. One is a linear CRF model (linear_crf.py) with is equivalent to the one at <a href="http://leon.bottou.org/projects/sgd#stochastic_gradient_crfs">leon.bottou.org/projects/sgd</a>. The second one is a stacked bidirectional RNN and CRF model (rnn_crf.py).
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Model name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">F1 score</th>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">linear_crf</td>
-<td class="left"> 1.8M </td>
-<td class="left"> 0.937</td>
-</tr>
-
-<tr>
-<td class="left">rnn_crf</td>
-<td class="left"> 960K </td>
-<td class="left">0.941</td>
-</tr>
-
-</tbody>
-</table>
-</center>
-<br>
diff --git a/v1_api_demo/sequence_tagging/rnn_crf.py b/v1_api_demo/sequence_tagging/rnn_crf.py
deleted file mode 100644
index 937a34df103663ecf0f0827bbfb9d82823c9b902..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/rnn_crf.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-import math
-
-define_py_data_sources2(
-    train_list="data/train.list",
-    test_list="data/test.list",
-    module="dataprovider",
-    obj="process")
-
-batch_size = 16
-settings(
-    learning_method=MomentumOptimizer(),
-    batch_size=batch_size,
-    regularization=L2Regularization(batch_size * 1e-5),
-    model_average=ModelAverage(0.5),
-    learning_rate=2e-3,
-    learning_rate_decay_a=5e-7,
-    learning_rate_decay_b=0.5, )
-
-word_dim = 128
-hidden_dim = 128
-with_rnn = True
-
-initial_std = 1 / math.sqrt(hidden_dim)
-param_attr = ParamAttr(initial_std=initial_std)
-cpu_layer_attr = ExtraLayerAttribute(device=-1)
-
-default_device(0)
-
-num_label_types = 23
-
-features = data_layer(name="features", size=76328)
-word = data_layer(name="word", size=6778)
-pos = data_layer(name="pos", size=44)
-chunk = data_layer(
-    name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
-
-emb = embedding_layer(
-    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
-
-hidden1 = mixed_layer(
-    size=hidden_dim,
-    act=STanhActivation(),
-    bias_attr=True,
-    input=[
-        full_matrix_projection(emb), table_projection(
-            pos, param_attr=param_attr)
-    ])
-
-if with_rnn:
-    rnn1 = recurrent_layer(
-        act=ReluActivation(),
-        bias_attr=True,
-        input=hidden1,
-        param_attr=ParamAttr(initial_std=0), )
-
-hidden2 = mixed_layer(
-    size=hidden_dim,
-    act=STanhActivation(),
-    bias_attr=True,
-    input=[full_matrix_projection(hidden1)] +
-    ([full_matrix_projection(
-        rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
-
-if with_rnn:
-    rnn2 = recurrent_layer(
-        reverse=True,
-        act=ReluActivation(),
-        bias_attr=True,
-        input=hidden2,
-        param_attr=ParamAttr(initial_std=0), )
-
-crf_input = mixed_layer(
-    size=num_label_types,
-    bias_attr=False,
-    input=[full_matrix_projection(hidden2), ] +
-    ([full_matrix_projection(
-        rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
-
-crf = crf_layer(
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(
-        name="crfw", initial_std=0),
-    layer_attr=cpu_layer_attr, )
-
-crf_decoding = crf_decoding_layer(
-    size=num_label_types,
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(name="crfw"),
-    layer_attr=cpu_layer_attr, )
-
-sum_evaluator(
-    name="error",
-    input=crf_decoding, )
-
-chunk_evaluator(
-    name="chunk_f1",
-    input=crf_decoding,
-    label=chunk,
-    chunk_scheme="IOB",
-    num_chunk_types=11, )
-
-inputs(word, pos, chunk, features)
-outputs(crf)
diff --git a/v1_api_demo/sequence_tagging/train.sh b/v1_api_demo/sequence_tagging/train.sh
deleted file mode 100755
index 37e196c84200dc26ccb523076a81dbc393b1280f..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/train.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-paddle train \
-       --config rnn_crf.py \
-       --parallel_nn=1 \
-       --use_gpu=1 \
-       --dot_period=10 \
-       --log_period=1000 \
-       --test_period=0 \
-       --num_passes=10 \
-2>&1 | tee 'train.log'
-paddle usage -l 'train.log' -e $? -n "sequence_tagging_train" >/dev/null 2>&1
diff --git a/v1_api_demo/sequence_tagging/train_linear.sh b/v1_api_demo/sequence_tagging/train_linear.sh
deleted file mode 100755
index ad6e2d8ee7f813c69f9dd250c6f7bbb4403a0ed5..0000000000000000000000000000000000000000
--- a/v1_api_demo/sequence_tagging/train_linear.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-paddle train \
-       --config linear_crf.py \
-       --use_gpu=0 \
-       --dot_period=100 \
-       --log_period=10000 \
-       --test_period=0 \
-       --num_passes=10
-2>&1 | tee 'train_linear.log'
-paddle usage -l 'train_linear.log' -e $? -n "sequence_tagging_train_linear" >/dev/null 2>&1
diff --git a/v1_api_demo/traffic_prediction/README b/v1_api_demo/traffic_prediction/README
deleted file mode 100644
index 4c95188583513c332b7d7cb0a32d59336208e1aa..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/README
+++ /dev/null
@@ -1,7 +0,0 @@
-run by:
-cd ./data
-sh get_data.sh
-cd ..
-sh train.sh
-sh predict.sh
-
diff --git a/v1_api_demo/traffic_prediction/data/get_data.sh b/v1_api_demo/traffic_prediction/data/get_data.sh
deleted file mode 100755
index f2fa548d4709c0361334f117bfb49e18d83c32f4..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/data/get_data.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-set -x
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-#download the dataset
-echo "Downloading traffic data..."
-wget http://paddlepaddle.cdn.bcebos.com/demo/traffic/traffic_data.tar.gz
-
-#extract package
-echo "Unzipping..."
-tar -zxvf traffic_data.tar.gz
-
-echo "data/speeds.csv" > train.list
-echo "data/speeds.csv" > test.list
-echo "data/speeds.csv" > pred.list
-
-echo "Done."
diff --git a/v1_api_demo/traffic_prediction/dataprovider.py b/v1_api_demo/traffic_prediction/dataprovider.py
deleted file mode 100644
index c7883b6950c369ee67c39b80ce1cefbbf9350459..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/dataprovider.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-import sys
-import numpy as np
-TERM_NUM = 24
-FORECASTING_NUM = 24
-LABEL_VALUE_NUM = 4
-
-
-def initHook(settings, file_list, **kwargs):
-    """
-    Init hook is invoked before process data. It will set obj.slots and store data meta.
-
-    :param settings: global object. It will passed to process routine.
-    :type obj: object
-    :param file_list: the meta file object, which passed from trainer_config.py,but unused in this function.
-    :param kwargs: unused other arguments.
-    """
-    del kwargs  #unused 
-
-    settings.pool_size = sys.maxint
-    #Use a time seires of the past as feature.
-    #Dense_vector's expression form is [float,float,...,float]
-    settings.input_types = [dense_vector(TERM_NUM)]
-    #There are next FORECASTING_NUM fragments you need predict.
-    #Every predicted condition at time point has four states.
-    for i in range(FORECASTING_NUM):
-        settings.input_types.append(integer_value(LABEL_VALUE_NUM))
-
-
-@provider(
-    init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
-def process(settings, file_name):
-    with open(file_name) as f:
-        #abandon fields name
-        f.next()
-        for row_num, line in enumerate(f):
-            speeds = map(int, line.rstrip('\r\n').split(",")[1:])
-            # Get the max index.
-            end_time = len(speeds)
-            # Scanning and generating samples
-            for i in range(TERM_NUM, end_time - FORECASTING_NUM):
-                # For dense slot
-                pre_spd = map(float, speeds[i - TERM_NUM:i])
-
-                # Integer value need predicting, values start from 0, so every one minus 1.
-                fol_spd = [j - 1 for j in speeds[i:i + FORECASTING_NUM]]
-
-                # Predicting label is missing, abandon the sample.
-                if -1 in fol_spd:
-                    continue
-                yield [pre_spd] + fol_spd
-
-
-def predict_initHook(settings, file_list, **kwargs):
-    settings.pool_size = sys.maxint
-    settings.input_types = [dense_vector(TERM_NUM)]
-
-
-@provider(init_hook=predict_initHook, should_shuffle=False)
-def process_predict(settings, file_name):
-    with open(file_name) as f:
-        #abandon fields name
-        f.next()
-        for row_num, line in enumerate(f):
-            speeds = map(int, line.rstrip('\r\n').split(","))
-            end_time = len(speeds)
-            pre_spd = map(float, speeds[end_time - TERM_NUM:end_time])
-            yield pre_spd
diff --git a/v1_api_demo/traffic_prediction/gen_result.py b/v1_api_demo/traffic_prediction/gen_result.py
deleted file mode 100644
index 3da70b30315f863fd3582583e9a29540a09c1e7f..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/gen_result.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-res = []
-with open('./rank-00000') as f:
-    for line in f:
-        pred = map(int, line.strip('\r\n;').split(";"))
-        #raw prediction range from 0 to 3
-        res.append([i + 1 for i in pred])
-
-file_name = open('./data/pred.list').read().strip('\r\n')
-
-FORECASTING_NUM = 24
-header = [
-    'id',
-    '201604200805',
-    '201604200810',
-    '201604200815',
-    '201604200820',
-    '201604200825',
-    '201604200830',
-    '201604200835',
-    '201604200840',
-    '201604200845',
-    '201604200850',
-    '201604200855',
-    '201604200900',
-    '201604200905',
-    '201604200910',
-    '201604200915',
-    '201604200920',
-    '201604200925',
-    '201604200930',
-    '201604200935',
-    '201604200940',
-    '201604200945',
-    '201604200950',
-    '201604200955',
-    '201604201000',
-]
-###################
-## To CSV format ##
-###################
-with open(file_name) as f:
-    f.next()
-    print ','.join(header)
-    for row_num, line in enumerate(f):
-        fields = line.rstrip('\r\n').split(',')
-        linkid = fields[0]
-        print linkid + ',' + ','.join(map(str, res[row_num]))
diff --git a/v1_api_demo/traffic_prediction/predict.sh b/v1_api_demo/traffic_prediction/predict.sh
deleted file mode 100755
index 2dbd5e8805dd97d35c7d58917f8ec6b5033bda03..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/predict.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.py
-# pass choice 
-model="output/pass-00000"
-paddle train \
-    --config=$cfg \
-    --use_gpu=false \
-    --job=test \
-    --init_model_path=$model \
-    --config_args=is_predict=1 \
-    --predict_output_dir=. 
-
-python gen_result.py > result.csv
-
-rm -rf rank-00000
diff --git a/v1_api_demo/traffic_prediction/train.sh b/v1_api_demo/traffic_prediction/train.sh
deleted file mode 100755
index 48dfc5604f80042598c5c779bd450a5808fdfb64..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/train.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.py
-paddle train \
-  --config=$cfg \
-  --save_dir=./output \
-  --trainer_count=4 \
-  --log_period=1000 \
-  --dot_period=10 \
-  --num_passes=10 \
-  --use_gpu=false \
-  --show_parameter_stats_period=3000 \
-  2>&1 | tee 'train.log'
diff --git a/v1_api_demo/traffic_prediction/trainer_config.py b/v1_api_demo/traffic_prediction/trainer_config.py
deleted file mode 100755
index 52d678624aff7ca2264c3c20e320004217d14397..0000000000000000000000000000000000000000
--- a/v1_api_demo/traffic_prediction/trainer_config.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-
-################################### DATA Configuration #############################################
-is_predict = get_config_arg('is_predict', bool, False)
-trn = './data/train.list' if not is_predict else None
-tst = './data/test.list' if not is_predict else './data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn, test_list=tst, module="dataprovider", obj=process)
-################################### Parameter Configuaration #######################################
-TERM_NUM = 24
-FORECASTING_NUM = 24
-emb_size = 16
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=1e-3,
-    learning_method=RMSPropOptimizer())
-################################### Algorithm Configuration ########################################
-
-output_label = []
-
-link_encode = data_layer(name='link_encode', size=TERM_NUM)
-for i in xrange(FORECASTING_NUM):
-    # Each task share same weight.
-    link_param = ParamAttr(
-        name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
-    link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param)
-    score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())
-    if is_predict:
-        maxid = maxid_layer(score)
-        output_label.append(maxid)
-    else:
-        # Multi-task training.
-        label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4)
-        cls = classification_cost(
-            input=score, name="cost_%dmin" % ((i + 1) * 5), label=label)
-        output_label.append(cls)
-outputs(output_label)
diff --git a/v1_api_demo/vae/README.md b/v1_api_demo/vae/README.md
deleted file mode 100644
index e55d483b023773900729622a6cac44116fc79c76..0000000000000000000000000000000000000000
--- a/v1_api_demo/vae/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-#Variational Autoencoder (VAE)
-
-This demo implements VAE training described in the original paper (https://arxiv.org/abs/1312.6114).
-
-
-In order to run the model, first download the MNIST dataset by running the shell script in ./data.
-
-Then you can run the command below. The flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).  
-
-$python vae_train.py [--use_gpu 1]
-
-The generated images will be stored in ./samples/
-The corresponding models will be stored in ./params/
diff --git a/v1_api_demo/vae/data/get_mnist_data.sh b/v1_api_demo/vae/data/get_mnist_data.sh
deleted file mode 100755
index a77c81bf5af9ddb6634ff89460797ca543c5e517..0000000000000000000000000000000000000000
--- a/v1_api_demo/vae/data/get_mnist_data.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env sh
-# This script downloads the mnist data and unzips it.
-set -e
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-rm -rf "$DIR/mnist_data"
-mkdir "$DIR/mnist_data"
-cd "$DIR/mnist_data"
-
-echo "Downloading..."
-
-for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
-do
-    if [ ! -e $fname ]; then
-        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
-        gunzip ${fname}.gz
-    fi
-done
diff --git a/v1_api_demo/vae/dataloader.py b/v1_api_demo/vae/dataloader.py
deleted file mode 100644
index e9ff95d44f825cd941b5687f754618e66d491e7f..0000000000000000000000000000000000000000
--- a/v1_api_demo/vae/dataloader.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-
-class MNISTloader():
-    def __init__(self,
-                 data_path="./data/mnist_data/",
-                 batch_size=60,
-                 process='train'):
-        self.batch_size = batch_size
-        self.data_path = data_path
-        self._pointer = 0
-        self.image_batches = np.array([])
-        self.process = process
-
-    def _extract_images(self, filename, n):
-        f = open(filename, 'rb')
-        f.read(16)
-        data = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
-        #Mapping data into [-1, 1]
-        data = data / 255. * 2. - 1
-        data_batches = np.split(data, 60000 / self.batch_size, 0)
-
-        f.close()
-
-        return data_batches
-
-    @property
-    def pointer(self):
-        return self._pointer
-
-    def load_data(self):
-        TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
-        TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path
-
-        if self.process == 'train':
-            self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
-        else:
-            self.image_batches = self._extract_images(TEST_IMAGES, 10000)
-
-    def next_batch(self):
-        batch = self.image_batches[self._pointer]
-        self._pointer = (self._pointer + 1) % (60000 / self.batch_size)
-        return np.array(batch)
-
-    def reset_pointer(self):
-        self._pointer = 0
diff --git a/v1_api_demo/vae/vae_conf.py b/v1_api_demo/vae/vae_conf.py
deleted file mode 100644
index 301dd23793d19ec5946cc7bb07e32c53c04a972b..0000000000000000000000000000000000000000
--- a/v1_api_demo/vae/vae_conf.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-import numpy as np
-
-is_generating = get_config_arg("is_generating", bool, False)
-
-settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())
-
-X_dim = 28 * 28
-h_dim = 128
-z_dim = 100
-
-
-def reparameterization(mu, logvar):
-    eps = ParamAttr(initial_mean=0., initial_std=1)
-    with mixed_layer() as sigma:
-        sigma += dotmul_projection(layer_math.exp(logvar) * 0.5, param_attr=eps)
-    return mu + sigma
-
-
-def q_func(X):
-    """
-    xavier initialization
-    """
-    param_attr = ParamAttr(
-        name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
-    mu_param = ParamAttr(
-        name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
-    logvar_param = ParamAttr(
-        name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
-
-    bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
-    mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
-    logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)
-
-    share_layer = fc_layer(
-        X,
-        size=h_dim,
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        act=ReluActivation())
-
-    return (fc_layer(
-        share_layer,
-        size=z_dim,
-        param_attr=mu_param,
-        bias_attr=mu_bias,
-        act=LinearActivation()), fc_layer(
-            share_layer,
-            size=z_dim,
-            param_attr=logvar_param,
-            bias_attr=logvar_bias,
-            act=LinearActivation()))
-
-
-def generator(z):
-
-    hidden_param = ParamAttr(
-        name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
-    hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
-    prob_param = ParamAttr(
-        name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
-    prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)
-
-    hidden_layer = fc_layer(
-        z,
-        size=h_dim,
-        act=ReluActivation(),
-        param_attr=hidden_param,
-        bias_attr=hidden_bias)
-    prob = fc_layer(
-        hidden_layer,
-        size=X_dim,
-        act=SigmoidActivation(),
-        param_attr=prob_param,
-        bias_attr=prob_bias)
-
-    return prob
-
-
-def reconstruct_error(prob, X):
-    cost = multi_binary_label_cross_entropy(input=prob, label=X)
-    return cost
-
-
-def KL_loss(mu, logvar):
-    with mixed_layer() as mu_square:
-        mu_square += dotmul_operator(mu, mu, scale=1.)
-
-    cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)
-
-    return cost
-
-
-if not is_generating:
-    x_batch = data_layer(name='x_batch', size=X_dim)
-    mu, logvar = q_func(x_batch)
-    z_samples = reparameterization(mu, logvar)
-    prob = generator(z_samples)
-    outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
-else:
-    z_samples = data_layer(name='noise', size=z_dim)
-    outputs(generator(z_samples))
diff --git a/v1_api_demo/vae/vae_train.py b/v1_api_demo/vae/vae_train.py
deleted file mode 100644
index 1babb011c77b92861cc680a2e1aaa8c9ae5d97b5..0000000000000000000000000000000000000000
--- a/v1_api_demo/vae/vae_train.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import random
-import numpy as np
-import cPickle
-import sys, os
-from PIL import Image
-
-from paddle.trainer.config_parser import parse_config
-from paddle.trainer.config_parser import logger
-import py_paddle.swig_paddle as api
-import dataloader
-import matplotlib.pyplot as plt
-
-
-def plot_samples(samples):
-    fig = plt.figure(figsize=(4, 4))
-    gs = gridspec.GridSpec(4, 4)
-    gs.update(wspace=0.05, hspace=0.05)
-    for i, sample in enumerate(samples):
-        plt.subplot(gs[i])
-        plt.axis('off')
-        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
-
-    return fig
-
-
-def CHECK_EQ(a, b):
-    assert a == b, "a=%s, b=%s" % (a, b)
-
-
-def get_fake_samples(generator_machine, batch_size, noise):
-    gen_inputs = api.Arguments.createArguments(1)
-    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
-    gen_outputs = api.Arguments.createArguments(0)
-    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
-    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
-    return fake_samples
-
-
-def copy_shared_parameters(src, dst):
-    '''
-    copy the parameters from src to dst
-    :param src: the source of the parameters
-    :type src: GradientMachine
-    :param dst: the destination of the parameters
-    :type dst: GradientMachine
-    '''
-    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
-    src_params = dict([(p.getName(), p) for p in src_params])
-
-    for i in xrange(dst.getParameterSize()):
-        dst_param = dst.getParameter(i)
-        src_param = src_params.get(dst_param.getName(), None)
-        if src_param is None:
-            continue
-        src_value = src_param.getBuf(api.PARAMETER_VALUE)
-        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
-        CHECK_EQ(len(src_value), len(dst_value))
-        dst_value.copyFrom(src_value)
-        dst_param.setValueUpdated()
-
-
-def find(iterable, cond):
-    for item in iterable:
-        if cond(item):
-            return item
-    return None
-
-
-def get_layer_size(model_conf, layer_name):
-    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
-    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
-    return layer_conf.size
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--use_gpu", default="1", help="1 means use gpu for training")
-    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
-    args = parser.parse_args()
-    use_gpu = args.use_gpu
-    assert use_gpu in ["0", "1"]
-
-    if not os.path.exists("./samples/"):
-        os.makedirs("./samples/")
-
-    if not os.path.exists("./params/"):
-        os.makedirs("./params/")
-
-    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
-                   '--log_period=1000', '--gpu_id=' + args.gpu_id,
-                   '--save_dir=' + "./params/")
-
-    conf = "vae_conf.py"
-
-    trainer_conf = parse_config(conf, "is_generating=False")
-    gener_conf = parse_config(conf, "is_generating=True")
-
-    batch_size = trainer_conf.opt_config.batch_size
-
-    noise_dim = get_layer_size(gener_conf.model_config, "noise")
-
-    mnist = dataloader.MNISTloader(batch_size=batch_size)
-    mnist.load_data()
-
-    training_machine = api.GradientMachine.createFromConfigProto(
-        trainer_conf.model_config)
-
-    generator_machine = api.GradientMachine.createFromConfigProto(
-        gener_conf.model_config)
-
-    trainer = api.Trainer.create(trainer_conf, training_machine)
-
-    trainer.startTrain()
-
-    for train_pass in xrange(100):
-        trainer.startTrainPass()
-        mnist.reset_pointer()
-        i = 0
-        it = 0
-        while mnist.pointer != 0 or i == 0:
-            X = mnist.next_batch().astype('float32')
-
-            inputs = api.Arguments.createArguments(1)
-            inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))
-
-            trainer.trainOneDataBatch(batch_size, inputs)
-
-            if it % 1000 == 0:
-
-                outputs = api.Arguments.createArguments(0)
-                training_machine.forward(inputs, outputs, api.PASS_TEST)
-                loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
-                print "\niter: {}".format(str(it).zfill(3))
-                print "VAE loss: {}".format(str(loss).zfill(3))
-
-                #Sync parameters between networks (GradientMachine) at the beginning
-                copy_shared_parameters(training_machine, generator_machine)
-
-                z_samples = np.random.randn(batch_size,
-                                            noise_dim).astype('float32')
-                samples = get_fake_samples(generator_machine, batch_size,
-                                           z_samples)
-
-                #Generating the first 16 images for a picture. 
-                figure = plot_samples(samples[:16])
-                plt.savefig(
-                    "./samples/{}_{}.png".format(
-                        str(train_pass).zfill(3), str(i).zfill(3)),
-                    bbox_inches='tight')
-                plt.close(figure)
-                i += 1
-            it += 1
-
-        trainer.finishTrainPass()
-    trainer.finishTrain()
-
-
-if __name__ == '__main__':
-    main()