diff --git a/demo/quick_start/data/README.md b/demo/quick_start/data/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..63abcf7ebf31903213e44cf492b93e09f61db14e
--- /dev/null
+++ b/demo/quick_start/data/README.md
@@ -0,0 +1,9 @@
+This dataset consists of electronics product reviews associated with
+binary labels (positive/negative) for sentiment classification.
+
+The preprocessed data can be downloaded with the script `get_data.sh`.
+The data was derived from reviews_Electronics_5.json.gz at
+
+http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+
+If you want to process the raw data, you can use the script `proc_from_raw_data/get_data.sh`.
diff --git a/demo/quick_start/data/get_data.sh b/demo/quick_start/data/get_data.sh
index f355d63225b28ab495b34e72dd3be8d237ae08f4..952de3f3c8f52a7a6f84412f9b38f16ac2503ac2 100755
--- a/demo/quick_start/data/get_data.sh
+++ b/demo/quick_start/data/get_data.sh
@@ -17,14 +17,11 @@ set -e
 DIR="$( cd "$(dirname "$0")" ; pwd -P )"
 cd $DIR
 
-echo "Downloading Amazon Electronics reviews data..."
-# http://jmcauley.ucsd.edu/data/amazon/
-wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+# Download the preprocessed data
+wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
 
-echo "Downloading mosesdecoder..."
-#https://github.com/moses-smt/mosesdecoder
-wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
+# Extract package
+tar zxvf preprocessed_data.tar.gz
 
-unzip master.zip
-rm master.zip
-echo "Done."
+# Remove compressed package
+rm preprocessed_data.tar.gz
diff --git a/demo/quick_start/data/pred.list b/demo/quick_start/data/pred.list
deleted file mode 100644
index d88b2b63851101a8b40e706b32d8c17b5fabb201..0000000000000000000000000000000000000000
--- a/demo/quick_start/data/pred.list
+++ /dev/null
@@ -1 +0,0 @@
-./data/pred.txt
diff --git a/demo/quick_start/data/pred.txt b/demo/quick_start/data/pred.txt
deleted file mode 100644
index 6ed5f738ddaff6645448d5e606dcef1baf01b282..0000000000000000000000000000000000000000
--- a/demo/quick_start/data/pred.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-the device is cute , but that &apos;s just about all that &apos;s good. the specs are what you &apos;d expect : it &apos;s a wifi mic , with some noise filter options. the app has the option to upload your baby &apos;s name and photo , which is a cutesy touch. but the app is otherwise unstable and useless unless you upgrade for $ 60 / year.set up involves downloading the app , turning on the mic , switching your phone to the wifi network of the mic , telling the app your wifi settings , switching your wifi back to your home router. the app is then directly connected to your mic.the app is adware ! the main screen says &quot; cry notifications on / off : upgrade to evoz premium and receive a text message of email when your baby is crying &quot; .but the adware points out an important limitation , this monitor is only intended to be used from your home network. if you want to access it remotely , get a webcam. this app would make a lot more sense of the premium features were included with the hardware .
-don &apos;t be fooled by my one star rating. if there was a zero , i would have selected it. this product was a waste of my money.it has never worked like the company said it supposed to. i only have one device , an iphone 4gs. after charging the the iphone mid way , the i.sound portable power max 16,000 mah is completely drained. the led light no longer lit up. when plugging the isound portable power max into a wall outlet to charge , it would charge for about 20-30 minutes and then all four battery led indicator lit up showing a full charge. i would leave it on to charge for the full 8 hours or more but each time with the same result upon using. don &apos;t buy this thing. put your money to good use elsewhere .
diff --git a/demo/quick_start/preprocess.sh b/demo/quick_start/data/proc_from_raw_data/get_data.sh
similarity index 65%
rename from demo/quick_start/preprocess.sh
rename to demo/quick_start/data/proc_from_raw_data/get_data.sh
index c9190e2dd2ef754bf3c7287006322b52493dc3a0..cd85e26842dfccea78e4f26bdfee938887021f03 100755
--- a/demo/quick_start/preprocess.sh
+++ b/demo/quick_start/data/proc_from_raw_data/get_data.sh
@@ -16,10 +16,26 @@
 # 1. size of pos : neg = 1:1.
 # 2. size of testing set = min(25k, len(all_data) * 0.1), others is traning set.
 # 3. distinct train set and test set.
-# 4. build dict
 
 set -e
 
+DIR="$( cd "$(dirname "$0")" && pwd -P )"  # '&&': a failed cd must not yield the caller's cwd
+cd "$DIR"  # quoted so that paths containing spaces are not word-split
+
+# Download data
+echo "Downloading Amazon Electronics reviews data..."
+# http://jmcauley.ucsd.edu/data/amazon/
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+echo "Downloading mosesdecoder..."
+# https://github.com/moses-smt/mosesdecoder
+wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
+
+unzip master.zip
+rm master.zip
+
+##################
+# Preprocess data 
+echo "Preprocess data..."
 export LC_ALL=C
 UNAME_STR=`uname`
 
@@ -29,11 +45,11 @@ else
   SHUF_PROG='gshuf'
 fi
 
-mkdir -p data/tmp
-python preprocess.py -i data/reviews_Electronics_5.json.gz
+mkdir -p tmp
+python preprocess.py -i reviews_Electronics_5.json.gz
 # uniq and shuffle
-cd data/tmp
-echo 'uniq and shuffle...'
+cd tmp
+echo 'Uniq and shuffle...'
 cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
 cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
 
@@ -53,11 +69,11 @@ cat train.pos train.neg | ${SHUF_PROG} >../train.txt
 cat test.pos test.neg | ${SHUF_PROG} >../test.txt
 
 cd -
-echo 'data/train.txt' > data/train.list
-echo 'data/test.txt' > data/test.list
+echo 'train.txt' > train.list
+echo 'test.txt' > test.list
 
 # use 30k dict
-rm -rf data/tmp
-mv data/dict.txt data/dict_all.txt
-cat data/dict_all.txt | head -n 30001 > data/dict.txt
-echo 'preprocess finished'
+rm -rf tmp
+mv dict.txt dict_all.txt
+cat dict_all.txt | head -n 30001 > dict.txt
+echo 'Done.'
diff --git a/demo/quick_start/preprocess.py b/demo/quick_start/data/proc_from_raw_data/preprocess.py
similarity index 95%
rename from demo/quick_start/preprocess.py
rename to demo/quick_start/data/proc_from_raw_data/preprocess.py
index d87fad632a7429f7d9682badabe4c72ca127354f..56c2c5f16ceb63ff88fa51ed78c2e77ea5b64592 100755
--- a/demo/quick_start/preprocess.py
+++ b/demo/quick_start/data/proc_from_raw_data/preprocess.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-1. (remove HTML before or not)tokensizing
+1. Tokenize the words and punctuation 
 2. pos sample : rating score 5; neg sample: rating score 1-2.
 
 Usage:
@@ -76,7 +76,11 @@ def tokenize(sentences):
     sentences : a list of input sentences.
     return: a list of processed text.
     """
-    dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
+    dir = './mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
+    if not os.path.exists(dir):
+        sys.exit(
+            "The ./mosesdecoder-master/scripts/tokenizer/tokenizer.perl does not exists."
+        )
     tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
     assert isinstance(sentences, list)
     text = "\n".join(sentences)
@@ -104,7 +108,7 @@ def tokenize_batch(id):
         num_batch, instance, pre_fix = parse_queue.get()
         if num_batch == -1:  ### parse_queue finished
             tokenize_queue.put((-1, None, None))
-            sys.stderr.write("tokenize theread %s finish\n" % (id))
+            sys.stderr.write("Thread %s finish\n" % (id))
             break
         tokenize_instance = tokenize(instance)
         tokenize_queue.put((num_batch, tokenize_instance, pre_fix))
diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh
index 55e33f4685627ed483aa6642c518a33558091531..99487e0d9a8c31d884c4a338386ad0ff8e5d9dc7 100644
--- a/demo/semantic_role_labeling/data/get_data.sh
+++ b/demo/semantic_role_labeling/data/get_data.sh
@@ -14,10 +14,10 @@
 # limitations under the License.
 set -e
 wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
-wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
-wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
-wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
-wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt 
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt 
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb
 tar -xzvf conll05st-tests.tar.gz
 rm conll05st-tests.tar.gz
 cp ./conll05st-release/test.wsj/words/test.wsj.words.gz  .
diff --git a/doc/demo/quick_start/index_en.md b/doc/demo/quick_start/index_en.md
index 659485d9be1b6a3e9759a2fd040cb09d1f2a3005..ec548b5393d7b210d6409328c00917aeb679a451 100644
--- a/doc/demo/quick_start/index_en.md
+++ b/doc/demo/quick_start/index_en.md
@@ -59,12 +59,11 @@ To build your text classification system, your code will need to perform five st
 ## Preprocess data into standardized format
 In this example, you are going to use [Amazon electronic product review dataset](http://jmcauley.ucsd.edu/data/amazon/) to build a bunch of deep neural network models for text classification. Each text in this dataset is a product review. This dataset has two categories: “positive” and “negative”. Positive means the reviewer likes the product, while negative means the reviewer does not like the product.
 
-`demo/quick_start` in the [source code](https://github.com/baidu/Paddle) provides scripts for downloading data and preprocessing data as shown below. The data process takes several minutes (about 3 minutes in our machine).
+`demo/quick_start` in the [source code](https://github.com/PaddlePaddle/Paddle) provides a script for downloading the preprocessed data, as shown below. (If you want to process the raw data, you can use the script `demo/quick_start/data/proc_from_raw_data/get_data.sh`).
 
 ```bash
 cd demo/quick_start
 ./data/get_data.sh
-./preprocess.sh
 ```
 
 ## Transfer Data to Model
diff --git a/doc_cn/build_and_install/install/docker_install.rst b/doc_cn/build_and_install/install/docker_install.rst
index a5f5fb117e11e8ac1ae49e4271e826fa12d5e810..40339659be406ec72da8ad89b6d5dd38d72bb5ae 100644
--- a/doc_cn/build_and_install/install/docker_install.rst
+++ b/doc_cn/build_and_install/install/docker_install.rst
@@ -1,9 +1,7 @@
 安装PaddlePaddle的Docker镜像
 ============================
 
-PaddlePaddle提供了Docker的使用镜像。PaddlePaddle推荐使用Docker进行PaddlePaddle的部署和
-运行。Docker是一个基于容器的轻量级虚拟环境。具有和宿主机相近的运行效率，并提供
-了非常方便的二进制分发手段。
+PaddlePaddle项目提供官方 `Docker <https://www.docker.com/>`_ 镜像。Docker镜像是我们目前唯一官方支持的部署和运行方式。
 
 下述内容将分为如下几个类别描述。
 
@@ -41,7 +39,7 @@ PaddlePaddle提供的Docker镜像版本
 * CPU WITHOUT AVX: CPU版本，不支持AVX指令集的CPU也可以运行
 * GPU WITHOUT AVX: GPU版本，不需要AVX指令集的CPU也可以运行。
 
-用户可以选择对应版本的docker image。使用如下脚本可以确定本机的CPU知否支持 :code:`AVX` 指令集\:
+用户可以选择对应版本的docker image。使用如下脚本可以确定本机的CPU是否支持 :code:`AVX` 指令集\:
 
 ..  code-block:: bash
 
@@ -67,7 +65,7 @@ mac osx或者是windows机器，请参考
 
 ..  code-block:: bash
     
-    $ docker run -it paddledev/paddlepaddle:cpu-latest
+    $ docker run -it paddledev/paddle:cpu-latest
 
 即可启动和进入PaddlePaddle的container。如果运行GPU版本的PaddlePaddle，则需要先将
 cuda相关的Driver和设备映射进container中，脚本类似于
@@ -76,7 +74,7 @@ cuda相关的Driver和设备映射进container中，脚本类似于
 
     $ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
     $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
-    $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddlepaddle:latest-gpu
+    $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
 
 进入Docker container后，运行 :code:`paddle version` 即可打印出PaddlePaddle的版本和构建
 信息。安装完成的PaddlePaddle主体包括三个部分， :code:`paddle` 脚本， python的
diff --git a/doc_cn/demo/quick_start/index.md b/doc_cn/demo/quick_start/index.md
index 4d9b24ba851a7aaaeb0d79bfbeb0703b8878b77f..4a6e07ee1ffd94cf8f781af307b53a96a78e6b93 100644
--- a/doc_cn/demo/quick_start/index.md
+++ b/doc_cn/demo/quick_start/index.md
@@ -32,13 +32,11 @@
 
 ## 数据格式准备(Data Preparation)
 在本问题中，我们使用[Amazon电子产品评论数据](http://jmcauley.ucsd.edu/data/amazon/)，
-将评论分为好评(正样本)和差评(负样本)两类。[源码](https://github.com/baidu/Paddle)的`demo/quick_start`里提供了数据下载脚本
-和预处理脚本。
+将评论分为好评(正样本)和差评(负样本)两类。[源码](https://github.com/PaddlePaddle/Paddle)的`demo/quick_start`里提供了下载已经预处理数据的脚本（如果想从最原始的数据处理，可以使用脚本 `./demo/quick_start/data/proc_from_raw_data/get_data.sh`）。
 
 ```bash
 cd demo/quick_start
 ./data/get_data.sh
-./preprocess.sh
 ```
 
 ## 数据向模型传送(Transfer Data to Model)
@@ -143,7 +141,7 @@ PyDataProvider2</a>。
 
 我们将以基本的逻辑回归网络作为起点，并逐渐展示更加深入的功能。更详细的网络配置
 连接请参考<a href = "../../../doc/layer.html">Layer文档</a>。
-所有配置在[源码](https://github.com/baidu/Paddle)`demo/quick_start`目录，首先列举逻辑回归网络。
+所有配置在[源码](https://github.com/PaddlePaddle/Paddle)`demo/quick_start`目录，首先列举逻辑回归网络。
 
 ### 逻辑回归模型(Logistic Regression)
 
diff --git a/doc_cn/introduction/index.md b/doc_cn/introduction/index.md
deleted file mode 100644
index 164cb7d4943dfbfcc00a2df7329ae2a877b2d703..0000000000000000000000000000000000000000
--- a/doc_cn/introduction/index.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# 简介
-
-PaddlePaddle 是起源于百度的开源深度学习平台。它是简单易用的：你可以通过简单的十数行配置搭建经典的神经网络模型；它也是高效强大的：PaddlePaddle可以支撑复杂集群环境下超大模型的训练，令你受益于深度学习的前沿成果。在百度内部，已经有大量产品线使用了基于PaddlePaddle的深度学习技术。
-
-这份简短的介绍将像你展示如何利用PaddlePaddle解决一个经典的学习问题。
-
-## 1. 一个经典的任务
-
-让我们从一个基础问题开始：<a href="https://www.baidu.com/s?wd=单变量线性回归">单变量的线性回归</a>。问题假定观测到了一批二维空间上的点`(x, y) `，并且已知 `x` 和 `y` 之间存在着某种线性关系，我们的目标是通过观测数据还原这个线性关系。作为一个简单基础的模型，线性回归却有着广泛的应用场景。比如可以想象一个资产定价的简化场景，其中 `x` 对应于房屋的大小，`y` 对应于房屋价格。我们可以通过观察市场上房屋的情况获得二者之间的关系，从而为新房屋的定价提供参考。
-
-
-## 2. 准备数据
-
-假设变量 `X` 和 `Y` 的真实关系为： `Y = 2X + 0.3`，这里展示如何使用观测数据还原这一线性关系。如下Python代码将随机产生2000个观测点，它们将被用作PaddlePaddle的输入。产生PaddlePaddle的输入数据和写一段普通的Python脚本几乎一样，你唯一需要增加的就是定义输入数据的类型。
-
-```python
-# -*- coding:utf-8 -*-
-# dataprovider.py
-from paddle.trainer.PyDataProvider2 import *
-import random
-
-# 定义输入数据的类型: 2个浮点数
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
-def process(settings, input_file):
-    for i in xrange(2000):
-        x = random.random()
-        yield [x], [2*x+0.3]
-```
-
-## 3. 训练模型
-
-为了还原 `Y = 2X + 0.3`，我们先从一条随机的直线 `Y' = wX + b` 开始，然后利用观测数据调整 `w` 和 `b` 使得 `Y'` 和 `Y` 的差距不断减小，最终趋于相同。这个过程就是模型的训练过程，而 `w` 和 `b` 就是模型的参数，即我们的训练目标。
-
-在PaddlePaddle里，该模型的网络配置如下。
-
-```python
-# -*- coding:utf-8 -*-
-# trainer_config.py
-from paddle.trainer_config_helpers import *
-
-# 1. 定义数据来源，调用上面的process函数获得观测数据
-data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None, 
-        module='dataprovider', obj='process',args={})
-
-# 2. 学习算法。控制如何改变模型参数 w 和 b
-settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
-
-# 3. 神经网络配置
-x = data_layer(name='x', size=1)
-y = data_layer(name='y', size=1)
-# 线性计算单元: y_predict = wx + b
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
-# 损失计算，度量 y_predict 和真实 y 之间的差距
-cost = regression_cost(input=y_predict, label=y)
-outputs(cost)
-```
-这段简短的配置展示了PaddlePaddle的基本用法：
-
-- 首先，第一部分定义了数据输入。一般情况下，PaddlePaddle先从一个文件列表里获得数据文件地址，然后交给用户自定义的函数（例如上面的`process`函数）进行读入和预处理从而得到真实输入。本文中由于输入数据是随机生成的不需要读输入文件，所以放一个空列表（`empty.list`）即可。
-
-- 第二部分主要是选择学习算法，它定义了模型参数如何改变。PaddlePaddle提供了很多优秀的学习算法，但这里使用一个简单的基于momentum的算法就足够了，它每次读取12个数据进行计算和模型更新。
-
-- 最后一部分是神经网络的配置。由于PaddlePaddle已经实现了丰富的网络单元（Layer），所以很多时候你需要做的只是声明正确的网络单元并把它们拼接起来。这里使用了三种网络单元：
-	- **数据层**：数据层 `data_layer` 是神经网络的入口，它读入数据并将它们传输到下游的其它单元。这里数据层有两个，分别对应于变量 `X` 和 `Y`。
-	- **全连接层**：全连接层 `fc_layer` 是基础的计算单元，这里利用它建模变量之间的线性关系。计算单元是神经网络的核心，PaddlePaddle支持大量的计算单元和任意深度的网络连接，从而可以挖掘复杂的数据关系。
-	- **回归损失层**：回归损失层 `regression_cost`是众多损失函数层的一种，它们在训练过程作为网络的出口，用来计算模型的表现，并指导模型参数的改变。
-
-这样定义了网络结构并保存为`trainer_config.py`之后，运行训练命令即可：
- ```
- paddle train --config=trainer_config.py --save_dir=./output --num_passes=30
- ```
-
-PaddlePaddle将在观测数据集上迭代训练30轮，并将每轮的模型结果存放在 `./output` 路径下。从输出日志可以看到，随着轮数增加损失函数的输出在不断的减小，这意味着模型在不断的改进，直到逼近真实解：` Y = 2X + 0.3 `
-
-## 4. 模型检验
-
-训练完成后，我们希望能够检验模型的好坏。一种常用的做法是用模型对另外一组数据进行预测，然后评价预测的效果。但在这个例子中，由于已经知道了真实答案，我们可以直接观察模型的参数是否符合预期来进行检验。
-
-PaddlePaddle将每个模型参数作为一个numpy数组单独存为一个文件，所以可以利用如下方法读取模型的参数。
-
-```python
-import numpy as np
-import os
-
-def load(file_name):
-    with open(file_name, 'rb') as f:
-        f.read(16) # skip header for float type.
-        return np.fromfile(f, dtype=np.float32)
-        
-print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b'))
-# w=1.999743, b=0.300137
-```
-<center> ![](./parameters.png) </center>
-
-从图中可以看到，虽然 `w` 和 `b` 都使用随机值初始化，但在起初的几轮训练中它们都在快速逼近真实值，并且后续仍在不断改进，使得最终得到的模型几乎与真实模型重合。
-
-这样，我们就完成了对单变量线性回归问题的解决：将数据输入PaddlePaddle，训练模型，最后验证结果。
-
-## 5. 推荐后续阅读
-
-- <a href="../build_and_install/index.html">安装/编译</a>：PaddlePaddle的安装与编译文档。
-- <a href="../demo/quick_start/index.html">快速入门 </a>：使用商品评论分类任务，系统性的介绍如何一步步改进，最终得到产品级的深度模型。
-- <a href="../demo/index.html">示例</a>：各种实用案例，涵盖图像、文本、推荐等多个领域。
diff --git a/doc_cn/introduction/index.rst b/doc_cn/introduction/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f6eb5456c007ca03ea6002109b1f27b8a99faa0f
--- /dev/null
+++ b/doc_cn/introduction/index.rst
@@ -0,0 +1,102 @@
+# 简介
+
+PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍将向你展示如何利用PaddlePaddle来解决一个经典的线性回归问题。
+
+## 1. 一个经典的任务
+
+我们展示如何用PaddlePaddle解决<a href="https://www.baidu.com/s?wd=单变量线性回归">单变量的线性回归</a>问题。线性回归的输入是一批点`(x, y) `，其中 `y = wx + b + ε`， 而 ε 是一个符合高斯分布的随机变量。线性回归的输出是从这批点估计出来的参数 w 和 b。
+
+一个例子是房产估值。我们假设房产的价格（y）是其大小（x）的一个线性函数，那么我们可以通过收集市场上房子的大小和价格，用来估计线性函数的参数w 和 b。
+
+## 2. 准备数据
+
+假设变量 `x` 和 `y` 的真实关系为： `y = 2x + 0.3 + ε`，这里展示如何使用观测数据来拟合这一线性关系。首先，Python代码将随机产生2000个观测点，作为线性回归的输入。下面脚本符合PaddlePaddle期待的读取数据的Python程序的模式。
+
+```python
+# dataprovider.py
+from paddle.trainer.PyDataProvider2 import *
+import random
+
+# 定义输入数据的类型: 2个浮点数
+@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+def process(settings, input_file):
+    for i in xrange(2000):
+        x = random.random()
+        yield [x], [2*x+0.3]
+```
+
+## 3. 训练模型
+
+为了还原 `y = 2x + 0.3`，我们先从一条随机的直线 `y' = wx + b` 开始，然后利用观测数据调整 `w` 和 `b` 使得 `y'` 和 `y` 的差距不断减小，最终趋于接近。这个过程就是模型的训练过程，而 `w` 和 `b` 就是模型的参数，即我们的训练目标。
+
+在PaddlePaddle里，该模型的网络配置如下。
+
+```python
+# trainer_config.py
+from paddle.trainer_config_helpers import *
+
+# 1. 定义数据来源，调用上面的process函数获得观测数据
+data_file = 'empty.list'
+with open(data_file, 'w') as f: f.writelines(' ')
+define_py_data_sources2(train_list=data_file, test_list=None, 
+        module='dataprovider', obj='process',args={})
+
+# 2. 学习算法。控制如何改变模型参数 w 和 b
+settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
+
+# 3. 神经网络配置
+x = data_layer(name='x', size=1)
+y = data_layer(name='y', size=1)
+# 线性计算网络层: y_predict = wx + b
+y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+# 计算误差函数，即 y_predict 和真实 y 之间的距离
+cost = regression_cost(input=y_predict, label=y)
+outputs(cost)
+```
+这段简短的配置展示了PaddlePaddle的基本用法：
+
+- 第一部分定义了数据输入。一般情况下，PaddlePaddle先从一个文件列表里获得数据文件地址，然后交给用户自定义的函数（例如上面的`process`函数）进行读入和预处理从而得到真实输入。本文中由于输入数据是随机生成的不需要读输入文件，所以放一个空列表（`empty.list`）即可。
+
+- 第二部分主要是选择学习算法，它定义了模型参数改变的规则。PaddlePaddle提供了很多优秀的学习算法，这里使用一个基于momentum的随机梯度下降(SGD)算法，该算法每批量(batch)读取12个采样数据进行随机梯度计算来更新模型参数。
+
+- 最后一部分是神经网络的配置。由于PaddlePaddle已经实现了丰富的网络层，所以很多时候你需要做的只是定义正确的网络层并把它们连接起来。这里使用了三种网络单元：
+	- **数据层**：数据层 `data_layer` 是神经网络的入口，它读入数据并将它们传输到接下来的网络层。这里数据层有两个，分别对应于变量 `x` 和 `y`。
+	- **全连接层**：全连接层 `fc_layer` 是基础的计算单元，这里利用它建模变量之间的线性关系。计算单元是神经网络的核心，PaddlePaddle支持大量的计算单元和任意深度的网络连接，从而可以拟合任意的函数来学习复杂的数据关系。
+	- **回归误差代价层**：回归误差代价层 `regression_cost`是众多误差代价函数层的一种，它们在训练过程作为网络的出口，用来计算模型的误差，是模型参数优化的目标函数。
+
+定义了网络结构并保存为`trainer_config.py`之后，运行以下训练命令：
+ ```
+ paddle train --config=trainer_config.py --save_dir=./output --num_passes=30
+ ```
+
+PaddlePaddle将在观测数据集上迭代训练30轮，并将每轮的模型结果存放在 `./output` 路径下。从输出日志可以看到，随着轮数增加误差代价函数的输出在不断的减小，这意味着模型在训练数据上不断的改进，直到逼近真实解：` y = 2x + 0.3 `
+
+## 4. 模型检验
+
+训练完成后，我们希望能够检验模型的好坏。一种常用的做法是用学习的模型对另外一组测试数据进行预测，评价预测的效果。在这个例子中，由于已经知道了真实答案，我们可以直接观察模型的参数是否符合预期来进行检验。
+
+PaddlePaddle将每个模型参数作为一个numpy数组单独存为一个文件，所以可以利用如下方法读取模型的参数。
+
+```python
+import numpy as np
+import os
+
+def load(file_name):
+    with open(file_name, 'rb') as f:
+        f.read(16) # skip header for float type.
+        return np.fromfile(f, dtype=np.float32)
+        
+print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b'))
+# w=1.999743, b=0.300137
+```
+<center> ![](./parameters.png) </center>
+
+从图中可以看到，虽然 `w` 和 `b` 都使用随机值初始化，但在起初的几轮训练中它们都在快速逼近真实值，并且后续仍在不断改进，使得最终得到的模型几乎与真实模型一致。
+
+这样，我们用PaddlePaddle解决了单变量线性回归问题， 包括数据输入，模型训练和最后的结果验证。
+
+## 5. 推荐后续阅读
+
+- <a href="../build_and_install/index.html">安装/编译</a>：PaddlePaddle的安装与编译文档。
+- <a href="../demo/quick_start/index.html">快速入门 </a>：使用商品评论分类任务，系统性的介绍如何一步步改进，最终得到产品级的深度模型。
+- <a href="../demo/index.html">示例</a>：各种实用案例，涵盖图像、文本、推荐等多个领域。
diff --git a/doc_cn/ui/cmd/dump_config.rst b/doc_cn/ui/cmd/dump_config.rst
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/doc_cn/ui/cmd/index.rst b/doc_cn/ui/cmd/index.rst
index f975d432c07f8f0cdc725af2b29c25b7bd6a0657..31a8b8a79f4a87101bd6030eb4e779fd11d65811 100644
--- a/doc_cn/ui/cmd/index.rst
+++ b/doc_cn/ui/cmd/index.rst
@@ -1,29 +1,20 @@
-PaddlePaddle的命令行参数
-========================
+命令
+====
 
-安装好PaddlePaddle后，在命令行直接敲击 ``paddle`` 或 ``paddle --help`` 会显示如下一些命令行参数。
+安装好PaddlePaddle后，在命令行直接敲击 ``paddle`` 或 ``paddle --help`` 会显示如下一些命令。
 
 * ``train`` Start a paddle_trainer
     启动一个PaddlePaddle训练进程。 ``paddle train`` 可以通过命令行参数 ``-local=true`` 启动一个单机的训练进程；也可以和 ``paddle pserver`` 一起使用启动多机的分布式训练进程。
 * ``pserver`` Start a paddle_pserver_main
     在多机分布式训练下启动PaddlePaddle的parameter server进程。
 * ``version`` Print paddle version
-    用于打印当前PaddlePaddle的版本和编译选项相关信息。
+    用于打印当前PaddlePaddle的版本和编译选项相关信息。常见的输出格式如下：1）第一行说明了PaddlePaddle的版本信息；2）第二行开始说明了一些主要的编译选项，具体意义可以参考 `编译参数选项文件 <../../build_and_install/cmake/compile_options.html>`_ 。
+
+    ..  literalinclude:: paddle_version.txt
+
 * ``merge_model`` Start a paddle_merge_model
     用于将PaddlePaddle的模型参数文件和模型配置文件打包成一个文件，方便做部署分发。
 * ``dump_config`` Dump the trainer config as proto string
     用于将PaddlePaddle的模型配置文件以proto string的格式打印出来。
 * ``make_diagram``
-    使用graphviz对PaddlePaddle的模型配置文件进行绘制。
-
-更详细的介绍请参考各命令行参数文档。
-
-..  toctree::
-    :glob:
-
-    paddle_train.rst
-    paddle_pserver.rst
-    paddle_version.rst
-    merge_model.rst
-    dump_config.rst
-    make_diagram.rst
+    使用graphviz对PaddlePaddle的模型配置文件进行绘制。
\ No newline at end of file
diff --git a/doc_cn/ui/cmd/make_diagram.rst b/doc_cn/ui/cmd/make_diagram.rst
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/doc_cn/ui/cmd/merge_model.rst b/doc_cn/ui/cmd/merge_model.rst
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/doc_cn/ui/cmd/paddle_pserver.rst b/doc_cn/ui/cmd/paddle_pserver.rst
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/doc_cn/ui/cmd/paddle_train.rst b/doc_cn/ui/cmd/paddle_train.rst
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/doc_cn/ui/cmd/paddle_version.rst b/doc_cn/ui/cmd/paddle_version.rst
deleted file mode 100644
index 537c23df75ea8eee5d17cc3f05bf17ed1bdfcb73..0000000000000000000000000000000000000000
--- a/doc_cn/ui/cmd/paddle_version.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-paddle version的命令行参数
-==========================
-
-paddle version用于打印当前的版本信息和相关编译选项。常见的输出格式如下。第一行说明了PaddlePaddle的版本信息，后面跟着一些主要的编译选项。编译选项的具体意义可以参考
-`编译参数选项文件 <../../build_and_install/cmake/compile_options.html>`_
-
-..  literalinclude:: paddle_version.txt
diff --git a/doc_cn/ui/index.rst b/doc_cn/ui/index.rst
index c53ebeefe1df0b40a3a9a165463a7469810ff9a9..ff36c9adb690f4126cf6ee332a9f0b09648270bd 100644
--- a/doc_cn/ui/index.rst
+++ b/doc_cn/ui/index.rst
@@ -11,20 +11,22 @@
     data_provider/dataprovider.rst
     data_provider/pydataprovider2.rst
 
-命令行参数
-==========
+命令及命令行参数
+================
 
 ..  toctree::
+    :maxdepth: 1
 
     cmd/index.rst
 
+* `参数用例 <../../doc/ui/cmd_argument/use_case.html>`_
 * `参数分类 <../../doc/ui/cmd_argument/argument_outline.html>`_
 * `参数描述 <../../doc/ui/cmd_argument/detail_introduction.html>`_
-* `参数用例 <../../doc/ui/cmd_argument/use_case.html>`_
 
 预测
 =======
 
 ..  toctree::
+    :maxdepth: 1
 
     predict/swig_py_paddle.rst
diff --git a/doc_cn/ui/predict/swig_py_paddle.rst b/doc_cn/ui/predict/swig_py_paddle.rst
index 4c0a0de820b7a51319ae0e9b1daac48387852084..89031dd72f5065b6919d873f5611a5e94e8b62e3 100644
--- a/doc_cn/ui/predict/swig_py_paddle.rst
+++ b/doc_cn/ui/predict/swig_py_paddle.rst
@@ -1,8 +1,8 @@
 基于Python的预测
 ================
 
-Python预测接口
---------------
+预测流程
+--------
 
 PaddlePaddle使用swig对常用的预测接口进行了封装，通过编译会生成py_paddle软件包，安装该软件包就可以在python环境下实现模型预测。可以使用python的 ``help()`` 函数查询软件包相关API说明。
 
@@ -20,8 +20,8 @@ PaddlePaddle使用swig对常用的预测接口进行了封装，通过编译会
   通过调用 ``forwardTest()`` 传入预测数据，直接返回计算结果。
 
 
-基于Python的预测Demo
---------------------
+预测Demo
+--------
 
 如下是一段使用mnist model来实现手写识别的预测代码。完整的代码见 ``src_root/doc/ui/predict/predict_sample.py`` 。mnist model可以通过 ``src_root\demo\mnist`` 目录下的demo训练出来。
 
diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index 2f32b3fdd1a26c5b1bca43d0bd0ebb0896a012c4..a723ef7bc8329329fa82113f8e96a1bdbe750277 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -1240,6 +1240,12 @@ void BaseMatrixT<T>::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) {
   }
 }
 
+DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp);  // swap a, b
+template<class T>
+void BaseMatrixT<T>::deepSwap(BaseMatrixT& b) {
+  applyBinary(binary::DeepSwap<T>(), b);  // exchanges contents of *this and b
+}
+
 template<>
 void BaseMatrixT<real>::rowDotMul(size_t destCol,
                                   BaseMatrixT& b,
diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h
index d41dcee682cce15e94d45dafeb12bb0dce19b221..ea58c861a3d6a03642291c172af76795e90fcb92 100644
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
@@ -455,6 +455,17 @@ public:
    */
   void assign(T p);
 
+  /**
+   * @code
+   * swap(this, b)
+   * example: swap two Matrices
+   * MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
+   * MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
+   * cpuA->deepSwap(*cpuB);
+   * @endcode
+   */
+  void deepSwap(BaseMatrixT& b);
+
   /**
    * @code
    * this = this + p
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index ae5bc5a86a1790ce30a8d7f83c9564f52d7cf7ea..de540dad4c8eefe5084c7089d7960d8ca8cf9875 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -448,6 +448,24 @@ void testMatrixZeroAtOffset(int height, int width) {
   MatrixCheckEqual(*cpuA, *cpuTest);
 }
 
+void testMatrixDeepSwap(int height, int width) {
+  // Two operands, plus an untouched snapshot of each for the final check.
+  MatrixPtr lhs = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr rhs = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr lhsBefore = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr rhsBefore = std::make_shared<CpuMatrix>(height, width);
+
+  lhs->randomizeUniform();
+  lhsBefore->copyFrom(*lhs);
+  rhs->randomizeUniform();
+  rhsBefore->copyFrom(*rhs);
+
+  lhs->deepSwap(*rhs);
+  // Each matrix must now hold what the other one held before the swap.
+  MatrixCheckEqual(*lhs, *rhsBefore);
+  MatrixCheckEqual(*rhs, *lhsBefore);
+}
+
 void testMatrixBinaryAdd(int height, int width) {
   MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
   MatrixPtr cpuB = std::make_shared<CpuMatrix>(height, width);
@@ -480,6 +498,7 @@ void testMatrixAssign(int height, int width) {
   MatrixCheckEqual(*cpuA, *outputCheck);
 }
 
+
 void testMatrixAdd(int height, int width) {
   MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
   MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
@@ -798,6 +817,7 @@ TEST(Matrix, unary) {
       testMatrixBinaryAdd(height, width);
       testMatrixTanh(height, width);
       testMatrixTanhDerivative(height, width);
+      testMatrixDeepSwap(height, width);
 
       // applyTernary
       testMatrixTernarySub(height, width);
diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 20ea2fedc4d464cdd5403af28bc917770c993b98..ace2c0dee972e338001a0e5a4045c32e64ff157e 100644
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -29,6 +29,7 @@ function version(){
 }
 
 function ver2num() {
+  set -e
   # convert version to number.
   if [ -z "$1" ]; then # empty argument
     printf "%03d%03d%03d%03d%03d" 0
@@ -41,6 +42,7 @@ function ver2num() {
       printf "%03d%03d%03d%03d%03d" $VERN
     fi
   fi
+  set +e
 }
 
 PADDLE_CONF_HOME="$HOME/.config/paddle"
diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt
index 461c73f14c2dc9377cc39ebb8f1273eee81730a3..ec68b53d440185f869566e2975a65d0c3fec5bc5 100644
--- a/proto/CMakeLists.txt
+++ b/proto/CMakeLists.txt
@@ -1,3 +1,12 @@
+execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
+    OUTPUT_VARIABLE PROTOBUF_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
+string(REPLACE "libprotoc " "" PROTOBUF_VERSION "${PROTOBUF_VERSION}")  # keep only the number
+
+set(PROTOBUF_3 OFF)  # switched ON below when protoc reports version >= 3.0.0
+if ("${PROTOBUF_VERSION}" VERSION_GREATER "3.0.0" OR "${PROTOBUF_VERSION}" VERSION_EQUAL "3.0.0")
+    set(PROTOBUF_3 ON)
+endif()
+
 set(proto_filenames
     DataConfig.proto
     DataFormat.proto
@@ -11,8 +20,12 @@ set(real_proto_files)
 # TODO(yuyang18): Some internal proto will also be depended on.
 #                 Find a way to automatically calculate all depends.
 foreach(filename ${proto_filenames})
+    set(PROTOBUF_3_FLAGS "")
+    if (PROTOBUF_3)
+        set(PROTOBUF_3_FLAGS "-Dproto3")
+    endif()
     add_custom_command(OUTPUT ${filename}
-        COMMAND ${M4_EXECUTABLE} -Dreal=${ACCURACY} -I '${INTERNAL_PROTO_PATH}'
+	COMMAND ${M4_EXECUTABLE} -Dreal=${ACCURACY} ${PROTOBUF_3_FLAGS} -I '${INTERNAL_PROTO_PATH}'
               ${PROJ_ROOT}/proto/${filename}.m4 > ${filename}
         DEPENDS ${PROJ_ROOT}/proto/${filename}.m4
         COMMENT "Generate ${filename}")
diff --git a/proto/DataConfig.proto.m4 b/proto/DataConfig.proto.m4
index 9862e4e7ef2ff96eafc91246e0b435c70fbe31d9..01d451ff7d5334f8f84d28973c2d7c4b4fac5885 100644
--- a/proto/DataConfig.proto.m4
+++ b/proto/DataConfig.proto.m4
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+ifdef(`proto3', `syntax = "proto2";')
 
 package paddle;
 
diff --git a/proto/DataFormat.proto.m4 b/proto/DataFormat.proto.m4
index 556eace5e194ef26991cc06d1f7794f14fbbdded..8a4a0be1b31a62cca35ca732a037ddc8b20786c4 100644
--- a/proto/DataFormat.proto.m4
+++ b/proto/DataFormat.proto.m4
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+ifdef(`proto3', `syntax = "proto2";')
 
 package paddle;
 
diff --git a/proto/ModelConfig.proto.m4 b/proto/ModelConfig.proto.m4
index c835cfd5221c8579b383c0a6f0b2f0f554eac6d2..68a5eb9dd2231b47cc8f83696ab18fdb907c44c0 100644
--- a/proto/ModelConfig.proto.m4
+++ b/proto/ModelConfig.proto.m4
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+ifdef(`proto3', `syntax = "proto2";')
 
 import "ParameterConfig.proto";
 
diff --git a/proto/ParameterConfig.proto.m4 b/proto/ParameterConfig.proto.m4
index e8d512445e5025f5663fbe3e20b4425cf1633a2b..26e7c3ef77b7377b8d6da4d947bcad27ae4edf72 100644
--- a/proto/ParameterConfig.proto.m4
+++ b/proto/ParameterConfig.proto.m4
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+ifdef(`proto3', `syntax = "proto2";')
 
 package paddle;
 
diff --git a/proto/ParameterService.proto.m4 b/proto/ParameterService.proto.m4
index 189dc1c9700bd821959bab80aef3721bd4940b5c..0b3f14a2ee5b3e1771f724bd9d271a3ecfd15038 100644
--- a/proto/ParameterService.proto.m4
+++ b/proto/ParameterService.proto.m4
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+ifdef(`proto3', `syntax = "proto2";')
 
 import "ParameterConfig.proto";
 import "TrainerConfig.proto";
@@ -20,7 +21,6 @@ package paddle;
 /**
  * Various structs for communicating with parameter server
  */
-
 enum ParameterUpdateMode {
   // Set parameter
    PSERVER_UPDATE_MODE_SET_PARAM = 0;//use local param
diff --git a/proto/TrainerConfig.proto.m4 b/proto/TrainerConfig.proto.m4
index 3b0e24f90bed8cdf0e102c12d2a4a041c17a8447..965c9cd39353970dd547f2a595eb99531f3693c6 100644
--- a/proto/TrainerConfig.proto.m4
+++ b/proto/TrainerConfig.proto.m4
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+ifdef(`proto3', `syntax = "proto2";')
 
 import "DataConfig.proto";
 import "ModelConfig.proto";