diff --git a/README.md b/README.md
index af65b74c8febb77824d2f9f95fee8ec4ee8c8b9d..46b97be4236a9f2316c97b47396187fbce2cb22b 100644
--- a/README.md
+++ b/README.md
@@ -35,13 +35,28 @@ We consider deploying deep learning inference service online to be a user-facing

Installation

 We highly recommend you to run Paddle Serving in Docker, please visit [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md)
+```
+# Run CPU Docker
+docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker exec -it test bash
+```
+```
+# Run GPU Docker
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker exec -it test bash
+```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server
+pip install paddle-serving-client
+pip install paddle-serving-server # CPU
+pip install paddle-serving-server-gpu # GPU
 ```
-You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source) to speed up the download.
+You may need to use a domestic mirror source (in China you can use the Tsinghua mirror source; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
+
+The client package supports CentOS 7 and Ubuntu 18. Alternatively, you can use the HTTP service without installing the client.
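For example, the mirror-accelerated install would look roughly like this. This is only an illustrative combination of the package names and mirror URL already shown above, not an extra requirement:

```shell
# Example only: install the client and server packages through the Tsinghua PyPI mirror
pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple      # CPU
pip install paddle-serving-server-gpu -i https://pypi.tuna.tsinghua.edu.cn/simple  # GPU
```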

Quick Start Example

@@ -130,6 +145,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天
 - **Description**:
 ``` shell
 Image classification trained with Imagenet dataset. A label and corresponding probability will be returned.
+Note: This demo needs paddle-serving-server-gpu.
 ```
 - **Download Servable Package**:
@@ -245,6 +261,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv
 ### About Efficiency
 - [How to profile Paddle Serving latency?](python/examples/util)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
 - [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
 
diff --git a/README_CN.md b/README_CN.md
index 3927a0fceaf6431d4dbd84462abc37eae0a412be..4cafb499ee36168b93a244c66f7d5af4ea831160 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -37,12 +37,27 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
 强烈建议您在Docker内构建Paddle Serving,请查看[如何在Docker中运行PaddleServing](doc/RUN_IN_DOCKER_CN.md)
+```
+# 启动 CPU Docker
+docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker exec -it test bash
+```
+```
+# 启动 GPU Docker
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker exec -it test bash
+```
 ```shell
 pip install paddle-serving-client
-pip install paddle-serving-server
+pip install paddle-serving-server # CPU
+pip install paddle-serving-server-gpu # GPU
 ```
-您可能需要使用国内镜像源(例如清华源)来加速下载。
+您可能需要使用国内镜像源(例如清华源,在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`)来加速下载。
+
+客户端安装包支持CentOS 7和Ubuntu 18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。

快速启动示例

@@ -135,6 +150,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天
 - **介绍**:
 ``` shell
 图像分类模型由Imagenet数据集训练而成,该服务会返回一个标签及其概率
+注意:本示例需要安装paddle-serving-server-gpu
 ```
 - **下载服务包**:
@@ -251,6 +267,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv
 ### 关于Paddle Serving性能
 - [如何测试Paddle Serving性能?](python/examples/util/)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU版Benchmarks](doc/BENCHMARKING.md)
 - [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
 
diff --git a/core/cube/cube-transfer/CMakeLists.txt b/core/cube/cube-transfer/CMakeLists.txt
index ab91c0f5f274d971d866ad33680a49103a641934..78e47c5b840631a3092f3a799e2424d370444a2e 100644
--- a/core/cube/cube-transfer/CMakeLists.txt
+++ b/core/cube/cube-transfer/CMakeLists.txt
@@ -18,9 +18,11 @@ project(cube-transfer Go)
 
 include(cmake/golang.cmake)
 
-ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
 ExternalGoProject_Add(rfw github.com/mipearson/rfw)
-ExternalGoProject_Add(logex github.com/Badangel/logex)
+ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
+add_custom_target(logex
+  COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get github.com/Badangel/logex
+  DEPENDS rfw)
 
 add_subdirectory(src)
 install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
diff --git a/core/cube/cube-transfer/cmake/golang.cmake b/core/cube/cube-transfer/cmake/golang.cmake
index 817d029d946bad8da4f4cf2785e68d062fc4cada..5a26c5d2b08dc0dd9e23e3f724630d84eaabec9b 100644
--- a/core/cube/cube-transfer/cmake/golang.cmake
+++ b/core/cube/cube-transfer/cmake/golang.cmake
@@ -57,4 +57,4 @@ function(ADD_GO_LIBRARY NAME BUILD_TYPE)
   if(NOT BUILD_TYPE STREQUAL "STATIC")
     install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin)
   endif()
-endfunction(ADD_GO_LIBRARY)
\ No newline at end of file
+endfunction(ADD_GO_LIBRARY)
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 4388eef5a0d75dd07e6248648198bda912cbbe78..55571cef4c7511ed98dadb45d2b7a55103101cf7 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -314,7 +314,7 @@ int PredictorClient::batch_predict(
       tensor_vec.push_back(inst->add_tensor_array());
     }
 
-    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name"
+    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
             << "prepared";
     int vec_idx = 0;
     for (auto &name : float_feed_name) {
@@ -376,6 +376,7 @@ int PredictorClient::batch_predict(
   } else {
     client_infer_end = timeline.TimeStampUS();
     postprocess_start = client_infer_end;
+
     uint32_t model_num = res.outputs_size();
     predict_res_batch._models.resize(model_num);
     for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
@@ -385,9 +386,11 @@ int PredictorClient::batch_predict(
         predict_res_batch._models[m_idx]._int64_map[name].resize(batch_size);
         predict_res_batch._models[m_idx]._float_map[name].resize(batch_size);
       }
+      VLOG(2) << "response batch size " << output.insts_size();
+      VLOG(2) << "response var name " << output.insts(0).tensor_array_size();
       for (int bi = 0; bi < batch_size; bi++) {
+        int idx = 0;
         for (auto &name : fetch_name) {
-          int idx = _fetch_name_to_idx[name];
           int len = output.insts(bi).tensor_array(idx).data_size();
           if (_fetch_name_to_type[name] == 0) {
             int len = output.insts(bi).tensor_array(idx).int64_data_size();
@@ -412,6 +415,7 @@ int PredictorClient::batch_predict(
           }
         }
       }
+      idx += 1;
     }
   }
   postprocess_end = timeline.TimeStampUS();
diff --git a/doc/ABTEST_IN_PADDLE_SERVING.md b/doc/ABTEST_IN_PADDLE_SERVING.md
index 931da839d2d84aca8b3116201ba34a074db6f0e9..69e5ff4b6fdf11d3764f94cba83beee82f959c85 100644
--- a/doc/ABTEST_IN_PADDLE_SERVING.md
+++ b/doc/ABTEST_IN_PADDLE_SERVING.md
@@ -19,6 +19,7 @@ sh get_data.sh
 
 The following Python code will process the data `test_data/part-0` and write to the `processed.data` file.
 
+[//file]:#process.py
 ``` python
 from imdb_reader import IMDBDataset
 imdb_dataset = IMDBDataset()
@@ -59,7 +60,8 @@ exit
 
 Run the following Python code on the host computer to start client. Make sure that the host computer is installed with the `paddle-serving-client` package.
 
-``` go
+[//file]:#ab_client.py
+``` python
 from paddle_serving_client import Client
 
 client = Client()
@@ -94,3 +96,24 @@ When making prediction on the client side, if the parameter `need_variant_tag=Tr
 [lstm](total: 1867) acc: 0.490091055169
 [bow](total: 217) acc: 0.73732718894
 ```
+
+
diff --git a/doc/BERT_10_MINS.md b/doc/BERT_10_MINS.md
index e668b3207c5228309d131e2353e815d26c8d4625..71f6f065f4101aae01e077910fc5b6bd6b039b46 100644
--- a/doc/BERT_10_MINS.md
+++ b/doc/BERT_10_MINS.md
@@ -102,4 +102,5 @@ if [[ $? -eq 0 ]]; then
 else
     echo "test fail"
 fi
+ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
 -->
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index 2858eb120d0f9d8157392a598faad2ef6cbafd87..41a79f082494b0ac22bb4479a5d246cdb6882a3d 100644
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -4,14 +4,19 @@
 
 ## Compilation environment requirements
 
-- os: CentOS 6u3
-- gcc: 4.8.2 and later
-- go: 1.9.2 and later
-- git:2.17.1 and later
-- cmake:3.2.2 and later
-- python:2.7.2 and later
-
-It is recommended to use Docker to prepare the compilation environment for the Paddle service: [CPU Dockerfile.devel](../tools/Dockerfile.devel), [GPU Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+- OS: CentOS 7
+- GCC: 4.8.2 and later
+- Golang: 1.9.2 and later
+- Git: 2.17.1 and later
+- CMake: 3.2.2 and later
+- Python: 2.7.2 and later
+
+It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you:
+
+- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`, dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
+- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`, dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+
+This document takes Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python options of cmake.
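As a rough illustration of "adjust the Python options of cmake" above, a Python3 build might override the standard CMake Python hints as sketched below. The paths are assumptions for the sketch and must match your own environment; they are not part of this patch:

```shell
# Hypothetical Python3 configuration; point the hints at your own Python3 installation.
cmake -DPYTHON_INCLUDE_DIR=/usr/include/python3.6m \
      -DPYTHON_LIBRARIES=/usr/lib64/libpython3.6m.so \
      -DPYTHON_EXECUTABLE=/usr/bin/python3.6 ..
```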
 
 ## Get Code
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index bbe509f7c09e9e9082f1e7a2bfa6b823af7c2cc0..eb334232d98f26e68d719d10cbe458a356738d2f 100644
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -4,14 +4,19 @@
 
 ## 编译环境设置
 
-- os: CentOS 6u3
-- gcc: 4.8.2及以上
-- go: 1.9.2及以上
-- git:2.17.1及以上
-- cmake:3.2.2及以上
-- python:2.7.2及以上
-
-推荐使用Docker准备Paddle Serving编译环境:[CPU Dockerfile.devel](../tools/Dockerfile.devel),[GPU Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+- OS: CentOS 7
+- GCC: 4.8.2及以上
+- Golang: 1.9.2及以上
+- Git:2.17.1及以上
+- CMake:3.2.2及以上
+- Python:2.7.2及以上
+
+推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境:
+
+- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
+- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+
+本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译,只需要调整cmake的Python相关选项即可。
 
 ## 获取代码
diff --git a/doc/MULTI_SERVICE_ON_ONE_GPU_CN.md b/doc/MULTI_SERVICE_ON_ONE_GPU_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..3abe12100123a139e9ce56c7ff20035cdb1cbeeb
--- /dev/null
+++ b/doc/MULTI_SERVICE_ON_ONE_GPU_CN.md
@@ -0,0 +1,15 @@
+# 单卡多模型预测服务
+
+当客户端发送的请求数并不频繁时,会造成服务端机器计算资源尤其是GPU资源的浪费。这种情况下,可以在服务端启动多个预测服务来提高资源利用率。Paddle Serving支持在单张显卡上部署多个预测服务,使用时只需要在启动单个服务时通过--gpu_ids参数将服务与显卡进行绑定,这样就可以将多个服务都绑定到同一张卡上。
+
+例如:
+
+```shell
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model --port 9292 --gpu_ids 0
+python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0
+```
+
+在卡0上,同时部署了bert示例和imagenet示例。
+
+**注意:** 单张显卡内部进行推理计算时仍然为串行计算,这种方式是为了减少server端显卡的空闲时间。
+
diff --git a/doc/PERFORMANCE_OPTIM_CN.md b/doc/PERFORMANCE_OPTIM_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..dd17bc8afab8472f8f55b4870f73e4c481e97cd3
--- /dev/null
+++ b/doc/PERFORMANCE_OPTIM_CN.md
@@ -0,0 +1,13 @@
+# 性能优化
+
+由于模型结构的不同,在执行预测时不同的预测对计算资源的消耗也不相同。对于在线的预测服务来说,对计算资源要求较少的模型,通信的时间成本占比就会较高,称为通信密集型服务;对计算资源要求较多的模型,推理计算的时间成本较高,称为计算密集型服务。对于这两种服务类型,可以根据实际需求采取不同的方式进行优化。
+
+对于一个预测服务来说,想要判断属于哪种类型,最简单的方法就是看时间占比,Paddle Serving提供了[Timeline工具](../python/examples/util/README_CN.md),可以直观地展现预测服务中各阶段的耗时。
+
+对于通信密集型的预测服务,可以将请求进行聚合,在对延时可以容忍的限度内,将多个预测请求合并成一个batch进行预测。
+
+对于计算密集型的预测服务,可以使用GPU预测服务代替CPU预测服务,或者增加GPU预测服务的显卡数量。
+
+在相同条件下,Paddle Serving提供的HTTP预测服务的通信时间是大于RPC预测服务的,因此对于通信密集型的服务请优先考虑使用RPC的通信方式。
+
+对于模型较大,预测服务内存或显存占用较多的情况,可以通过将--mem_optim选项设置为True来开启内存/显存优化。
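To make the `--mem_optim` advice in PERFORMANCE_OPTIM_CN.md concrete, a launch command could look like the sketch below. The model name, port, and GPU id are illustrative assumptions, and the `--mem_optim True` form simply follows the wording of the document above:

```shell
# Illustrative only: start a GPU prediction service with memory/GPU-memory optimization enabled.
python -m paddle_serving_server_gpu.serve --model bert_seq20_model --port 9292 --gpu_ids 0 --mem_optim True
```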
diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md
index fd29d718b5a6390e0d2efbb1df94437d5a3d556d..e7b25362d113b18f6e779ccb9b92a3e3c8d13343 100644
--- a/doc/RUN_IN_DOCKER.md
+++ b/doc/RUN_IN_DOCKER.md
@@ -6,6 +6,8 @@
 
 Docker (GPU version requires nvidia-docker to be installed on the GPU machine)
 
+This document takes Python2 as an example to show how to run Paddle Serving in docker. You can also use Python3 to run related commands by replacing `python` with `python3`.
+
 ## CPU
 
 ### Get docker image
diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md
index c6f31cac6b1e644d6ac1e52323164169830bddd5..3e84cf08c015b7fda0d957bf621173ec18c19498 100644
--- a/doc/RUN_IN_DOCKER_CN.md
+++ b/doc/RUN_IN_DOCKER_CN.md
@@ -6,6 +6,8 @@
 
 Docker(GPU版本需要在GPU机器上安装nvidia-docker)
 
+该文档以Python2为例展示如何在Docker中运行Paddle Serving,您也可以通过将`python`更换成`python3`来用Python3运行相关命令。
+
 ## CPU版本
 
 ### 获取镜像
diff --git a/doc/TRAIN_TO_SERVICE.md b/doc/TRAIN_TO_SERVICE.md
index c0d7d2ea3794a07e39407bb0b53a822cfedd173e..40d5dd95e4d7aad3b198898559321419b4b17833 100644
--- a/doc/TRAIN_TO_SERVICE.md
+++ b/doc/TRAIN_TO_SERVICE.md
@@ -288,7 +288,7 @@ The script receives data from standard input and prints out the probability that
 The client implemented in the previous step runs the prediction service as an example. The usage method is as follows:
 
 ```shell
-cat test_data/part-0 | python test_client.py imdb_lstm_client_conf / serving_client_conf.prototxt imdb.vocab
+cat test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
 ```
 
 Using 2084 samples in the test_data/part-0 file for test testing, the model prediction accuracy is 88.19%.
@@ -350,7 +350,7 @@ In the above command, the first parameter is the saved server-side model and con
 After starting the HTTP prediction service, you can make prediction with a single command:
 
 ```
-curl -H "Content-Type: application / json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
+curl -H "Content-Type: application/json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
 ```
 
 When the inference process is normal, the prediction probability is returned, as shown below.
diff --git a/doc/doc_test_list b/doc/doc_test_list
index ef019de05d6075801434bae91de8cbdceb1fea91..8812f85e27f5230c68ad609e37840bbcc6589270 100644
--- a/doc/doc_test_list
+++ b/doc/doc_test_list
@@ -1 +1,2 @@
 BERT_10_MINS.md
+ABTEST_IN_PADDLE_SERVING.md
diff --git a/python/examples/fit_a_line/README.md b/python/examples/fit_a_line/README.md
index 24bd0363794104226218b83ab9817bc14481e35c..8ea146e9b7a8e781cbebd004bd54c6e0adfba7c2 100644
--- a/python/examples/fit_a_line/README.md
+++ b/python/examples/fit_a_line/README.md
@@ -1,25 +1,50 @@
-# Fit a line example, prediction through rpc service
+# Fit a line prediction example
 
 ([简体中文](./README_CN.md)|English)
 
-## Start rpc service
-``` shell
+## Get data
+
+```shell
 sh get_data.sh
+```
+
+
+
+## RPC service
+
+### Start server
+
+``` shell
 python test_server.py uci_housing_model/
 ```
 
-## Prediction
+You can also start the default RPC service with the following line of code:
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+```
+
+### Client prediction
+
+The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package (`pip install paddlepaddle`).
+
 ``` shell
 python test_client.py uci_housing_client/serving_client_conf.prototxt
 ```
 
-## prediction through http service
-Start a web service with default web service hosting modules
+
+
+## HTTP service
+
+### Start server
+
+Start a web service with default web service hosting modules:
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --name uci --port 9393 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
 ```
-## Prediction through http post
+### Client prediction
+
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md
index 0ae611b311072ec4db27ac86128de420fa8b2bf0..3b97005bce14f9794b831066a1be2750d895e4f6 100644
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
@@ -1,25 +1,51 @@
-# 线性回归,RPC预测服务示例
+# 线性回归预测服务示例
 
 (简体中文|[English](./README.md))
 
-## 开启RPC服务端
-``` shell
+## 获取数据
+
+```shell
 sh get_data.sh
+```
+
+
+
+## RPC服务
+
+### 开启服务端
+
+``` shell
 python test_server.py uci_housing_model/
 ```
 
-## RPC预测
+也可以通过下面的一行代码开启默认RPC服务:
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+```
+
+### 客户端预测
+
+`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
+
 ``` shell
 python test_client.py uci_housing_client/serving_client_conf.prototxt
 ```
 
-## 开启HTTP服务端
-Start a web service with default web service hosting modules
+
+
+## HTTP服务
+
+### 开启服务端
+
+通过下面的一行代码开启默认web服务:
+
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --name uci --port 9393 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
 ```
-## HTTP预测
+### 客户端预测
+
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
diff --git a/python/examples/util/show_profile.py b/python/examples/util/show_profile.py
index c3e8adc0cea07310b6482c5eec29f123db01e17d..9153d939338f0ee171af539b9f955d51802ad547 100644
--- a/python/examples/util/show_profile.py
+++ b/python/examples/util/show_profile.py
@@ -10,7 +10,7 @@ time_dict = collections.OrderedDict()
 def prase(line):
     profile_list = line.split(" ")
     num = len(profile_list)
-    for idx in range(num / 2):
+    for idx in range(int(num / 2)):
         profile_0_list = profile_list[idx * 2].split(":")
         profile_1_list = profile_list[idx * 2 + 1].split(":")
         if len(profile_0_list[0].split("_")) == 2:
@@ -18,7 +18,7 @@ def prase(line):
         else:
             name = profile_0_list[0].split("_")[0] + "_" + profile_0_list[
                 0].split("_")[1]
-        cost = long(profile_1_list[1]) - long(profile_0_list[1])
+        cost = int(profile_1_list[1]) - int(profile_0_list[1])
         if name not in time_dict:
             time_dict[name] = cost
         else:
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 39e06ef14d9f2ab64bee0730ef29b03a5e2dc923..e0adc6e3cbe629d39a0293ba0e362d5115cf4d21 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -175,7 +175,6 @@ class Client(object):
         return self.fetch_names_
 
     def shape_check(self, feed, key):
-        seq_shape = 1
         if key in self.lod_tensor_set:
             return
         if len(feed[key]) != self.feed_tensor_len[key]:
@@ -192,7 +191,7 @@
         elif isinstance(fetch, list):
             fetch_list = fetch
         else:
-            raise ValueError("fetch only accepts string and list of string")
+            raise ValueError("Fetch only accepts string and list of string")
 
         feed_batch = []
         if isinstance(feed, dict):
@@ -200,7 +199,7 @@
         elif isinstance(feed, list):
             feed_batch = feed
         else:
-            raise ValueError("feed only accepts dict and list of dict")
+            raise ValueError("Feed only accepts dict and list of dict")
 
         int_slot_batch = []
         float_slot_batch = []
@@ -216,7 +215,7 @@
 
         if len(fetch_names) == 0:
             raise ValueError(
-                "fetch names should not be empty or out of saved fetch list")
+                "Fetch names should not be empty or out of saved fetch list.")
             return {}
 
         for i, feed_i in enumerate(feed_batch):
@@ -224,7 +223,8 @@
             float_slot = []
             for key in feed_i:
                 if key not in self.feed_names_:
-                    continue
+                    raise ValueError("Wrong feed name: {}.".format(key))
+                self.shape_check(feed_i, key)
                 if self.feed_types_[key] == int_type:
                     if i == 0:
                         int_feed_names.append(key)
@@ -233,6 +233,8 @@
                     if i == 0:
                         float_feed_names.append(key)
                     float_slot.append(feed_i[key])
+            if len(int_slot) + len(float_slot) == 0:
+                raise ValueError("No feed data for predict.")
             int_slot_batch.append(int_slot)
             float_slot_batch.append(float_slot)
 
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 4cbfbf96905d3681c7e024a7a09164de9abaaea6..cfd5eee9ada15d1702be63af5f9cc09c85a57f0a 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -144,7 +144,7 @@ class Server(object):
             self.bin_path = os.environ["SERVING_BIN"]
 
     def check_cuda(self):
-        r = os.system("nvcc --version > /dev/null")
+        r = os.system("cat /usr/local/cuda/version.txt")
         if r != 0:
             raise SystemExit(
                 "CUDA not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""
diff --git a/tools/Dockerfile b/tools/Dockerfile
index a39ce5bb76e411edeb94766d0c9aae23c6e7e62f..69b9b8bec4be49d6d4b1a5d8eb3fe5550ac1fa15 100644
--- a/tools/Dockerfile
+++ b/tools/Dockerfile
@@ -3,6 +3,7 @@ FROM centos:7.3.1611
 RUN yum -y install wget && \
     yum -y install epel-release && yum -y install patchelf && \
     yum -y install gcc make python-devel && \
+    yum -y install python3 python3-devel && \
     yum clean all && \
     curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
     python get-pip.py && rm get-pip.py
diff --git a/tools/Dockerfile.ci b/tools/Dockerfile.ci
index c3ababc7c0ca689c53122b8e41af2e350a937eb5..d53cee6b7389434afa07526682d84e2366ec16f5 100644
--- a/tools/Dockerfile.ci
+++ b/tools/Dockerfile.ci
@@ -26,6 +26,8 @@ RUN yum -y install wget >/dev/null \
     && make >/dev/null && make install >/dev/null \
     && cd .. \
     && rm -rf patchelf-0.10* \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
     && yum -y update >/dev/null \
     && yum -y install dnf >/dev/null \
     && yum -y install dnf-plugins-core >/dev/null \
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index a4b5b5fe48b5c4d5c74d66dc688fa5d594a33266..6cb228f587054d5b579df0d85109d41c15c128e9 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -18,5 +18,7 @@ RUN yum -y install wget >/dev/null \
     && python get-pip.py >/dev/null \
     && pip install google protobuf setuptools wheel flask >/dev/null \
     && rm get-pip.py \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
     && yum -y install epel-release && yum -y install patchelf \
     && yum clean all
diff --git a/tools/Dockerfile.gpu b/tools/Dockerfile.gpu
index 091f4a546b549a3dd53645e78ab49b1cd46bf5b3..a08bdf3daef103b5944df192fef967ebd9772b6c 100644
--- a/tools/Dockerfile.gpu
+++ b/tools/Dockerfile.gpu
@@ -6,6 +6,7 @@ RUN yum -y install wget && \
     yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \
     yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \
     yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \
+    yum -y install python3 python3-devel && \
     yum clean all && \
     curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
     python get-pip.py && rm get-pip.py && \
diff --git a/tools/Dockerfile.gpu.devel b/tools/Dockerfile.gpu.devel
index a2233908dbcff4f2f2bbd3edad24b83cb5252e16..8cd7a6dbbddd5e1b60b7833086aa25cd849da519 100644
--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -19,5 +19,7 @@ RUN yum -y install wget >/dev/null \
     && python get-pip.py >/dev/null \
     && pip install google protobuf setuptools wheel flask >/dev/null \
     && rm get-pip.py \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
     && yum -y install epel-release && yum -y install patchelf \
     && yum clean all
diff --git a/tools/serving_build.sh b/tools/serving_build.sh
index bdafd925f06f8f1e092ee011c8487e3b82579da4..6549838a11c8d9c119762c3429a06cae57fe31b6 100644
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -266,12 +266,119 @@ function python_run_criteo_ctr_with_cube() {
     cd .. # pwd: /Serving/python/examples
 }
 
+function python_test_bert() {
+    # pwd: /Serving/python/examples
+    local TYPE=$1
+    yum install -y libXext libSM libXrender >/dev/null
+    pip install ujson
+    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    cd bert # pwd: /Serving/python/examples/bert
+    case $TYPE in
+        CPU)
+            pip install paddlehub
+            python prepare_model.py 20
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 &"
+            sleep 5
+            pip install paddle_serving_app
+            check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+            kill_server_process
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            ps -ef | grep "serving" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "bert RPC inference pass"
+            ;;
+        GPU)
+            pip install paddlehub
+            python prepare_model.py 20
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 &"
+            sleep 5
+            pip install paddle_serving_app
+            check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+            kill_server_process
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "bert RPC inference pass"
+            ;;
+        *)
+    esac
+    echo "test bert $TYPE finished as expected."
+    unset SERVING_BIN
+    cd ..
+}
+
+function python_test_imdb() {
+    # pwd: /Serving/python/examples
+    local TYPE=$1
+    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    cd imdb # pwd: /Serving/python/examples/imdb
+    case $TYPE in
+        CPU)
+            sh get_data.sh
+            sleep 5
+            check_cmd "python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 &"
+            check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
+            echo "imdb CPU RPC inference pass"
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            rm -rf work_dir1
+            sleep 5
+
+            check_cmd "python text_classify_service.py imdb_cnn_model/workdir/9292 imdb.vocab &"
+            sleep 5
+            check_cmd "curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction"
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "imdb CPU HTTP inference pass"
+            ;;
+        GPU)
+            echo "imdb ignore GPU test"
+            ;;
+        *)
+    esac
+    echo "test imdb $TYPE finished as expected."
+    unset SERVING_BIN
+    cd ..
+}
+
+function python_test_lac() {
+    # pwd: /Serving/python/examples
+    local TYPE=$1
+    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    cd lac # pwd: /Serving/python/examples/lac
+    case $TYPE in
+        CPU)
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292 &"
+            sleep 5
+            check_cmd "echo "我爱北京天安门" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/"
+            echo "lac CPU RPC inference pass"
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+
+            check_cmd "python lac_web_service.py jieba_server_model/ lac_workdir 9292 &"
+            sleep 5
+            check_cmd "curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction"
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            ps -ef | grep "lac_web_service" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "lac CPU HTTP inference pass"
+            ;;
+        GPU)
+            echo "lac ignore GPU test"
+            ;;
+        *)
+    esac
+    echo "test lac $TYPE finished as expected."
+    unset SERVING_BIN
+    cd ..
+}
+
 function python_run_test() {
     # Using the compiled binary
     local TYPE=$1 # pwd: /Serving
     cd python/examples # pwd: /Serving/python/examples
     python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
     python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
+    python_test_bert $TYPE # pwd: /Serving/python/examples
+    python_test_imdb $TYPE
+    python_test_lac $TYPE
     echo "test python $TYPE part finished as expected."
     cd ../.. # pwd: /Serving
 }
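The new test functions lean on a `check_cmd` helper that already exists elsewhere in `serving_build.sh` and is not shown in this patch. A minimal sketch of what such a helper presumably looks like, assuming it simply runs the command and fails the build on a non-zero exit code:

```shell
# Presumed shape of the helper used above, not the repository's exact definition.
function check_cmd() {
    eval $@
    if [ $? -ne 0 ]; then
        exit 1
    fi
}
```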