diff --git a/core/cube/cube-transfer/CMakeLists.txt b/core/cube/cube-transfer/CMakeLists.txt
index ab91c0f5f274d971d866ad33680a49103a641934..78e47c5b840631a3092f3a799e2424d370444a2e 100644
--- a/core/cube/cube-transfer/CMakeLists.txt
+++ b/core/cube/cube-transfer/CMakeLists.txt
@@ -18,9 +18,11 @@ project(cube-transfer Go)
 
 include(cmake/golang.cmake)
 
-ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
 ExternalGoProject_Add(rfw github.com/mipearson/rfw)
-ExternalGoProject_Add(logex github.com/Badangel/logex)
+ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
+add_custom_target(logex
+    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get github.com/Badangel/logex
+    DEPENDS rfw)
 
 add_subdirectory(src)
 install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
diff --git a/core/cube/cube-transfer/cmake/golang.cmake b/core/cube/cube-transfer/cmake/golang.cmake
index 817d029d946bad8da4f4cf2785e68d062fc4cada..5a26c5d2b08dc0dd9e23e3f724630d84eaabec9b 100644
--- a/core/cube/cube-transfer/cmake/golang.cmake
+++ b/core/cube/cube-transfer/cmake/golang.cmake
@@ -57,4 +57,4 @@ function(ADD_GO_LIBRARY NAME BUILD_TYPE)
   if(NOT BUILD_TYPE STREQUAL "STATIC")
     install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin)
   endif()
-endfunction(ADD_GO_LIBRARY)
\ No newline at end of file
+endfunction(ADD_GO_LIBRARY)
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index 2858eb120d0f9d8157392a598faad2ef6cbafd87..41a79f082494b0ac22bb4479a5d246cdb6882a3d 100644
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -4,14 +4,19 @@
 
 ## Compilation environment requirements
 
-- os: CentOS 6u3
-- gcc: 4.8.2 and later
-- go: 1.9.2 and later
-- git:2.17.1 and later
-- cmake:3.2.2 and later
-- python:2.7.2 and later
-
-It is recommended to use Docker to prepare the compilation environment for the Paddle service: [CPU Dockerfile.devel](../tools/Dockerfile.devel), [GPU Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+- OS: CentOS 7
+- GCC: 4.8.2 and later
+- Golang: 1.9.2 and later
+- Git: 2.17.1 and later
+- CMake: 3.2.2 and later
+- Python: 2.7.2 and later
+
+It is recommended to use Docker for compilation. We have prepared a Paddle Serving compilation environment for you:
+
+- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`, dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
+- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`, dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+
+This document takes Python 2 as an example to show how to compile Paddle Serving. If you want to compile with Python 3, just adjust the Python-related options of cmake.
 
 ## Get Code
 
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index bbe509f7c09e9e9082f1e7a2bfa6b823af7c2cc0..eb334232d98f26e68d719d10cbe458a356738d2f 100644
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -4,14 +4,19 @@
 
 ## 编译环境设置
 
-- os: CentOS 6u3
-- gcc: 4.8.2及以上
-- go: 1.9.2及以上
-- git:2.17.1及以上
-- cmake:3.2.2及以上
-- python:2.7.2及以上
-
-推荐使用Docker准备Paddle Serving编译环境:[CPU Dockerfile.devel](../tools/Dockerfile.devel),[GPU Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+- OS: CentOS 7
+- GCC: 4.8.2及以上
+- Golang: 1.9.2及以上
+- Git:2.17.1及以上
+- CMake:3.2.2及以上
+- Python:2.7.2及以上
+
+推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境:
+
+- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
+- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+
+本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译,只需要调整cmake的Python相关选项即可。
 
 ## 获取代码
 
diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md
index fd29d718b5a6390e0d2efbb1df94437d5a3d556d..e7b25362d113b18f6e779ccb9b92a3e3c8d13343 100644
--- a/doc/RUN_IN_DOCKER.md
+++ b/doc/RUN_IN_DOCKER.md
@@ -6,6 +6,8 @@
 
 Docker (GPU version requires nvidia-docker to be installed on the GPU machine)
 
+This document takes Python 2 as an example to show how to run Paddle Serving in Docker. You can also run the related commands with Python 3 by replacing `python` with `python3`.
+
 ## CPU
 
 ### Get docker image
diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md
index c6f31cac6b1e644d6ac1e52323164169830bddd5..3e84cf08c015b7fda0d957bf621173ec18c19498 100644
--- a/doc/RUN_IN_DOCKER_CN.md
+++ b/doc/RUN_IN_DOCKER_CN.md
@@ -6,6 +6,8 @@
 
 Docker(GPU版本需要在GPU机器上安装nvidia-docker)
 
+该文档以Python2为例展示如何在Docker中运行Paddle Serving,您也可以通过将`python`更换成`python3`来用Python3运行相关命令。
+
 ## CPU版本
 
 ### 获取镜像
diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md
index bd2af745312f4668e8746bcb897bd55642ecff5f..0b9ec5649491165669579044e95def0e766bca1a 100644
--- a/python/examples/bert/README.md
+++ b/python/examples/bert/README.md
@@ -15,12 +15,18 @@ pip install paddlehub
 
 run
 ```
-python prepare_model.py 20
+python prepare_model.py 128
 ```
-the 20 in the command above means max_seq_len in BERT model, which is the length of sample after preprocessing.
-the config file and model file for server side are saved in the folder bert_seq20_model.
-the config file generated for client side is saved in the folder bert_seq20_client.
+The 128 in the command above means max_seq_len in the BERT model, which is the length of a sample after preprocessing.
+The config file and model file for the server side are saved in the folder bert_seq128_model.
+The config file generated for the client side is saved in the folder bert_seq128_client.
+
+You can also download the above model from BOS (max_seq_len=128). After decompression, the config file and model file for the server side are stored in the bert_chinese_L-12_H-768_A-12_model folder, and the config file generated for the client side is stored in the bert_chinese_L-12_H-768_A-12_client folder:
+```shell
+wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
+tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
+```
 
 ### Getting Dict and Sample Dataset
 
@@ -32,11 +38,11 @@ this script will download Chinese Dictionary File vocab.txt and Chinese Sample D
 
 ### RPC Inference Service
 Run
 ```
-python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 #cpu inference service
+python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service
 ```
 Or
 ```
-python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
+python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
 ```
 
 ### RPC Inference
@@ -47,7 +53,7 @@ pip install paddle_serving_app
 ```
 Run
 ```
-head data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt
+head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
 ```
 
 the client reads data from data-c.txt and send prediction request, the prediction is given by word vector. (Due to massive data in the word vector, we do not print it).
 
@@ -58,7 +64,7 @@
 ```
 set environmental variable to specify which gpus are used, the command above means gpu 0 and gpu 1 is used.
 ```
- python bert_web_service.py bert_seq20_model/ 9292 #launch gpu inference service
+ python bert_web_service.py bert_seq128_model/ 9292 #launch gpu inference service
 ```
 
 ### HTTP Inference
 
@@ -75,7 +81,7 @@ GPU:GPU V100 * 1
 
 CUDA/cudnn Version:CUDA 9.2,cudnn 7.1.4
 
-In the test, 10 thousand samples in the sample data are copied into 100 thousand samples. Each client thread sends a sample of the number of threads. The batch size is 1, the max_seq_len is 20, and the time unit is seconds.
+In the test, the 10 thousand samples in the sample data are copied into 100 thousand samples. Each client thread sends one N-th of the samples, where N is the number of client threads. The batch size is 1, max_seq_len is 20 (not the 128 described above), and the time unit is seconds.
 
 When the number of client threads is 4, the prediction speed can reach 432 samples per second. Because a single GPU can only perform serial calculations internally, increasing the number of client threads can only reduce the idle time of the GPU. Therefore, after the number of threads reaches 4, the increase in the number of threads does not improve the prediction speed.
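Editor's note (not part of the patch): for readers following the bert README changes above, the RPC client flow it describes — preprocess a sentence with the reader, then call the serving client — looks roughly like the sketch below. This is an illustrative sketch only; it assumes the `ChineseBertReader`/`Client` usage shown in `bert_client.py` elsewhere in this diff, plus a server already listening on port 9292, and the `reader.process` call is an assumption based on the web service code in this patch.

```python
# Illustrative sketch of the RPC client flow described in the bert README
# (assumes: pip install paddle_serving_app, and a server started with
#  python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292).
import sys

from paddle_serving_client import Client
from paddle_serving_app import ChineseBertReader

# max_seq_len must match the value the model was prepared with (128 here).
reader = ChineseBertReader({"max_seq_len": 128})
fetch = ["pooled_output"]

client = Client()
client.load_client_config("bert_seq128_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

for line in sys.stdin:
    # Turn the raw sentence into the feed dict expected by the BERT model.
    feed_dict = reader.process(line)
    result = client.predict(feed=feed_dict, fetch=fetch)
    print(result)
```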
diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md
index 305010baf4b39d9682f87ed597776950d6c36aa6..fb74b024113474f2ebc454f5ef341755135fea6b 100644
--- a/python/examples/bert/README_CN.md
+++ b/python/examples/bert/README_CN.md
@@ -13,11 +13,17 @@ pip install paddlehub
 ```
 执行
 ```
-python prepare_model.py 20
+python prepare_model.py 128
+```
+参数128表示BERT模型中的max_seq_len,即预处理后的样本长度。
+生成server端配置文件与模型文件,存放在bert_seq128_model文件夹。
+生成client端配置文件,存放在bert_seq128_client文件夹。
+
+您也可以从bos上直接下载上述模型(max_seq_len=128),解压后server端配置文件与模型文件存放在bert_chinese_L-12_H-768_A-12_model文件夹,client端配置文件存放在bert_chinese_L-12_H-768_A-12_client文件夹:
+```shell
+wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
+tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
 ```
-参数20表示BERT模型中的max_seq_len,即预处理后的样本长度。
-生成server端配置文件与模型文件,存放在bert_seq20_model文件夹
-生成client端配置文件,存放在bert_seq20_client文件夹
 
 ### 获取词典和样例数据
 
@@ -29,11 +35,11 @@ sh get_data.sh
 
 ### 启动RPC预测服务
 执行
 ```
-python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 #启动cpu预测服务
+python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #启动cpu预测服务
 ```
 或者
 ```
-python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
+python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
 ```
 
 ### 执行预测
@@ -44,7 +50,7 @@ pip install paddle_serving_app
 ```
 执行
 ```
-head data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt
+head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
 ```
 
 启动client读取data-c.txt中的数据进行预测,预测结果为文本的向量表示(由于数据较多,脚本中没有将输出进行打印),server端的地址在脚本中修改。
 
@@ -54,7 +60,7 @@ head data-c.txt | python bert_client.py --model bert_seq20_client/serving_client
 ```
 通过环境变量指定gpu预测服务使用的gpu,示例中指定索引为0和1的两块gpu
 ```
- python bert_web_service.py bert_seq20_model/ 9292 #启动gpu预测服务
+ python bert_web_service.py bert_seq128_model/ 9292 #启动gpu预测服务
 ```
 
 ### 执行预测
@@ -70,7 +76,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "hello", "fetch":[
 
 环境:CUDA 9.2,cudnn 7.1.4
 
-测试中将样例数据中的1W个样本复制为10W个样本,每个client线程发送线程数分之一个样本,batch size为1,max_seq_len为20,时间单位为秒.
+测试中将样例数据中的1W个样本复制为10W个样本,每个client线程发送线程数分之一个样本,batch size为1,max_seq_len为20(而不是上面的128),时间单位为秒.
 
 在client线程数为4时,预测速度可以达到432样本每秒。
 由于单张GPU内部只能串行计算,client线程增多只能减少GPU的空闲时间,因此在线程数达到4之后,线程数增多对预测速度没有提升。
diff --git a/python/examples/bert/bert_client.py b/python/examples/bert/bert_client.py
index 51364c6745731017b31923d246990497115dc780..b33a80d88fcc28200a61bc6125afcea0a0352dab 100644
--- a/python/examples/bert/bert_client.py
+++ b/python/examples/bert/bert_client.py
@@ -29,7 +29,7 @@ from paddle_serving_app import ChineseBertReader
 
 args = benchmark_args()
 
-reader = ChineseBertReader({"max_seq_len": 20})
+reader = ChineseBertReader({"max_seq_len": 128})
 fetch = ["pooled_output"]
 endpoint_list = ["127.0.0.1:9292"]
 client = Client()
diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py
index 04462ca3b16fecf818aadad63b4f67a8d97014fd..e22e379d67e076d4712c8971b6d342b4eaceadb2 100644
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -21,7 +21,7 @@ import os
 
 class BertService(WebService):
     def load(self):
-        self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
+        self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)
 
     def preprocess(self, feed={}, fetch=[]):
         feed_res = self.reader.process(feed["words"].encode("utf-8"))
diff --git a/python/examples/fit_a_line/README.md b/python/examples/fit_a_line/README.md
index 24bd0363794104226218b83ab9817bc14481e35c..8ea146e9b7a8e781cbebd004bd54c6e0adfba7c2 100644
--- a/python/examples/fit_a_line/README.md
+++ b/python/examples/fit_a_line/README.md
@@ -1,25 +1,50 @@
-# Fit a line example, prediction through rpc service
+# Fit a line prediction example
 
 ([简体中文](./README_CN.md)|English)
 
-## Start rpc service
-``` shell
+## Get data
+
+```shell
 sh get_data.sh
+```
+
+
+
+## RPC service
+
+### Start server
+
+``` shell
 python test_server.py uci_housing_model/
 ```
 
-## Prediction
+You can also start the default RPC service with the following line of code:
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+```
+
+### Client prediction
+
+The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package (`pip install paddlepaddle`).
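Editor's note (not part of the patch): the `test_client.py` referred to in the added line above is essentially a loop over the `uci_housing` test set that calls the serving client. A hypothetical minimal version, assuming the standard `Client` API and using `paddlepaddle` only for the demo dataset, might look like the sketch below; the actual script in the repository may differ.

```python
# Hypothetical minimal client for the fit_a_line (uci_housing) example.
import sys

import paddle
from paddle_serving_client import Client

client = Client()
# e.g. uci_housing_client/serving_client_conf.prototxt
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])

# paddlepaddle is needed only for the demo test dataset.
test_reader = paddle.batch(paddle.dataset.uci_housing.test(), batch_size=1)
for data in test_reader():
    fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
    print("predicted price:", fetch_map["price"])
```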
+
 ``` shell
 python test_client.py uci_housing_client/serving_client_conf.prototxt
 ```
 
-## prediction through http service
-Start a web service with default web service hosting modules
+
+
+## HTTP service
+
+### Start server
+
+Start a web service with the default web service hosting module:
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --name uci --port 9393 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
 ```
 
-## Prediction through http post
+### Client prediction
+
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md
index 0ae611b311072ec4db27ac86128de420fa8b2bf0..3b97005bce14f9794b831066a1be2750d895e4f6 100644
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
@@ -1,25 +1,51 @@
-# 线性回归,RPC预测服务示例
+# 线性回归预测服务示例
 
 (简体中文|[English](./README.md))
 
-## 开启RPC服务端
-``` shell
+## 获取数据
+
+```shell
 sh get_data.sh
+```
+
+
+
+## RPC服务
+
+### 开启服务端
+
+``` shell
 python test_server.py uci_housing_model/
 ```
 
-## RPC预测
+也可以通过下面的一行代码开启默认RPC服务:
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+```
+
+### 客户端预测
+
+`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
+
 ``` shell
 python test_client.py uci_housing_client/serving_client_conf.prototxt
 ```
 
-## 开启HTTP服务端
-Start a web service with default web service hosting modules
+
+
+## HTTP服务
+
+### 开启服务端
+
+通过下面的一行代码开启默认web服务:
+
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --name uci --port 9393 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
 ```
 
-## HTTP预测
+### 客户端预测
+
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 39e06ef14d9f2ab64bee0730ef29b03a5e2dc923..e0adc6e3cbe629d39a0293ba0e362d5115cf4d21 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -175,7 +175,6 @@ class Client(object):
         return self.fetch_names_
 
     def shape_check(self, feed, key):
-        seq_shape = 1
         if key in self.lod_tensor_set:
             return
         if len(feed[key]) != self.feed_tensor_len[key]:
@@ -192,7 +191,7 @@
         elif isinstance(fetch, list):
             fetch_list = fetch
         else:
-            raise ValueError("fetch only accepts string and list of string")
+            raise ValueError("Fetch only accepts string and list of string")
 
         feed_batch = []
         if isinstance(feed, dict):
@@ -200,7 +199,7 @@
         elif isinstance(feed, list):
             feed_batch = feed
         else:
-            raise ValueError("feed only accepts dict and list of dict")
+            raise ValueError("Feed only accepts dict and list of dict")
 
         int_slot_batch = []
         float_slot_batch = []
@@ -216,7 +215,7 @@
 
         if len(fetch_names) == 0:
             raise ValueError(
-                "fetch names should not be empty or out of saved fetch list")
+                "Fetch names should not be empty or out of saved fetch list.")
             return {}
 
         for i, feed_i in enumerate(feed_batch):
@@ -224,7 +223,8 @@
             float_slot = []
             for key in feed_i:
                 if key not in self.feed_names_:
-                    continue
+                    raise ValueError("Wrong feed name: {}.".format(key))
+                self.shape_check(feed_i, key)
                 if self.feed_types_[key] == int_type:
                     if i == 0:
                         int_feed_names.append(key)
@@ -233,6 +233,8 @@
                     if i == 0:
                         float_feed_names.append(key)
                     float_slot.append(feed_i[key])
+            if len(int_slot) + len(float_slot) == 0:
+                raise ValueError("No feed data for predict.")
             int_slot_batch.append(int_slot)
             float_slot_batch.append(float_slot)
 
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index d7d1fe10f8172803e4994bfd9c4987873ac1d3f1..d4984c39df866dcca45daa45d9fc15feaaba8635 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -306,6 +306,7 @@
         self.check_local_bin()
         if not self.use_local_bin:
             self.download_bin()
+            # wait for other process to download server bin
             while not os.path.exists(self.server_path):
                 time.sleep(1)
         else:
@@ -339,6 +340,5 @@
             self.gpuid,)
         print("Going to Run Comand")
         print(command)
-        # wait for other process to download server bin
 
         os.system(command)
diff --git a/tools/Dockerfile b/tools/Dockerfile
index a39ce5bb76e411edeb94766d0c9aae23c6e7e62f..dc39adf01288f092143803557b322a0c8fbcb2b4 100644
--- a/tools/Dockerfile
+++ b/tools/Dockerfile
@@ -3,6 +3,10 @@ FROM centos:7.3.1611
 RUN yum -y install wget && \
     yum -y install epel-release && yum -y install patchelf && \
     yum -y install gcc make python-devel && \
+    yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \
+    yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \
+    yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \
+    yum -y install python3 python3-devel && \
     yum clean all && \
     curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
     python get-pip.py && rm get-pip.py
diff --git a/tools/Dockerfile.ci b/tools/Dockerfile.ci
index c3ababc7c0ca689c53122b8e41af2e350a937eb5..8709075f6cf8f985e346999e76f6b273d7664193 100644
--- a/tools/Dockerfile.ci
+++ b/tools/Dockerfile.ci
@@ -2,6 +2,9 @@ FROM centos:7.3.1611
 RUN yum -y install wget >/dev/null \
     && yum -y install gcc gcc-c++ make glibc-static which >/dev/null \
     && yum -y install git openssl-devel curl-devel bzip2-devel python-devel >/dev/null \
+    && yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
+    && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
+    && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false \
    && wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
    && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
    && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
@@ -26,6 +29,8 @@ RUN yum -y install wget >/dev/null \
    && make >/dev/null && make install >/dev/null \
    && cd .. \
    && rm -rf patchelf-0.10* \
+   && yum install -y python3 python3-devel \
+   && pip3 install google protobuf setuptools wheel flask \
    && yum -y update >/dev/null \
    && yum -y install dnf >/dev/null \
    && yum -y install dnf-plugins-core >/dev/null \
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index a4b5b5fe48b5c4d5c74d66dc688fa5d594a33266..6cb228f587054d5b579df0d85109d41c15c128e9 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -18,5 +18,7 @@ RUN yum -y install wget >/dev/null \
     && python get-pip.py >/dev/null \
     && pip install google protobuf setuptools wheel flask >/dev/null \
     && rm get-pip.py \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
     && yum -y install epel-release && yum -y install patchelf \
     && yum clean all
diff --git a/tools/Dockerfile.gpu b/tools/Dockerfile.gpu
index 091f4a546b549a3dd53645e78ab49b1cd46bf5b3..a08bdf3daef103b5944df192fef967ebd9772b6c 100644
--- a/tools/Dockerfile.gpu
+++ b/tools/Dockerfile.gpu
@@ -6,6 +6,7 @@ RUN yum -y install wget && \
     yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \
     yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \
     yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \
+    yum -y install python3 python3-devel && \
     yum clean all && \
     curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
     python get-pip.py && rm get-pip.py && \
diff --git a/tools/Dockerfile.gpu.devel b/tools/Dockerfile.gpu.devel
index a2233908dbcff4f2f2bbd3edad24b83cb5252e16..8cd7a6dbbddd5e1b60b7833086aa25cd849da519 100644
--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -19,5 +19,7 @@ RUN yum -y install wget >/dev/null \
     && python get-pip.py >/dev/null \
     && pip install google protobuf setuptools wheel flask >/dev/null \
     && rm get-pip.py \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
     && yum -y install epel-release && yum -y install patchelf \
     && yum clean all
diff --git a/tools/serving_build.sh b/tools/serving_build.sh
index 6549838a11c8d9c119762c3429a06cae57fe31b6..5934cbca883bd6a40369bd0875b2789ceb8a4d1f 100644
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -164,6 +164,7 @@ function python_test_fit_a_line() {
             fi
             ;;
         GPU)
+            export CUDA_VISIBLE_DEVICES=0
            # test rpc
            check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9393 --thread 4 --gpu_ids 0 > /dev/null &"
            sleep 5 # wait for the server to start
@@ -226,7 +227,7 @@ function python_run_criteo_ctr_with_cube() {
                exit 1
            fi
            echo "criteo_ctr_with_cube inference auc test success"
-           ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+           kill_server_process
            ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
            ;;
        GPU)
@@ -253,7 +254,7 @@ function python_run_criteo_ctr_with_cube() {
                exit 1
            fi
            echo "criteo_ctr_with_cube inference auc test success"
-           ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+           kill_server_process
            ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
            ;;
        *)
@@ -276,27 +277,48 @@ function python_test_bert() {
    case $TYPE in
        CPU)
            pip install paddlehub
-           python prepare_model.py 20
+           # Because downloading from paddlehub may time out,
+           # download the model from BOS (max_seq_len=128).
+           wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
+           tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
            sh get_data.sh
-           check_cmd "python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 &"
+           check_cmd "python -m paddle_serving_server.serve --model bert_chinese_L-12_H-768_A-12_model --port 9292 &"
            sleep 5
            pip install paddle_serving_app
-           check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+           check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_chinese_L-12_H-768_A-12_client/serving_client_conf.prototxt"
            kill_server_process
-           ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
-           ps -ef | grep "serving" | grep -v grep | awk '{print $2}' | xargs kill
+           # python prepare_model.py 20
+           # sh get_data.sh
+           # check_cmd "python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 &"
+           # sleep 5
+           # pip install paddle_serving_app
+           # check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+           # kill_server_process
+           # ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+           # ps -ef | grep "serving" | grep -v grep | awk '{print $2}' | xargs kill
            echo "bert RPC inference pass"
            ;;
        GPU)
+           export CUDA_VISIBLE_DEVICES=0
            pip install paddlehub
-           python prepare_model.py 20
+           # Because downloading from paddlehub may time out,
+           # download the model from BOS (max_seq_len=128).
+           wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
+           tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
            sh get_data.sh
-           check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 &"
+           check_cmd "python -m paddle_serving_server_gpu.serve --model bert_chinese_L-12_H-768_A-12_model --port 9292 --gpu_ids 0 &"
            sleep 5
            pip install paddle_serving_app
-           check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+           check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_chinese_L-12_H-768_A-12_client/serving_client_conf.prototxt"
            kill_server_process
-           ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+           # python prepare_model.py 20
+           # sh get_data.sh
+           # check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 &"
+           # sleep 5
+           # pip install paddle_serving_app
+           # check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+           # kill_server_process
+           # ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
            echo "bert RPC inference pass"
            ;;
        *)
@@ -325,9 +347,10 @@ function python_test_imdb() {
        check_cmd "python text_classify_service.py imdb_cnn_model/workdir/9292 imdb.vocab &"
        sleep 5
        check_cmd "curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction"
+       kill_server_process
        ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
        ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill
-       echo "imdb CPU HTTP inference pass"
+       echo "imdb CPU HTTP inference pass"
        ;;
    GPU)
        echo "imdb ignore GPU test"
        ;;
@@ -356,6 +379,7 @@ function python_test_lac() {
        check_cmd "python lac_web_service.py jieba_server_model/ lac_workdir 9292 &"
        sleep 5
        check_cmd "curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction"
+       kill_server_process
        ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
        ps -ef | grep "lac_web_service" | grep -v grep | awk '{print $2}' | xargs kill
        echo "lac CPU HTTP inference pass"
@@ -377,7 +401,7 @@ function python_run_test() {
     python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
     python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
     python_test_bert $TYPE # pwd: /Serving/python/examples
-    python_test_imdb $TYPE
+    python_test_imdb $TYPE # pwd: /Serving/python/examples
     python_test_lac $TYPE
     echo "test python $TYPE part finished as expected."
     cd ../.. # pwd: /Serving
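Editor's note (not part of the patch): the `paddle_serving_client` changes in this diff make feed handling stricter — a key that is not among the model's feed names now raises `ValueError("Wrong feed name: ...")` instead of being silently skipped, and a feed that fills no int or float slot raises `ValueError("No feed data for predict.")`. A minimal sketch of client code that satisfies the new checks, assuming the fit_a_line model and the standard `Client` API:

```python
# Sketch of client usage under the stricter feed validation in this patch.
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

x = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
     -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]

# "x" must be one of the feed names declared in serving_client_conf.prototxt;
# a misspelled key (e.g. "X") now raises ValueError("Wrong feed name: X.")
# rather than being dropped silently.
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
print(fetch_map)
```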