merge core/general-client/src/general_model.cpp

c74f2671 · barrierye · 436da64c · 6875e319 · c74f2671 · c74f2671
26 changed file
--- a/README.md
+++ b/README.md
@@ -35,13 +35,28 @@ We consider deploying deep learning inference service online to be a user-facing
 <h2 align="center">Installation</h2>

 We highly recommend you to run Paddle Serving in Docker, please visit [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md)
+```
+# Run CPU Docker
+docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker exec -it test bash
+```
+```
+# Run GPU Docker
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker exec -it test bash
+```

 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server
+pip install paddle-serving-client 
+pip install paddle-serving-server # CPU
+pip install paddle-serving-server-gpu # GPU
 ```

-You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source) to speed up the download.
+You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source by adding `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
+ 
+The client package supports CentOS 7 and Ubuntu 18. Alternatively, you can use the HTTP service without installing the client.

 <h2 align="center">Quick Start Example</h2>

@@ -130,6 +145,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天
 - **Description**: 
 ``` shell
 Image classification trained with Imagenet dataset. A label and corresponding probability will be returned.
+Note: This demo needs paddle-serving-server-gpu. 
 ```

 - **Download Servable Package**: 
@@ -245,6 +261,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv

 ### About Efficiency
 - [How to profile Paddle Serving latency?](python/examples/util)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
 - [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)


--- a/README_CN.md
+++ b/README_CN.md
@@ -37,12 +37,27 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务

 强烈建议您在Docker内构建Paddle Serving，请查看[如何在Docker中运行PaddleServing](doc/RUN_IN_DOCKER_CN.md)

+```
+# 启动 CPU Docker
+docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
+docker exec -it test bash
+```
+```
+# 启动 GPU Docker
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
+nvidia-docker exec -it test bash
+```
 ```shell
 pip install paddle-serving-client
-pip install paddle-serving-server
+pip install paddle-serving-server # CPU
+pip install paddle-serving-server-gpu # GPU
 ```

-您可能需要使用国内镜像源（例如清华源）来加速下载。
+您可能需要使用国内镜像源（例如清华源, 在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`）来加速下载。
+
+客户端安装包支持Centos 7和Ubuntu 18，或者您可以使用HTTP服务，这种情况下不需要安装客户端。

 <h2 align="center">快速启动示例</h2>

@@ -135,6 +150,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天
 - **介绍**: 
 ``` shell
 图像分类模型由Imagenet数据集训练而成，该服务会返回一个标签及其概率
+注意：本示例需要安装paddle-serving-server-gpu
 ```

 - **下载服务包**: 
@@ -251,6 +267,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv

 ### 关于Paddle Serving性能
 - [如何测试Paddle Serving性能？](python/examples/util/)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU版Benchmarks](doc/BENCHMARKING.md)
 - [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)


--- a/core/cube/cube-transfer/CMakeLists.txt
+++ b/core/cube/cube-transfer/CMakeLists.txt
@@ -18,9 +18,11 @@ project(cube-transfer Go)

 include(cmake/golang.cmake)

-ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
 ExternalGoProject_Add(rfw github.com/mipearson/rfw)
-ExternalGoProject_Add(logex github.com/Badangel/logex)
+ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)  
+add_custom_target(logex
+                  COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get github.com/Badangel/logex
+                  DEPENDS rfw)

 add_subdirectory(src)
 install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
--- a/core/cube/cube-transfer/cmake/golang.cmake
+++ b/core/cube/cube-transfer/cmake/golang.cmake
@@ -57,4 +57,4 @@ function(ADD_GO_LIBRARY NAME BUILD_TYPE)
  if(NOT BUILD_TYPE STREQUAL "STATIC")
    install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin)
  endif()
-endfunction(ADD_GO_LIBRARY)
\ No newline at end of file
+endfunction(ADD_GO_LIBRARY)
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -314,7 +314,7 @@ int PredictorClient::batch_predict(
      tensor_vec.push_back(inst->add_tensor_array());
    }

-    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name"
+    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
            << "prepared";
    int vec_idx = 0;
    for (auto &name : float_feed_name) {
@@ -376,6 +376,7 @@ int PredictorClient::batch_predict(
  } else {
    client_infer_end = timeline.TimeStampUS();
    postprocess_start = client_infer_end;
+
    uint32_t model_num = res.outputs_size();
    predict_res_batch._models.resize(model_num);
    for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
@@ -385,9 +386,11 @@ int PredictorClient::batch_predict(
        predict_res_batch._models[m_idx]._int64_map[name].resize(batch_size);
        predict_res_batch._models[m_idx]._float_map[name].resize(batch_size);
      }
+      VLOG(2) << "response batch size " << output.insts_size();
+      VLOG(2) << "response var nmae " << output.insts(0).tensor_array_size();
      for (int bi = 0; bi < batch_size; bi++) {
+        int idx = 0;
        for (auto &name : fetch_name) {
-          int idx = _fetch_name_to_idx[name];
          int len = output.insts(bi).tensor_array(idx).data_size();
          if (_fetch_name_to_type[name] == 0) {
            int len = output.insts(bi).tensor_array(idx).int64_data_size();
@@ -412,6 +415,7 @@ int PredictorClient::batch_predict(
            }
          }
        }
+        idx += 1;
      }
    }
    postprocess_end = timeline.TimeStampUS();

--- a/doc/ABTEST_IN_PADDLE_SERVING.md
+++ b/doc/ABTEST_IN_PADDLE_SERVING.md
@@ -19,6 +19,7 @@ sh get_data.sh

 The following Python code will process the data `test_data/part-0` and write to the `processed.data` file.

+[//file]:#process.py
 ``` python
 from imdb_reader import IMDBDataset
 imdb_dataset = IMDBDataset()
@@ -59,7 +60,8 @@ exit

 Run the following Python code on the host computer to start client. Make sure that the host computer is installed with the `paddle-serving-client` package.

-``` go
+[//file]:#ab_client.py
+``` python
 from paddle_serving_client import Client

 client = Client()
@@ -94,3 +96,24 @@ When making prediction on the client side, if the parameter `need_variant_tag=Tr
 [lstm](total: 1867) acc: 0.490091055169
 [bow](total: 217) acc: 0.73732718894
 ```
+
+<!--
+cp ../Serving/python/examples/imdb/get_data.sh .
+cp ../Serving/python/examples/imdb/imdb_reader.py .
+pip install -U paddle_serving_server
+pip install -U paddle_serving_client
+pip install -U paddlepaddle
+sh get_data.sh
+python process.py
+python -m paddle_serving_server.serve --model imdb_bow_model --port 8000 --workdir workdir1 &
+sleep 5
+python -m paddle_serving_server.serve --model imdb_lstm_model --port 9000  --workdir workdir2 &
+sleep 5
+python ab_client.py >log.txt
+if [[ $? -eq 0 ]]; then
+    echo "test success"
+else
+    echo "test fail"
+fi
+ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+-->
--- a/doc/BERT_10_MINS.md
+++ b/doc/BERT_10_MINS.md
@@ -102,4 +102,5 @@ if [[ $? -eq 0 ]]; then
 else
    echo "test fail"
 fi
+ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
 -->
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -4,14 +4,19 @@

 ## Compilation environment requirements

- os: CentOS 6u3
- gcc: 4.8.2 and later
- go: 1.9.2 and later
- git：2.17.1 and later
- cmake：3.2.2 and later
- python：2.7.2 and later
-
-It is recommended to use Docker to prepare the compilation environment for the Paddle service: [CPU Dockerfile.devel](../tools/Dockerfile.devel), [GPU Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+- OS: CentOS 7
+- GCC: 4.8.2 and later
+- Golang: 1.9.2 and later
+- Git: 2.17.1 and later
+- CMake: 3.2.2 and later
+- Python: 2.7.2 and later
+
+It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you: 
+
+- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`, Dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
+- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`, Dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+
+This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python 3, just adjust the Python options of cmake.

 ## Get Code


--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -4,14 +4,19 @@

 ## 编译环境设置

- os: CentOS 6u3
- gcc: 4.8.2及以上
- go: 1.9.2及以上
- git：2.17.1及以上
- cmake：3.2.2及以上
- python：2.7.2及以上
-
-推荐使用Docker准备Paddle Serving编译环境：[CPU Dockerfile.devel](../tools/Dockerfile.devel)，[GPU Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+- OS: CentOS 7
+- GCC: 4.8.2及以上
+- Golang: 1.9.2及以上
+- Git：2.17.1及以上
+- CMake：3.2.2及以上
+- Python：2.7.2及以上
+
+推荐使用Docker编译，我们已经为您准备好了Paddle Serving编译环境：
+
+- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`，dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
+- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`，dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+
+本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译，只需要调整cmake的Python相关选项即可。

 ## 获取代码


--- a/doc/MULTI_SERVICE_ON_ONE_GPU_CN.md
+++ b/doc/MULTI_SERVICE_ON_ONE_GPU_CN.md
+# 单卡多模型预测服务
+
+当客户端发送的请求数并不频繁的情况下，会造成服务端机器计算资源尤其是GPU资源的浪费，这种情况下，可以在服务端启动多个预测服务来提高资源利用率。Paddle Serving支持在单张显卡上部署多个预测服务，使用时只需要在启动单个服务时通过--gpu_ids参数将服务与显卡进行绑定，这样就可以将多个服务都绑定到同一张卡上。
+
+例如：
+
+```shell
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model --port 9292 --gpu_ids 0
+python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0
+```
+
+在卡0上，同时部署了bert示例和imagenet示例。
+
+**注意：** 单张显卡内部进行推理计算时仍然为串行计算，这种方式是为了减少server端显卡的空闲时间。
+ 
--- a/doc/PERFORMANCE_OPTIM_CN.md
+++ b/doc/PERFORMANCE_OPTIM_CN.md
+# 性能优化
+
+由于模型结构的不同，在执行预测时不同的预测对计算资源的消耗也不相同，对于在线的预测服务来说，对计算资源要求较少的模型，通信的时间成本占比就会较高，称为通信密集型服务，对计算资源要求较多的模型，推理计算的时间成本较高，称为计算密集型服务。对于这两种服务类型，可以根据实际需求采取不同的方式进行优化。
+
+对于一个预测服务来说，想要判断属于哪种类型，最简单的方法就是看时间占比，Paddle Serving提供了[Timeline工具](../python/examples/util/README_CN.md)，可以直观的展现预测服务中各阶段的耗时。
+
+对于通信密集型的预测服务，可以将请求进行聚合，在对延时可以容忍的限度内，将多个预测请求合并成一个batch进行预测。
+
+对于计算密集型的预测服务，可以使用GPU预测服务代替CPU预测服务，或者增加GPU预测服务的显卡数量。
+
+在相同条件下，Paddle Serving提供的HTTP预测服务的通信时间是大于RPC预测服务的，因此对于通信密集型的服务请优先考虑使用RPC的通信方式。
+
+对于模型较大，预测服务内存或显存占用较多的情况，可以通过将--mem_optim选项设置为True来开启内存/显存优化。
--- a/doc/RUN_IN_DOCKER.md
+++ b/doc/RUN_IN_DOCKER.md
@@ -6,6 +6,8 @@

 Docker (GPU version requires nvidia-docker to be installed on the GPU machine)

+This document takes Python2 as an example to show how to run Paddle Serving in docker. You can also use Python3 to run related commands by replacing `python` with `python3`.
+
 ## CPU

 ### Get docker image

--- a/doc/RUN_IN_DOCKER_CN.md
+++ b/doc/RUN_IN_DOCKER_CN.md
@@ -6,6 +6,8 @@

 Docker（GPU版本需要在GPU机器上安装nvidia-docker）

+该文档以Python2为例展示如何在Docker中运行Paddle Serving，您也可以通过将`python`更换成`python3`来用Python3运行相关命令。
+
 ## CPU版本

 ### 获取镜像

--- a/doc/TRAIN_TO_SERVICE.md
+++ b/doc/TRAIN_TO_SERVICE.md
@@ -288,7 +288,7 @@ The script receives data from standard input and prints out the probability that
 The client implemented in the previous step runs the prediction service as an example. The usage method is as follows:

 ```shell
-cat test_data/part-0 | python test_client.py imdb_lstm_client_conf / serving_client_conf.prototxt imdb.vocab
+cat test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
 ```

 Using 2084 samples in the test_data/part-0 file for test testing, the model prediction accuracy is 88.19%.
@@ -350,7 +350,7 @@ In the above command, the first parameter is the saved server-side model and con
 After starting the HTTP prediction service, you can make prediction with a single command:

 ```
-curl -H "Content-Type: application / json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
+curl -H "Content-Type: application/json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
 ```
 When the inference process is normal, the prediction probability is returned, as shown below.


--- a/doc/doc_test_list
+++ b/doc/doc_test_list
 BERT_10_MINS.md
+ABTEST_IN_PADDLE_SERVING.md
--- a/python/examples/fit_a_line/README.md
+++ b/python/examples/fit_a_line/README.md
-# Fit a line example, prediction through rpc service
+# Fit a line prediction example

 ([简体中文](./README_CN.md)|English)

-## Start rpc service
-``` shell
+## Get data
+
+```shell
 sh get_data.sh
+```
+
+
+
+## RPC service
+
+### Start server
+
+``` shell
 python test_server.py uci_housing_model/
 ```

-## Prediction
+You can also start the default RPC service with the following line of code:
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+```
+
+### Client prediction
+
+The `paddlepaddle` package is used in `test_client.py`, so you may need to install it first (`pip install paddlepaddle`).
+
 ``` shell
 python test_client.py uci_housing_client/serving_client_conf.prototxt
 ```

-## prediction through http service
-Start a web service with default web service hosting modules
+
+
+## HTTP service
+
+### Start server
+
+Start a web service with default web service hosting modules:
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --name uci --port 9393 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
 ```

-## Prediction through http post
+### Client prediction
+
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
-# 线性回归，RPC预测服务示例
+# 线性回归预测服务示例

 (简体中文|[English](./README.md))

-## 开启RPC服务端
-``` shell
+## 获取数据
+
+```shell
 sh get_data.sh
+```
+
+
+
+## RPC服务
+
+### 开启服务端
+
+``` shell
 python test_server.py uci_housing_model/
 ```

-## RPC预测
+也可以通过下面的一行代码开启默认RPC服务：
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
+```
+
+### 客户端预测
+
+`test_client.py`中使用了`paddlepaddle`包，需要进行下载（`pip install paddlepaddle`）。
+
 ``` shell
 python test_client.py uci_housing_client/serving_client_conf.prototxt
 ```

-## 开启HTTP服务端
-Start a web service with default web service hosting modules
+
+
+## HTTP服务
+
+### 开启服务端
+
+通过下面的一行代码开启默认web服务：
+
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10 --name uci --port 9393 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci
 ```

-## HTTP预测
+### 客户端预测
+
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
--- a/python/examples/util/show_profile.py
+++ b/python/examples/util/show_profile.py
@@ -10,7 +10,7 @@ time_dict = collections.OrderedDict()
 def prase(line):
    profile_list = line.split(" ")
    num = len(profile_list)
-    for idx in range(num / 2):
+    for idx in range(int(num / 2)):
        profile_0_list = profile_list[idx * 2].split(":")
        profile_1_list = profile_list[idx * 2 + 1].split(":")
        if len(profile_0_list[0].split("_")) == 2:
@@ -18,7 +18,7 @@ def prase(line):
        else:
            name = profile_0_list[0].split("_")[0] + "_" + profile_0_list[
                0].split("_")[1]
-        cost = long(profile_1_list[1]) - long(profile_0_list[1])
+        cost = int(profile_1_list[1]) - int(profile_0_list[1])
        if name not in time_dict:
            time_dict[name] = cost
        else:

--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -175,7 +175,6 @@ class Client(object):
        return self.fetch_names_

    def shape_check(self, feed, key):
-        seq_shape = 1
        if key in self.lod_tensor_set:
            return
        if len(feed[key]) != self.feed_tensor_len[key]:
@@ -192,7 +191,7 @@ class Client(object):
        elif isinstance(fetch, list):
            fetch_list = fetch
        else:
-            raise ValueError("fetch only accepts string and list of string")
+            raise ValueError("Fetch only accepts string and list of string")

        feed_batch = []
        if isinstance(feed, dict):
@@ -200,7 +199,7 @@ class Client(object):
        elif isinstance(feed, list):
            feed_batch = feed
        else:
-            raise ValueError("feed only accepts dict and list of dict")
+            raise ValueError("Feed only accepts dict and list of dict")

        int_slot_batch = []
        float_slot_batch = []
@@ -216,7 +215,7 @@ class Client(object):

        if len(fetch_names) == 0:
            raise ValueError(
-                "fetch names should not be empty or out of saved fetch list")
+                "Fetch names should not be empty or out of saved fetch list.")
            return {}

        for i, feed_i in enumerate(feed_batch):
@@ -224,7 +223,8 @@ class Client(object):
            float_slot = []
            for key in feed_i:
                if key not in self.feed_names_:
-                    continue
+                    raise ValueError("Wrong feed name: {}.".format(key))
+                self.shape_check(feed_i, key)
                if self.feed_types_[key] == int_type:
                    if i == 0:
                        int_feed_names.append(key)
@@ -233,6 +233,8 @@ class Client(object):
                    if i == 0:
                        float_feed_names.append(key)
                    float_slot.append(feed_i[key])
+            if len(int_slot) + len(float_slot) == 0:
+                raise ValueError("No feed data for predict.")
            int_slot_batch.append(int_slot)
            float_slot_batch.append(float_slot)


--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -144,7 +144,7 @@ class Server(object):
            self.bin_path = os.environ["SERVING_BIN"]

    def check_cuda(self):
-        r = os.system("nvcc --version > /dev/null")
+        r = os.system("cat /usr/local/cuda/version.txt")
        if r != 0:
            raise SystemExit(
                "CUDA not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""

--- a/tools/Dockerfile
+++ b/tools/Dockerfile
@@ -3,6 +3,7 @@ FROM centos:7.3.1611
 RUN yum -y install wget && \
    yum -y install epel-release && yum -y install patchelf && \
    yum -y install gcc make python-devel && \
+    yum -y install python3 python3-devel && \
    yum clean all && \
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python get-pip.py && rm get-pip.py
--- a/tools/Dockerfile.ci
+++ b/tools/Dockerfile.ci
@@ -26,6 +26,8 @@ RUN yum -y install wget >/dev/null \
    && make >/dev/null && make install >/dev/null \
    && cd .. \
    && rm -rf patchelf-0.10* \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
    && yum -y update >/dev/null \
    && yum -y install dnf >/dev/null \
    && yum -y install dnf-plugins-core >/dev/null \

--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -18,5 +18,7 @@ RUN yum -y install wget >/dev/null \
    && python get-pip.py >/dev/null \
    && pip install google protobuf setuptools wheel flask >/dev/null \
    && rm get-pip.py \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
    && yum -y install epel-release && yum -y install patchelf \
    && yum clean all
--- a/tools/Dockerfile.gpu
+++ b/tools/Dockerfile.gpu
@@ -6,6 +6,7 @@ RUN yum -y install wget && \
    yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \
    yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \
    yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \
+    yum -y install python3 python3-devel && \
    yum clean all && \
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python get-pip.py && rm get-pip.py && \

--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -19,5 +19,7 @@ RUN yum -y install wget >/dev/null \
    && python get-pip.py >/dev/null \
    && pip install google protobuf setuptools wheel flask >/dev/null \
    && rm get-pip.py \
+    && yum install -y python3 python3-devel \
+    && pip3 install google protobuf setuptools wheel flask \
    && yum -y install epel-release && yum -y install patchelf \
    && yum clean all
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -266,12 +266,119 @@ function python_run_criteo_ctr_with_cube() {
    cd .. # pwd: /Serving/python/examples
 }

+function python_test_bert() {
+    # Smoke-test the bert example over RPC for build TYPE=$1 (CPU or GPU); pwd: /Serving/python/examples
+    local TYPE=$1
+    yum install -y libXext libSM libXrender >/dev/null
+    pip install ujson
+    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    cd bert # pwd: /Serving/python/examples/bert
+    case $TYPE in
+        CPU)
+            pip install paddlehub
+            python prepare_model.py 20
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9292 &"
+            sleep 5
+            pip install paddle_serving_app
+            check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+            kill_server_process
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            ps -ef | grep -F "${SERVING_BIN}" | grep -v grep | awk '{print $2}' | xargs kill # match only the serving binary; a bare "serving" pattern also hits unrelated processes such as serving_build.sh. NOTE(review): assumes the server is launched via $SERVING_BIN
+            echo "bert RPC inference pass"
+            ;;
+        GPU)
+            pip install paddlehub
+            python prepare_model.py 20
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --gpu_ids 0 &"
+            sleep 5
+            pip install paddle_serving_app
+            check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt"
+            kill_server_process
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "bert RPC inference pass"
+            ;;
+        *)
+    esac
+    echo "test bert $TYPE finished as expected."
+    unset SERVING_BIN
+    cd ..
+}
+
+function python_test_imdb() {
+    # Smoke-test the imdb example (RPC, then HTTP) for build TYPE=$1; GPU is skipped. pwd: /Serving/python/examples
+    local TYPE=$1
+    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    cd imdb # pwd: /Serving/python/examples/imdb
+    case $TYPE in
+        CPU)
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 &"
+            sleep 5 # wait after launching the background server so the client does not race it
+            check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
+            echo "imdb CPU RPC inference pass"
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            rm -rf work_dir1
+            sleep 5
+
+            check_cmd "python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab &" # NOTE(review): args split into model dir / workdir / port to mirror the lac_web_service.py call; confirm against text_classify_service.py
+            sleep 5
+            check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"words\": \"i am very sad | 0\", \"fetch\":[\"prediction\"]}' http://127.0.0.1:9292/imdb/prediction" # inner quotes escaped so the JSON (and its '|') reaches curl intact
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "imdb CPU HTTP inference pass"
+            ;;
+        GPU)
+            echo "imdb ignore GPU test"
+            ;;
+        *)
+    esac
+    echo "test imdb $TYPE finished as expected."
+    unset SERVING_BIN
+    cd ..
+}
+
+function python_test_lac() {
+    # Smoke-test the lac example (RPC, then HTTP) for build TYPE=$1; GPU is skipped. pwd: /Serving/python/examples
+    local TYPE=$1
+    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    cd lac # pwd: /Serving/python/examples/lac
+    case $TYPE in
+        CPU)
+            sh get_data.sh
+            check_cmd "python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292 &"
+            sleep 5
+            check_cmd "echo \"我爱北京天安门\" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/"
+            echo "lac CPU RPC inference pass"
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+
+            check_cmd "python lac_web_service.py jieba_server_model/ lac_workdir 9292 &"
+            sleep 5
+            check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"words\": \"我爱北京天安门\", \"fetch\":[\"word_seg\"]}' http://127.0.0.1:9292/lac/prediction" # inner quotes escaped so curl posts valid JSON
+            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
+            ps -ef | grep "lac_web_service" | grep -v grep | awk '{print $2}' | xargs kill
+            echo "lac CPU HTTP inference pass"
+            ;;
+        GPU)
+            echo "lac ignore GPU test"
+            ;;
+        *)
+    esac
+    echo "test lac $TYPE finished as expected."
+    unset SERVING_BIN
+    cd ..
+}
+
 function python_run_test() {
    # Using the compiled binary
    local TYPE=$1 # pwd: /Serving
    cd python/examples # pwd: /Serving/python/examples
    python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
    python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
+    python_test_bert $TYPE # pwd: /Serving/python/examples
+    python_test_imdb $TYPE # pwd: /Serving/python/examples
+    python_test_lac $TYPE # pwd: /Serving/python/examples
    echo "test python $TYPE part finished as expected."
    cd ../.. # pwd: /Serving
 }