diff --git a/README.md b/README.md index eb7785801977e37853664c9ca41081cab0b537cc..81ad14c993852b1c73f44957f199d97336580388 100644 --- a/README.md +++ b/README.md @@ -16,15 +16,15 @@

-[中文](https://github.com/PaddlePaddle/Serving/blob/develop/README_CN.md) +

Motivation

-## Motivation Paddle Serving helps deep learning developers deploy an online inference service without much effort. **The goal of this project**: once you have trained a deep neural net with [Paddle](https://github.com/PaddlePaddle/Paddle), you already have a model inference service. A demo of serving is as follows:

-## Key Features +

Key Features

+ - Integrate with the Paddle training pipeline seamlessly; most Paddle models can be deployed **with a single command**. - **Industrial serving features** supported, such as model management, online loading, online A/B testing, etc. - **Distributed Key-Value indexing** supported, which is especially useful for large-scale sparse features as model inputs. @@ -32,14 +32,16 @@ Paddle Serving helps deep learning developers deploy an online inference service - **Multiple programming languages** supported on the client side, such as Golang, C++ and Python - **Extensible framework design** that can support model serving beyond Paddle. -## Installation +

Installation

+ +We highly recommend running Paddle Serving in Docker; please see [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md). ```shell pip install paddle-serving-client pip install paddle-serving-server ``` -## Quick Start Example +

Quick Start Example

### Boston House Price Prediction model ``` shell @@ -54,6 +56,17 @@ Paddle Serving provides HTTP and RPC based service for users to access ``` shell python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci ``` +
+ +| Argument | Type | Default | Description | +|--------------|------|-----------|--------------------------------| +| `thread` | int | `10` | Concurrency of the current service | +| `port` | int | `9292` | Port of the current service exposed to users | +| `name` | str | `""` | Service name; used to generate the HTTP request URL | +| `model` | str | `""` | Path of the Paddle model directory to be served | + +
+ ``` shell curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction ``` @@ -78,47 +91,75 @@ print(fetch_map) ``` -## Models waiting for you to deploy +
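The RPC client code is mostly elided in the hunk above (only its closing `print(fetch_map)` is visible). For readers following along, here is a minimal sketch of that client; it mirrors the full listing in the RUN_IN_DOCKER.md file added later in this diff and assumes the quick-start `uci_housing` artifacts sit in the working directory:

```python
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])
# One sample of the 13 Boston housing features, same values as the curl example
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
        -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = client.predict(feed={"x": data}, fetch=["price"])
print(fetch_map)
```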

Pre-built services with Paddle Serving

+

Chinese Word Segmentation

-
+- **Description**: Chinese word segmentation HTTP service that can be deployed with a single command. + +- **Download**: +``` shell +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz +``` +- **Host web service**: +``` shell +tar -xzf lac_model_jieba_web.tar.gz +python lac_web_service.py jieba_server_model/ lac_workdir 9292 +``` +- **Request sample**: +``` shell +curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["crf_decode"]}' http://127.0.0.1:9292/lac/prediction +``` +- **Request result**: +``` shell +{"word_seg":"我|爱|北京|天安门"} +``` -| Model Name | Resnet50 | -|:--------------------: |:----------------------------------: | -| Package URL | To be released | -| Description | Get the representation of an image | -| Training Data Source | Imagenet | -
+
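As a programmatic alternative to the curl request sample above, the same endpoint can be called from Python. A small sketch using `requests`, assuming the service is running locally on port 9292 as shown:

```python
import json

import requests

# URL and payload taken from the request sample above
url = "http://127.0.0.1:9292/lac/prediction"
payload = {"words": "我爱北京天安门", "fetch": ["crf_decode"]}
resp = requests.post(url, data=json.dumps(payload),
                     headers={"Content-Type": "application/json"})
print(resp.json())  # expected: {"word_seg": "我|爱|北京|天安门"}
```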

Chinese Sentence To Vector

+ +

Image To Vector

+ +

Image Classification

-## Document -[How to save a servable model?](doc/SAVE.md) +

Document

-[How to config Serving native operators on server side?](doc/SERVER_DAG.md) +### New to Paddle Serving +- [How to save a servable model?](doc/SAVE.md) +- [An end-to-end tutorial from training to serving](doc/END_TO_END.md) +- [Write Bert-as-Service in 10 minutes](doc/Bert_10_mins.md) -[How to develop a new Serving operator](doc/NEW_OPERATOR.md) +### Developers +- [How to configure Serving native operators on the server side?](doc/SERVER_DAG.md) +- [How to develop a new Serving operator](doc/NEW_OPERATOR.md) +- [Golang client](doc/IMDB_GO_CLIENT.md) +- [Compile from source code (Chinese)](doc/COMPILE.md) -[Golang client](doc/IMDB_GO_CLIENT.md) +### About Efficiency +- [How to profile serving efficiency? (Chinese)](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/util) -[Compile from source code(Chinese)](doc/COMPILE.md) +### FAQ +- [FAQ (Chinese)](doc/FAQ.md) -[How profile serving efficiency?(Chinese)](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/util) +### Design +- [Design Doc (Chinese)](doc/DESIGN.md) -[FAQ(Chinese)](doc/FAQ.md) +

Community

-[Design Doc(Chinese)](doc/DESIGN.md) +### Slack -## Join Community To connect with other users and contributors, welcome to join our [Slack channel](https://paddleserving.slack.com/archives/CUBPKHKMJ) -## Contribution +### Contribution If you want to contribute code to Paddle Serving, please refer to the [Contribution Guidelines](doc/CONTRIBUTE.md) ### Feedback + For any feedback or to report a bug, please propose a [GitHub Issue](https://github.com/PaddlePaddle/Serving/issues). -## License +### License + [Apache 2.0 License](https://github.com/PaddlePaddle/Serving/blob/develop/LICENSE) diff --git a/README_CN.md b/README_CN.md index 995b6d0c66a39eae8302209f538ae43385f673f8..8400038f840a9f26a1342d9fcf4bd9729adcb06c 100644 --- a/README_CN.md +++ b/README_CN.md @@ -22,6 +22,8 @@ Paddle Serving 帮助深度学习开发者轻易部署在线预测服务。 ** ## 安装 +强烈建议您在Docker内构建Paddle Serving,请查看[如何在Docker中运行PaddleServing](doc/RUN_IN_DOCKER_CN.md) + ```shell pip install paddle-serving-client pip install paddle-serving-server diff --git a/doc/README_CN.md b/doc/README_CN.md index 7bdcbe40280c954187e3ff619b4f7cf8e6f47a7d..f8d42e6f1e72f1ac34939e5795df3e6604924bad 100644 --- a/doc/README_CN.md +++ b/doc/README_CN.md @@ -9,6 +9,9 @@ Paddle Serving是PaddlePaddle的在线预估服务框架,能够帮助开发者 Paddle Serving当前的develop版本支持轻量级Python API进行快速预测,并且与Paddle的训练可以打通。我们以最经典的波士顿房价预测为示例,完整说明在单机进行模型训练以及使用Paddle Serving进行模型部署的过程。 #### 安装 + +强烈建议您在Docker内构建Paddle Serving,请查看[如何在Docker中运行PaddleServing](RUN_IN_DOCKER_CN.md) + ``` pip install paddle-serving-client pip install paddle-serving-server diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md new file mode 100644 index 0000000000000000000000000000000000000000..345aabed52cb30282057ea7f5ba4953a9681d6d8 --- /dev/null +++ b/doc/RUN_IN_DOCKER.md @@ -0,0 +1,175 @@ +# How to run PaddleServing in Docker + +## Requirements + +Docker (the GPU version requires nvidia-docker to be installed on the GPU machine) + +## CPU + +### Get the Docker image + +You can get the image in two ways: + +1. Pull the image directly + + ```bash + docker pull hub.baidubce.com/ctr/paddleserving:0.1.3 + ``` + +2. Build the image from the Dockerfile + + Create a new folder, copy [Dockerfile](../tools/Dockerfile) into it, and run the following command: + + ```bash + docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3 . + ``` + +### Create container + +```bash +docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3 +docker exec -it test bash +``` + +The `-p` option maps port `9292` of the container to port `9292` of the host. + +### Install PaddleServing + +In order to make the image smaller, the PaddleServing package is not installed in the image. 
You can run the following command to install it: + +```bash +pip install paddle-serving-server +``` + +### Test example + +Download the trained Boston house price prediction model with the following command: + +```bash +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz +tar -xzf uci_housing.tar.gz +``` + +- Test HTTP service + + Running on the Server side (inside the container): + + ```bash + python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci &>std.log 2>err.log & + ``` + + Running on the Client side (inside or outside the container): + + ```bash + curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction + ``` + +- Test RPC service + + Running on the Server side (inside the container): + + ```bash + python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 &>std.log 2>err.log & + ``` + + Run the following Python code on the Client side (inside or outside the container; the `paddle-serving-client` package needs to be installed): + + ```python + from paddle_serving_client import Client + + client = Client() + client.load_client_config("uci_housing_client/serving_client_conf.prototxt") + client.connect(["127.0.0.1:9292"]) + data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, + -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332] + fetch_map = client.predict(feed={"x": data}, fetch=["price"]) + print(fetch_map) + ``` + + + +## GPU + +The GPU version is basically the same as the CPU version, with only some differences in interface naming (the GPU version requires nvidia-docker to be installed on the GPU machine). + +### Get the Docker image + +You can also get the image in two ways: + +1. Pull the image directly + + ```bash + nvidia-docker pull hub.baidubce.com/ctr/paddleserving:0.1.3-gpu + ``` + +2. Build the image from the Dockerfile + + Create a new folder, copy [Dockerfile.gpu](../tools/Dockerfile.gpu) into it, and run the following command: + + ```bash + nvidia-docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3-gpu . + ``` + +### Create container + +```bash +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3-gpu +nvidia-docker exec -it test bash +``` + +The `-p` option maps port `9292` of the container to port `9292` of the host. + +### Install PaddleServing + +In order to make the image smaller, the PaddleServing package is not installed in the image. 
You can run the following command to install it: + +```bash +pip install paddle-serving-server-gpu +``` + +### Test example + +Download the trained Boston house price prediction model with the following command: + +```bash +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz +tar -xzf uci_housing.tar.gz +``` + +- Test HTTP service + + Running on the Server side (inside the container): + + ```bash + python -m paddle_serving_server_gpu.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci + ``` + + Running on the Client side (inside or outside the container): + + ```bash + curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction + ``` + +- Test RPC service + + Running on the Server side (inside the container): + + ```bash + python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9292 + ``` + + Run the following Python code on the Client side (inside or outside the container; the `paddle-serving-client` package needs to be installed): + + ```python + from paddle_serving_client import Client + + client = Client() + client.load_client_config("uci_housing_client/serving_client_conf.prototxt") + client.connect(["127.0.0.1:9292"]) + data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, + -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332] + fetch_map = client.predict(feed={"x": data}, fetch=["price"]) + print(fetch_map) + ``` + + diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..7e2f28bdbd73c793faee96def6b625e1bbff2ba9 --- /dev/null +++ b/doc/RUN_IN_DOCKER_CN.md @@ -0,0 +1,171 @@ +# 如何在Docker中运行PaddleServing + +## 环境要求 + +Docker(GPU版本需要在GPU机器上安装nvidia-docker) + +## CPU版本 + +### 获取镜像 + +可以通过两种方式获取镜像。 + +1. 直接拉取镜像 + + ```bash + docker pull hub.baidubce.com/ctr/paddleserving:0.1.3 + ``` + +2. 基于Dockerfile构建镜像 + + 建立新目录,复制[Dockerfile](../tools/Dockerfile)内容到该目录下Dockerfile文件。执行 + + ```bash + docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3 . 
+ ``` + +### 创建容器并进入 + +```bash +docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3 +docker exec -it test bash +``` + +`-p`选项是为了将容器的`9292`端口映射到宿主机的`9292`端口。 + +### 安装PaddleServing + +为了减小镜像的体积,镜像中没有安装Serving包,要执行下面命令进行安装 + +```bash +pip install paddle-serving-server +``` + +### 测试example + +通过下面命令获取训练好的Boston房价预估模型: + +```bash +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz +tar -xzf uci_housing.tar.gz +``` + +- 测试HTTP服务 + + 在Server端(容器内)运行: + + ```bash + python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci &>std.log 2>err.log & + ``` + + 在Client端(容器内或容器外)运行: + + ```bash + curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction + ``` + +- 测试RPC服务 + + 在Server端(容器内)运行: + + ```bash + python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 &>std.log 2>err.log & + ``` + + 在Client端(容器内或容器外,需要安装`paddle-serving-client`包)运行下面Python代码: + + ```python + from paddle_serving_client import Client + + client = Client() + client.load_client_config("uci_housing_client/serving_client_conf.prototxt") + client.connect(["127.0.0.1:9292"]) + data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, + -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332] + fetch_map = client.predict(feed={"x": data}, fetch=["price"]) + print(fetch_map) + ``` + +## GPU版本 + +GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版本需要在GPU机器上安装nvidia-docker)。 + +### 获取镜像 + +可以通过两种方式获取镜像。 + +1. 直接拉取镜像 + + ```bash + nvidia-docker pull hub.baidubce.com/ctr/paddleserving:0.1.3-gpu + ``` + +2. 基于Dockerfile构建镜像 + + 建立新目录,复制[Dockerfile.gpu](../tools/Dockerfile.gpu)内容到该目录下Dockerfile文件。执行 + + ```bash + nvidia-docker build -t hub.baidubce.com/ctr/paddleserving:0.1.3-gpu . 
+ ``` + +### 创建容器并进入 + +```bash +nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/ctr/paddleserving:0.1.3-gpu +nvidia-docker exec -it test bash +``` + +`-p`选项是为了将容器的`9292`端口映射到宿主机的`9292`端口。 + +### 安装PaddleServing + +为了减小镜像的体积,镜像中没有安装Serving包,要执行下面命令进行安装 + +```bash +pip install paddle-serving-server-gpu +``` + +### 测试example + +通过下面命令获取训练好的Boston房价预估模型: + +```bash +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz +tar -xzf uci_housing.tar.gz +``` + +- 测试HTTP服务 + + 在Server端(容器内)运行: + + ```bash + python -m paddle_serving_server_gpu.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci + ``` + + 在Client端(容器内或容器外)运行: + + ```bash + curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction + ``` + +- 测试RPC服务 + + 在Server端(容器内)运行: + + ```bash + python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9292 + ``` + + 在Client端(容器内或容器外,需要安装`paddle-serving-client`包)运行下面Python代码: + + ```python + from paddle_serving_client import Client + + client = Client() + client.load_client_config("uci_housing_client/serving_client_conf.prototxt") + client.connect(["127.0.0.1:9292"]) + data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, + -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332] + fetch_map = client.predict(feed={"x": data}, fetch=["price"]) + print(fetch_map) + ``` diff --git a/doc/demo.gif b/doc/demo.gif index 1fd6b40024aacff3c97bb0607454156d3a8d0649..8d1accfc405686cf49891a8c97fe75f52e3daf12 100644 Binary files a/doc/demo.gif and b/doc/demo.gif differ diff --git a/python/examples/imagenet/image_classification_service_gpu.py b/python/examples/imagenet/image_classification_service_gpu.py index 8fc92d918867142c6c442cb9eba61e2a9fb1f0e5..8a0bea938638c57a609a604181420929c4a9ca59 100644 --- a/python/examples/imagenet/image_classification_service_gpu.py +++ b/python/examples/imagenet/image_classification_service_gpu.py @@ -14,16 +14,13 @@ from paddle_serving_server_gpu.web_service import WebService import sys -import os +import cv2 import base64 +import numpy as np from image_reader import ImageReader class ImageService(WebService): - """ - preprocessing function for image classification - """ - def preprocess(self, feed={}, fetch=[]): reader = ImageReader() if "image" not in feed: @@ -37,9 +34,7 @@ class ImageService(WebService): image_service = ImageService(name="image") image_service.load_model_config(sys.argv[1]) -gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] -gpus = [int(x) for x in gpu_ids.split(",")] -image_service.set_gpus(gpus) +image_service.set_gpus("0,1,2,3") image_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="gpu") image_service.run_server() diff --git a/python/examples/imagenet/image_http_client.py b/python/examples/imagenet/image_http_client.py index f8b15dafc73ffac1e121610204ffe3cce23748a3..b61f0dd7d8d5ed25ecc828b5d0882ba11a116019 100644 --- a/python/examples/imagenet/image_http_client.py +++ b/python/examples/imagenet/image_http_client.py @@ -16,6 +16,7 @@ import requests import base64 import json import time +import os def predict(image_path, server): @@ -23,13 +24,17 @@ def predict(image_path, server): req = json.dumps({"image": image, "fetch": ["score"]}) r = requests.post( server, data=req, headers={"Content-Type": "application/json"}) + return r if __name__ == "__main__": - server = 
"http://127.0.0.1:9393/image/prediction" - image_path = "./data/n01440764_10026.JPEG" + server = "http://127.0.0.1:9295/image/prediction" + #image_path = "./data/n01440764_10026.JPEG" + image_list = os.listdir("./data/image_data/n01440764/") start = time.time() - for i in range(1000): - predict(image_path, server) + for img in image_list: + image_file = "./data/image_data/n01440764/" + img + res = predict(image_file, server) + print(res.json()["score"][0]) end = time.time() print(end - start) diff --git a/python/examples/lac/benchmark.py b/python/examples/lac/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..53d0881ed74e5e19104a70fb93d6872141d27afd --- /dev/null +++ b/python/examples/lac/benchmark.py @@ -0,0 +1,53 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +import sys +import time +import requests +from lac_reader import LACReader +from paddle_serving_client import Client +from paddle_serving_client.utils import MultiThreadRunner +from paddle_serving_client.utils import benchmark_args + +args = benchmark_args() + + +def single_func(idx, resource): + reader = LACReader("lac_dict") + start = time.time() + if args.request == "rpc": + client = Client() + client.load_client_config(args.model) + client.connect([args.endpoint]) + fin = open("jieba_test.txt") + for line in fin: + feed_data = reader.process(line) + fetch_map = client.predict( + feed={"words": feed_data}, fetch=["crf_decode"]) + elif args.request == "http": + fin = open("jieba_test.txt") + for line in fin: + req_data = {"words": line.strip(), "fetch": ["crf_decode"]} + r = requests.post( + "http://{}/lac/prediction".format(args.endpoint), + data={"words": line.strip(), + "fetch": ["crf_decode"]}) + end = time.time() + return [[end - start]] + + +multi_thread_runner = MultiThreadRunner() +result = multi_thread_runner.run(single_func, args.thread, {}) +print(result) diff --git a/python/examples/lac/get_data.sh b/python/examples/lac/get_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..6b72850d35b7a7b5e43b34d31c7a903e05f07440 --- /dev/null +++ b/python/examples/lac/get_data.sh @@ -0,0 +1,2 @@ +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz +tar -zxvf lac_model.tar.gz diff --git a/python/examples/lac/lac_client.py b/python/examples/lac/lac_client.py new file mode 100644 index 0000000000000000000000000000000000000000..f2a8e858ed72ac4043a2bb3162a39a2aff233043 --- /dev/null +++ b/python/examples/lac/lac_client.py @@ -0,0 +1,35 @@ +# encoding=utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import Client +from lac_reader import LACReader +import sys +import os +import io + +client = Client() +client.load_client_config(sys.argv[1]) +client.connect(["127.0.0.1:9280"]) + +reader = LACReader(sys.argv[2]) +for line in sys.stdin: + if len(line) <= 0: + continue + feed_data = reader.process(line) + if len(feed_data) <= 0: + continue + fetch_map = client.predict(feed={"words": feed_data}, fetch=["crf_decode"]) + print(fetch_map) diff --git a/python/examples/lac/lac_http_client.py b/python/examples/lac/lac_http_client.py new file mode 100644 index 0000000000000000000000000000000000000000..852d785f368e95bb16bfd5804e3153b022945f59 --- /dev/null +++ b/python/examples/lac/lac_http_client.py @@ -0,0 +1,27 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#coding=utf-8 +import requests +import json +import time + +if __name__ == "__main__": + server = "http://127.0.0.1:9280/lac/prediction" + fin = open("jieba_test.txt", "r") + start = time.time() + for line in fin: + req_data = {"words": line.strip(), "fetch": ["crf_decode"]} + r = requests.post(server, json=req_data) + end = time.time() + print(end - start) diff --git a/python/examples/lac/lac_reader.py b/python/examples/lac/lac_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..087ec8bb9e1a44afa2ba5a1cc9931e350aa76fb7 --- /dev/null +++ b/python/examples/lac/lac_reader.py @@ -0,0 +1,101 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle_serving_client import Client +import sys +reload(sys) +sys.setdefaultencoding('utf-8') +import os +import io + + +def load_kv_dict(dict_path, + reverse=False, + delimiter="\t", + key_func=None, + value_func=None): + result_dict = {} + for line in io.open(dict_path, "r", encoding="utf8"): + terms = line.strip("\n").split(delimiter) + if len(terms) != 2: + continue + if reverse: + value, key = terms + else: + key, value = terms + if key in result_dict: + raise KeyError("key duplicated with [%s]" % (key)) + if key_func: + key = key_func(key) + if value_func: + value = value_func(value) + result_dict[key] = value + return result_dict + + +class LACReader(object): + """data reader""" + + def __init__(self, dict_folder): + # read dict + #basepath = os.path.abspath(__file__) + #folder = os.path.dirname(basepath) + word_dict_path = os.path.join(dict_folder, "word.dic") + label_dict_path = os.path.join(dict_folder, "tag.dic") + self.word2id_dict = load_kv_dict( + word_dict_path, reverse=True, value_func=int) + self.id2word_dict = load_kv_dict(word_dict_path) + self.label2id_dict = load_kv_dict( + label_dict_path, reverse=True, value_func=int) + self.id2label_dict = load_kv_dict(label_dict_path) + + @property + def vocab_size(self): + """vocabulary size""" + return max(self.word2id_dict.values()) + 1 + + @property + def num_labels(self): + """num_labels""" + return max(self.label2id_dict.values()) + 1 + + def word_to_ids(self, words): + """convert word to word index""" + word_ids = [] + idx = 0 + try: + words = unicode(words, 'utf-8') + except: + pass + for word in words: + if word not in self.word2id_dict: + word = "OOV" + word_id = self.word2id_dict[word] + word_ids.append(word_id) + return word_ids + + def label_to_ids(self, labels): + """convert label to label index""" + label_ids = [] + for label in labels: + if label not in self.label2id_dict: + label = "O" + label_id = self.label2id_dict[label] + label_ids.append(label_id) + return label_ids + + def process(self, sent): + words = sent.strip() + word_ids = self.word_to_ids(words) + return word_ids diff --git a/python/examples/lac/lac_web_service.py b/python/examples/lac/lac_web_service.py new file mode 100644 index 0000000000000000000000000000000000000000..4a58c6a43caea4045220546488226da121bfdc17 --- /dev/null +++ b/python/examples/lac/lac_web_service.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle_serving_server.web_service import WebService +import sys +from lac_reader import LACReader + + +class LACService(WebService): + def load_reader(self): + self.reader = LACReader("lac_dict") + + def preprocess(self, feed={}, fetch=[]): + if "words" not in feed: + raise ValueError("feed data error!") + feed_data = self.reader.process(feed["words"]) + return {"words": feed_data}, fetch + + +lac_service = LACService(name="lac") +lac_service.load_model_config(sys.argv[1]) +lac_service.load_reader() +lac_service.prepare_server( + workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") +lac_service.run_server() diff --git a/python/examples/lac/utils.py b/python/examples/lac/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..64602902f362cc847c705a3e18d3e76255961314 --- /dev/null +++ b/python/examples/lac/utils.py @@ -0,0 +1,141 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +util tools +""" +from __future__ import print_function +import os +import sys +import numpy as np +import paddle.fluid as fluid +import io + + +def str2bool(v): + """ + argparse does not support True or False in Python + """ + return v.lower() in ("true", "t", "1") + + +def parse_result(words, crf_decode, dataset): + """ parse result """ + offset_list = (crf_decode.lod())[0] + words = np.array(words) + crf_decode = np.array(crf_decode) + batch_size = len(offset_list) - 1 + + for sent_index in range(batch_size): + begin, end = offset_list[sent_index], offset_list[sent_index + 1] + sent = [] + for id in words[begin:end]: + if dataset.id2word_dict[str(id[0])] == 'OOV': + sent.append(' ') + else: + sent.append(dataset.id2word_dict[str(id[0])]) + tags = [ + dataset.id2label_dict[str(id[0])] for id in crf_decode[begin:end] + ] + + sent_out = [] + tags_out = [] + partial_word = "" + for ind, tag in enumerate(tags): + # for the first word + if partial_word == "": + partial_word = sent[ind] + tags_out.append(tag.split('-')[0]) + continue + + # for the beginning of word + if tag.endswith("-B") or (tag == "O" and tags[ind - 1] != "O"): + sent_out.append(partial_word) + tags_out.append(tag.split('-')[0]) + partial_word = sent[ind] + continue + + partial_word += sent[ind] + + # append the last word, except for len(tags)=0 + if len(sent_out) < len(tags_out): + sent_out.append(partial_word) + return sent_out, tags_out + + +def parse_padding_result(words, crf_decode, seq_lens, dataset): + """ parse padding result """ + words = np.squeeze(words) + batch_size = len(seq_lens) + + batch_out = [] + for sent_index in range(batch_size): + + sent = [] + for id in words[sent_index][1:seq_lens[sent_index] - 1]: + if dataset.id2word_dict[str(id)] == 'OOV': + sent.append(' ') + else: + sent.append(dataset.id2word_dict[str(id)]) + tags = [ + dataset.id2label_dict[str(id)] + for id in crf_decode[sent_index][1:seq_lens[sent_index] - 1] + ] + + sent_out = [] + tags_out = [] + partial_word = "" + for ind, tag in enumerate(tags): + # for the first word + if partial_word == "": + 
partial_word = sent[ind] + tags_out.append(tag.split('-')[0]) + continue + + # for the beginning of word + if tag.endswith("-B") or (tag == "O" and tags[ind - 1] != "O"): + sent_out.append(partial_word) + tags_out.append(tag.split('-')[0]) + partial_word = sent[ind] + continue + + partial_word += sent[ind] + + # append the last word, except for len(tags)=0 + if len(sent_out) < len(tags_out): + sent_out.append(partial_word) + + batch_out.append([sent_out, tags_out]) + return batch_out + + +def init_checkpoint(exe, init_checkpoint_path, main_program): + """ + Init CheckPoint + """ + assert os.path.exists( + init_checkpoint_path), "[%s] cannot be found." % init_checkpoint_path + + def existed_persistables(var): + """ + Check whether a persistable variable exists at the checkpoint path + """ + if not fluid.io.is_persistable(var): + return False + return os.path.exists(os.path.join(init_checkpoint_path, var.name)) + + fluid.io.load_vars( + exe, + init_checkpoint_path, + main_program=main_program, + predicate=existed_persistables) diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index c86c3f46b3b3ef83fb5fe630031cf28a95c52649..279e3a895e975473fc5569c4716368c3dda1d9f1 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -18,6 +18,7 @@ Usage: python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 """ import argparse +from .web_service import WebService def parse_args(): # pylint: disable=doc-string-missing @@ -28,6 +29,8 @@ def parse_args(): # pylint: disable=doc-string-missing "--model", type=str, default="", help="Model for serving") parser.add_argument( "--port", type=int, default=9292, help="Port the server") + parser.add_argument( + "--name", type=str, default="None", help="Web service name") parser.add_argument( "--workdir", type=str, @@ -71,4 +74,13 @@ def start_standard_model(): # pylint: disable=doc-string-missing if __name__ == "__main__": - start_standard_model() + + args = parse_args() + if args.name == "None": + start_standard_model() + else: + service = WebService(name=args.name) + service.load_model_config(args.model) + service.prepare_server( + workdir=args.workdir, port=args.port, device=args.device) + service.run_server() diff --git a/python/paddle_serving_server/web_serve.py b/python/paddle_serving_server/web_serve.py deleted file mode 100644 index 46437ad5e53288c6ab03b32ea8882e1b3cfa66a3..0000000000000000000000000000000000000000 --- a/python/paddle_serving_server/web_serve.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -Usage: - Host a trained paddle model with one line command - Example: - python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292 -""" -import argparse -from multiprocessing import Pool, Process -from .web_service import WebService - - -def parse_args(): # pylint: disable=doc-string-missing - parser = argparse.ArgumentParser("web_serve") - parser.add_argument( - "--thread", type=int, default=10, help="Concurrency of server") - parser.add_argument( - "--model", type=str, default="", help="Model for serving") - parser.add_argument( - "--port", type=int, default=9292, help="Port the server") - parser.add_argument( - "--workdir", - type=str, - default="workdir", - help="Working dir of current service") - parser.add_argument( - "--device", type=str, default="cpu", help="Type of device") - parser.add_argument( - "--name", type=str, default="default", help="Default service name") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - service = WebService(name=args.name) - service.load_model_config(args.model) - service.prepare_server( - workdir=args.workdir, port=args.port, device=args.device) - service.run_server() diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 8ee1e137fb8fe282d26bda95e4b4bffa6f670f11..02b55801c35fb5d1ed7e35c249ac07e4d3eb45ab 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -42,7 +42,7 @@ def serve_args(): "--device", type=str, default="gpu", help="Type of device") parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids") parser.add_argument( - "--name", type=str, default="default", help="Default service name") + "--name", type=str, default="None", help="Default service name") return parser.parse_args() diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index b12034667867f6ccbfe88668e37b9512f048bd63..882e6a270ff26eada6d94ce9c0c81b0ef3b7a3d8 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -89,4 +89,18 @@ def start_multi_card(args): # pylint: disable=doc-string-missing if __name__ == "__main__": args = serve_args() - start_multi_card(args) + if args.name == "None": + start_multi_card(args) + else: + web_service = WebService(name=args.name) + web_service.load_model_config(args.model) + gpu_ids = [] + if args.gpu_ids == "": + if "CUDA_VISIBLE_DEVICES" in os.environ: + gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] + if len(gpu_ids) > 0: + gpus = [int(x) for x in gpu_ids.split(",")] + web_service.set_gpus(gpus) + web_service.prepare_server( + workdir=args.workdir, port=args.port, device=args.device) + web_service.run_server() diff --git a/python/paddle_serving_server_gpu/web_serve.py b/python/paddle_serving_server_gpu/web_serve.py deleted file mode 100644 index 734e6d7b93b4f3ad22f330b1545b63c6ac6f2838..0000000000000000000000000000000000000000 --- a/python/paddle_serving_server_gpu/web_serve.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Usage: - Host a trained paddle model with one line command - Example: - python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292 -""" -import os -from multiprocessing import Pool, Process -from .web_service import WebService -import paddle_serving_server_gpu as serving -from paddle_serving_server_gpu import serve_args - -if __name__ == "__main__": - args = serve_args() - web_service = WebService(name=args.name) - web_service.load_model_config(args.model) - gpu_ids = [] - if args.gpu_ids == "": - if "CUDA_VISIBLE_DEVICES" in os.environ: - gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] - if len(gpu_ids) > 0: - gpus = [int(x) for x in gpu_ids.split(",")] - web_service.set_gpus(gpus) - web_service.prepare_server( - workdir=args.workdir, port=args.port, device=args.device) - web_service.run_server() diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index fbb52470d48f45795c6e910600a1368d4bf3d8d2..4d88994cc6094488aaf71ff3e37a74acc93579c4 100755 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -15,7 +15,7 @@ # pylint: disable=doc-string-missing from flask import Flask, request, abort -from multiprocessing import Pool, Process +from multiprocessing import Pool, Process, Queue from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server import paddle_serving_server_gpu as serving from paddle_serving_client import Client @@ -29,12 +29,13 @@ class WebService(object): self.name = name self.gpus = [] self.rpc_service_list = [] + self.input_queues = [] def load_model_config(self, model_config): self.model_config = model_config def set_gpus(self, gpus): - self.gpus = gpus + self.gpus = [int(x) for x in gpus.split(",")] def default_rpc_service(self, workdir="conf", @@ -86,60 +87,101 @@ class WebService(object): gpuid, thread_num=10)) + def producers(self, inputqueue, endpoint): + client = Client() + client.load_client_config("{}/serving_server_conf.prototxt".format( + self.model_config)) + client.connect([endpoint]) + while True: + request_json = inputqueue.get() + feed, fetch = self.preprocess(request_json, request_json["fetch"]) + if "fetch" in feed: + del feed["fetch"] + fetch_map = client.predict(feed=feed, fetch=fetch) + fetch_map = self.postprocess( + feed=request_json, fetch=fetch, fetch_map=fetch_map) + self.output_queue.put(fetch_map) + def _launch_web_service(self, gpu_num): app_instance = Flask(__name__) - client_list = [] - if gpu_num > 1: - gpu_num = 0 - for i in range(gpu_num): - client_service = Client() - client_service.load_client_config( - "{}/serving_server_conf.prototxt".format(self.model_config)) - client_service.connect(["0.0.0.0:{}".format(self.port + i + 1)]) - client_list.append(client_service) - time.sleep(1) service_name = "/" + self.name + "/prediction" + self.input_queues = [] + self.output_queue = Queue() + for i in range(gpu_num): + self.input_queues.append(Queue()) + + producer_list = [] + for i, input_q in enumerate(self.input_queues): + producer_processes = Process( + target=self.producers, + args=( + 
input_q, + "0.0.0.0:{}".format(self.port + 1 + i), )) + producer_list.append(producer_processes) + + for p in producer_list: + p.start() + + client = Client() + client.load_client_config("{}/serving_server_conf.prototxt".format( + self.model_config)) + client.connect(["0.0.0.0:{}".format(self.port + 1)]) + + self.idx = 0 + @app_instance.route(service_name, methods=['POST']) def get_prediction(): if not request.json: abort(400) if "fetch" not in request.json: abort(400) + + self.input_queues[self.idx].put(request.json) + + #self.input_queues[0].put(request.json) + self.idx += 1 + if self.idx >= len(self.gpus): + self.idx = 0 + result = self.output_queue.get() + return result + ''' feed, fetch = self.preprocess(request.json, request.json["fetch"]) if "fetch" in feed: del feed["fetch"] - fetch_map = client_list[0].predict(feed=feed, fetch=fetch) + fetch_map = client.predict(feed=feed, fetch=fetch) fetch_map = self.postprocess( feed=request.json, fetch=fetch, fetch_map=fetch_map) return fetch_map + ''' app_instance.run(host="0.0.0.0", port=self.port, threaded=False, processes=1) + for p in producer_list: + p.join() + def run_server(self): import socket localIP = socket.gethostbyname(socket.gethostname()) print("web service address:") print("http://{}:{}/{}/prediction".format(localIP, self.port, self.name)) - - rpc_processes = [] - for idx in range(len(self.rpc_service_list)): - p_rpc = Process(target=self._launch_rpc_service, args=(idx, )) - rpc_processes.append(p_rpc) - - for p in rpc_processes: + server_pros = [] + for i, service in enumerate(self.rpc_service_list): + p = Process(target=self._launch_rpc_service, args=(i, )) + server_pros.append(p) + for p in server_pros: p.start() p_web = Process( target=self._launch_web_service, args=(len(self.gpus), )) p_web.start() - for p in rpc_processes: - p.join() p_web.join() + for p in server_pros: + p.join() def preprocess(self, feed={}, fetch=[]): return feed, fetch diff --git a/tools/Dockerfile b/tools/Dockerfile index 359dd52c0726e9a421138bf3ecf4d6cff3b2036f..a39ce5bb76e411edeb94766d0c9aae23c6e7e62f 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -1,24 +1,8 @@ -FROM centos:centos6.10 -RUN yum -y install wget \ - && wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtoolset-2.repo \ - && yum -y install devtoolset-2-gcc devtoolset-2-gcc-c++ devtoolset-2-binutils \ - && source /opt/rh/devtoolset-2/enable \ - && echo "source /opt/rh/devtoolset-2/enable" >> /etc/profile \ - && yum -y install git openssl-devel curl-devel bzip2-devel \ - && wget https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz \ - && tar xvf cmake-3.5.2.tar.gz \ - && cd cmake-3.5.2 \ - && ./bootstrap --prefix=/usr \ - && make \ - && make install \ - && cd .. 
\ - && rm -r cmake-3.5.2* \ - && wget https://dl.google.com/go/go1.12.12.linux-amd64.tar.gz \ - && tar -xzvf go1.12.12.linux-amd64.tar.gz \ - && mv go /usr/local/go \ - && rm go1.12.12.linux-amd64.tar.gz \ - && echo "export GOROOT=/usr/local/go" >> /root/.bashrc \ - && echo "export GOPATH=$HOME/go" >> /root/.bashrc \ - && echo "export PATH=$PATH:/usr/local/go/bin" >> /root/.bashrc - +FROM centos:7.3.1611 +RUN yum -y install wget && \ + yum -y install epel-release && yum -y install patchelf && \ + yum -y install gcc make python-devel && \ + yum clean all && \ + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python get-pip.py && rm get-pip.py diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel new file mode 100644 index 0000000000000000000000000000000000000000..a4b5b5fe48b5c4d5c74d66dc688fa5d594a33266 --- /dev/null +++ b/tools/Dockerfile.devel @@ -0,0 +1,22 @@ +FROM centos:7.3.1611 +RUN yum -y install wget >/dev/null \ + && yum -y install gcc gcc-c++ make glibc-static which >/dev/null \ + && yum -y install git openssl-devel curl-devel bzip2-devel python-devel >/dev/null \ + && wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \ + && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \ + && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \ + && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \ + && rm cmake-3.2.0-Linux-x86_64.tar.gz \ + && wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ + && tar xzf go1.14.linux-amd64.tar.gz \ + && mv go /usr/local/go \ + && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \ + && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \ + && rm go1.14.linux-amd64.tar.gz \ + && yum -y install python-devel sqlite-devel >/dev/null \ + && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ + && python get-pip.py >/dev/null \ + && pip install google protobuf setuptools wheel flask >/dev/null \ + && rm get-pip.py \ + && yum -y install epel-release && yum -y install patchelf \ + && yum clean all diff --git a/tools/Dockerfile.gpu b/tools/Dockerfile.gpu index 5258854691866ab9357a947f42081ae8db6984d7..427ae83bcb805ec70c1e6d575e84234f17e9fb30 100644 --- a/tools/Dockerfile.gpu +++ b/tools/Dockerfile.gpu @@ -1,11 +1,15 @@ -FROM paddlepaddle/paddle_manylinux_devel:cuda9.0_cudnn7 -RUN yum -y install git openssl-devel curl-devel bzip2-devel \ - && wget https://dl.google.com/go/go1.12.12.linux-amd64.tar.gz \ - && tar -xzvf go1.12.12.linux-amd64.tar.gz \ - && rm -rf /usr/local/go \ - && mv go /usr/local/go \ - && rm go1.12.12.linux-amd64.tar.gz \ - && echo "GOROOT=/usr/local/go" >> /root/.bashrc \ - && echo "GOPATH=$HOME/go" >> /root/.bashrc \ - && echo "PATH=$PATH:$GOROOT/bin" >> /root/.bashrc +FROM nvidia/cuda:9.0-cudnn7-runtime-centos7 +RUN yum -y install wget && \ + yum -y install epel-release && yum -y install patchelf && \ + yum -y install gcc make python-devel && \ + yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \ + yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \ + yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \ + yum clean all && \ + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python get-pip.py && rm get-pip.py && \ + ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/libcublas.so && \ + echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64':$LD_LIBRARY_PATH >> /root/.bashrc && \ + ln -s 
/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \ + echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 163f4f74429066581aa17cc78b3ab00947ba4d77..b810e3139803bd363c771c6f655cef6595177dc8 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -83,7 +83,7 @@ function python_test_fit_a_line() { check_cmd "python test_client.py uci_housing_client/serving_client_conf.prototxt > /dev/null" ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill # test web - check_cmd "python -m paddle_serving_server.web_serve --model uci_housing_model/ --name uci --port 9399 --name uci > /dev/null &" + check_cmd "python -m paddle_serving_server.serve --model uci_housing_model/ --name uci --port 9399 > /dev/null &" sleep 5 check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"x\": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], \"fetch\":[\"price\"]}' http://127.0.0.1:9399/uci/prediction" ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
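One structural change above deserves a gloss: the reworked `paddle_serving_server_gpu/web_service.py` pushes each Flask request onto one of several per-GPU input queues in round-robin order, a producer process per queue runs the prediction against its own client, and results come back through a shared output queue. A stripped-down, framework-free sketch of that fan-out pattern (all names here are illustrative, not part of the Paddle Serving API):

```python
# Round-robin fan-out over worker processes via queues -- a simplified model of
# the producer/consumer layout introduced in web_service.py above.
from multiprocessing import Process, Queue

NUM_WORKERS = 2  # stands in for len(self.gpus)


def producer(input_q, output_q, worker_id):
    # In web_service.py each producer owns a client bound to one GPU server;
    # here the "prediction" is just an arithmetic placeholder.
    while True:
        req = input_q.get()
        output_q.put({"worker": worker_id, "result": req["x"] * 2})


if __name__ == "__main__":
    input_queues = [Queue() for _ in range(NUM_WORKERS)]
    output_queue = Queue()
    for i, q in enumerate(input_queues):
        Process(target=producer, args=(q, output_queue, i), daemon=True).start()

    idx = 0  # round-robin cursor, like `self.idx` in get_prediction()
    for x in range(4):
        input_queues[idx].put({"x": x})
        idx = (idx + 1) % NUM_WORKERS
        print(output_queue.get())
```

Note that pairing each request with the next item off the shared output queue is only safe because requests are handled one at a time (the Flask app above runs with `threaded=False, processes=1`); under concurrent handlers, replies would need request IDs.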