Commit 819519ec authored by MRXLT

fix ce script

......@@ -85,6 +85,17 @@ include(generic)
include(flags)
endif()
if (APP)
include(external/zlib)
include(external/boost)
include(external/protobuf)
include(external/gflags)
include(external/glog)
include(external/pybind11)
include(external/python)
include(generic)
endif()
if (SERVER)
include(external/cudnn)
include(paddlepaddle)
......
([简体中文](./README_CN.md)|English)
<p align="center">
<br>
<img src='doc/serving_logo.png' width = "600" height = "130">
<br>
<p>
<p align="center">
<br>
<a href="https://travis-ci.com/PaddlePaddle/Serving">
......@@ -23,28 +26,20 @@ We consider deploying deep learning inference service online to be a user-facing
<img src="doc/demo.gif" width="700">
</p>
<h2 align="center">Some Key Features</h2>
- Integrate with Paddle training pipeline seamlessly, most paddle models can be deployed **with one line command**.
- **Industrial serving features** supported, such as models management, online loading, online A/B testing etc.
- **Distributed Key-Value indexing** supported which is especially useful for large scale sparse features as model inputs.
- **Highly concurrent and efficient communication** between clients and servers supported.
- **Multiple programming languages** supported on client side, such as Golang, C++ and python.
- **Extensible framework design** which can support model serving beyond Paddle.
<h2 align="center">Installation</h2>
We **highly recommend** running **Paddle Serving in Docker**; please visit [Run in Docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md)
```
# Run CPU Docker
docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
docker pull hub.baidubce.com/paddlepaddle/serving:latest
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
```
# Run GPU Docker
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
......@@ -56,10 +51,44 @@ pip install paddle-serving-server-gpu # GPU
You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
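For example, installing through the Tsinghua mirror could look like this (package names follow the install commands above):
```
# A sketch: install the client and server packages through the Tsinghua mirror.
pip install paddle-serving-client -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple
```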
If you need to install modules compiled from the develop branch, please download packages from the [latest packages list](./doc/LATEST_PACKAGES.md) and install them with the `pip install` command.
The client package supports CentOS 7 and Ubuntu 18; alternatively, you can use the HTTP service without installing the client.
<h2 align="center"> Pre-built services with Paddle Serving</h2>
<h3 align="center">Chinese Word Segmentation</h4>
``` shell
> python -m paddle_serving_app.package -get_model lac
> tar -xzf lac.tar.gz
> python lac_web_service.py 9292 &
> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
{"result":[{"word_seg":"我|爱|北京|天安门"}]}
```
<h3 align="center">Image Classification</h4>
<p align="center">
<br>
<img src='https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg' width = "200" height = "200">
<br>
<p>
``` shell
> python -m paddle_serving_app.package -get_model resnet_v2_50_imagenet
> tar -xzf resnet_v2_50_imagenet.tar.gz
> python resnet50_imagenet_classify.py resnet50_serving_model &
> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
{"result":{"label":["daisy"],"prob":[0.9341403245925903]}}
```
<h2 align="center">Quick Start Example</h2>
This quick start example is only for users who already have a model to deploy; we provide a ready-to-deploy model here. If you want to know how to use Paddle Serving all the way from offline training to online serving, please refer to [Train_To_Service](https://github.com/PaddlePaddle/Serving/blob/develop/doc/TRAIN_TO_SERVICE.md)
### Boston House Price Prediction model
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
......@@ -82,7 +111,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | bool | `False` | Enable memory optimization |
| `mem_optim` | bool | `False` | Enable memory / graphic memory optimization |
| `ir_optim` | bool | `False` | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | bool | `False` | Run inference with MKL |
Here, we use `curl` to send an HTTP POST request to the service we just started. Users can use any Python library to send HTTP POST requests as well, e.g., [requests](https://requests.readthedocs.io/en/master/).
</center>
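As a sketch, the same request can also be sent from Python with the `requests` library (the endpoint and payload follow the quick-start example; the exact JSON layout is an assumption):
``` python
# A minimal sketch of sending the prediction request over HTTP with requests.
import requests

payload = {
    "feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                    -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}],
    "fetch": ["price"],
}
resp = requests.post("http://127.0.0.1:9292/uci/prediction", json=payload)
print(resp.json())
```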
......@@ -113,138 +144,13 @@ print(fetch_map)
```
Here, the `client.predict` function has two arguments. `feed` is a `python dict` with model input variable alias names and values. `fetch` assigns the prediction variables to be returned from the server. In the example, the alias names `"x"` and `"price"` were assigned when the servable model was saved during training.
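For reference, a minimal sketch of such a client call, assuming the quick-start Boston model and endpoint (the config path and port are assumptions):
``` python
from paddle_serving_client import Client

# Load the client configuration generated when the servable model was saved.
client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

# "x" and "price" are the alias names assigned when the model was saved.
fetch_map = client.predict(feed={"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582,
                                       -0.0727, -0.1583, -0.0584, 0.6283, 0.4919,
                                       0.1856, 0.0795, -0.0332]},
                           fetch=["price"])
print(fetch_map)
```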
<h2 align="center"> Pre-built services with Paddle Serving</h2>
<h3 align="center">Chinese Word Segmentation</h4>
- **Description**:
``` shell
Chinese word segmentation HTTP service that can be deployed with one line command.
```
- **Download Servable Package**:
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
```
- **Host web service**:
``` shell
tar -xzf lac_model_jieba_web.tar.gz
python lac_web_service.py jieba_server_model/ lac_workdir 9292
```
- **Request sample**:
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
```
- **Request result**:
``` shell
{"word_seg":"我|爱|北京|天安门"}
```
<h3 align="center">Image Classification</h4>
- **Description**:
``` shell
Image classification trained with Imagenet dataset. A label and corresponding probability will be returned.
Note: This demo needs paddle-serving-server-gpu.
```
- **Download Servable Package**:
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/imagenet_demo.tar.gz
```
- **Host web service**:
``` shell
tar -xzf imagenet_demo.tar.gz
python image_classification_service_demo.py resnet50_serving_model
```
- **Request sample**:
<p align="center">
<br>
<img src='https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg' width = "200" height = "200">
<br>
<p>
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
```
- **Request result**:
``` shell
{"label":"daisy","prob":0.9341403245925903}
```
<h3 align="center">More Demos</h3>
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | Bert-Base-Baike |
| URL | [https://paddle-serving.bj.bcebos.com/bert_example/bert_seq128.tar.gz](https://paddle-serving.bj.bcebos.com/bert_example%2Fbert_seq128.tar.gz) |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert |
| Description | Get semantic representation from a Chinese Sentence |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | Resnet50-Imagenet |
| URL | [https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz](https://paddle-serving.bj.bcebos.com/imagenet-example%2FResNet50_vd.tar.gz) |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
| Description | Get image semantic representation from an image |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | Resnet101-Imagenet |
| URL | https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
| Description | Get image semantic representation from an image |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | CNN-IMDB |
| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
| Description | Get category probability from an English Sentence |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | LSTM-IMDB |
| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
| Description | Get category probability from an English Sentence |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | BOW-IMDB |
| URL | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
| Description | Get category probability from an English Sentence |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | Jieba-LAC |
| URL | https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/lac |
| Description | Get word segmentation from a Chinese Sentence |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| Model Name | DNN-CTR |
| URL | https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz |
| Client/Server Code | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr |
| Description | Get click probability from a feature vector of item |
<h2 align="center">Some Key Features of Paddle Serving</h2>
- Integrate with Paddle training pipeline seamlessly, most paddle models can be deployed **with one line command**.
- **Industrial serving features** supported, such as models management, online loading, online A/B testing etc.
- **Distributed Key-Value indexing** supported which is especially useful for large scale sparse features as model inputs.
- **Highly concurrent and efficient communication** between clients and servers supported.
- **Multiple programming languages** supported on client side, such as Golang, C++ and python.
<h2 align="center">Document</h2>
......@@ -259,11 +165,13 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
- [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md)
- [Golang client](doc/IMDB_GO_CLIENT.md)
- [Compile from source code](doc/COMPILE.md)
- [Deploy Web Service with uWSGI](doc/UWSGI_DEPLOY.md)
- [Hot loading for model file](doc/HOT_LOADING_IN_SERVING.md)
### About Efficiency
- [How to profile Paddle Serving latency?](python/examples/util)
- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
......
(简体中文|[English](./README.md))
<p align="center">
<br>
<img src='https://paddle-serving.bj.bcebos.com/imdb-demo%2FLogoMakr-3Bd2NM-300dpi.png' width = "600" height = "130">
<br>
<p>
<p align="center">
<br>
<a href="https://travis-ci.com/PaddlePaddle/Serving">
......@@ -24,14 +27,7 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
<img src="doc/demo.gif" width="700">
</p>
<h2 align="center">核心功能</h2>
- 与Paddle训练紧密连接,绝大部分Paddle模型可以 **一键部署**.
- 支持 **工业级的服务能力** 例如模型管理,在线加载,在线A/B测试等.
- 支持 **分布式键值对索引** 助力于大规模稀疏特征作为模型输入.
- 支持客户端和服务端之间 **高并发和高效通信**.
- 支持 **多种编程语言** 开发客户端,例如Golang,C++和Python.
- **可伸缩框架设计** 可支持不限于Paddle的模型服务.
<h2 align="center">安装</h2>
......@@ -39,14 +35,14 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
```
# 启动 CPU Docker
docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
docker pull hub.baidubce.com/paddlepaddle/serving:latest
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
```
# 启动 GPU Docker
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
```shell
......@@ -57,9 +53,42 @@ pip install paddle-serving-server-gpu # GPU
您可能需要使用国内镜像源(例如清华源, 在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`)来加速下载。
如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。
客户端安装包支持Centos 7和Ubuntu 18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。
<h2 align="center">快速启动示例</h2>
<h2 align="center"> Paddle Serving预装的服务 </h2>
<h3 align="center">中文分词</h4>
``` shell
> python -m paddle_serving_app.package -get_model lac
> tar -xzf lac.tar.gz
> python lac_web_service.py 9292 &
> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
{"result":[{"word_seg":"我|爱|北京|天安门"}]}
```
<h3 align="center">图像分类</h4>
<p align="center">
<br>
<img src='https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg' width = "200" height = "200">
<br>
<p>
``` shell
> python -m paddle_serving_app.package -get_model resnet_v2_50_imagenet
> tar -xzf resnet_v2_50_imagenet.tar.gz
> python resnet50_imagenet_classify.py resnet50_serving_model &
> curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
{"result":{"label":["daisy"],"prob":[0.9341403245925903]}}
```
<h2 align="center">快速开始示例</h2>
这个快速开始示例主要是为了给那些已经有一个要部署的模型的用户准备的,而且我们也提供了一个可以用来部署的模型。如果您想知道如何从离线训练到在线服务走完全流程,请参考[从训练到部署](https://github.com/PaddlePaddle/Serving/blob/develop/doc/TRAIN_TO_SERVICE_CN.md)
<h3 align="center">波士顿房价预测</h3>
......@@ -87,6 +116,8 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | bool | `False` | Enable memory optimization |
| `ir_optim` | bool | `False` | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | bool | `False` | Run inference with MKL |
我们使用 `curl` 命令来发送HTTP POST请求给刚刚启动的服务。用户也可以调用python库来发送HTTP POST请求,请参考英文文档 [requests](https://requests.readthedocs.io/en/master/)。
</center>
......@@ -118,139 +149,13 @@ print(fetch_map)
```
在这里,`client.predict`函数具有两个参数。`feed`是带有模型输入变量别名和值的`python dict`。`fetch`指定了要从服务器端返回的预测变量。在该示例中,`"x"`和`"price"`是在训练过程中保存可服务模型时被赋予的别名。
<h2 align="center">Paddle Serving预装的服务</h2>
<h3 align="center">中文分词模型</h4>
- **介绍**:
``` shell
本示例为中文分词HTTP服务一键部署
```
- **下载服务包**:
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
```
- **启动web服务**:
``` shell
tar -xzf lac_model_jieba_web.tar.gz
python lac_web_service.py jieba_server_model/ lac_workdir 9292
```
- **客户端请求示例**:
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
```
- **返回结果示例**:
``` shell
{"word_seg":"我|爱|北京|天安门"}
```
<h3 align="center">图像分类模型</h4>
- **介绍**:
``` shell
图像分类模型由Imagenet数据集训练而成,该服务会返回一个标签及其概率
注意:本示例需要安装paddle-serving-server-gpu
```
- **下载服务包**:
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/imagenet_demo.tar.gz
```
- **启动web服务**:
``` shell
tar -xzf imagenet_demo.tar.gz
python image_classification_service_demo.py resnet50_serving_model
```
- **客户端请求示例**:
<p align="center">
<br>
<img src='https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg' width = "200" height = "200">
<br>
<p>
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9292/image/prediction
```
- **返回结果示例**:
``` shell
{"label":"daisy","prob":0.9341403245925903}
```
<h3 align="center">更多示例</h3>
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | Bert-Base-Baike |
| 下载链接 | [https://paddle-serving.bj.bcebos.com/bert_example/bert_seq128.tar.gz](https://paddle-serving.bj.bcebos.com/bert_example%2Fbert_seq128.tar.gz) |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert |
| 介绍 | 获得一个中文语句的语义表示 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | Resnet50-Imagenet |
| 下载链接 | [https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz](https://paddle-serving.bj.bcebos.com/imagenet-example%2FResNet50_vd.tar.gz) |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
| 介绍 | 获得一张图片的图像语义表示 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | Resnet101-Imagenet |
| 下载链接 | https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet |
| 介绍 | 获得一张图片的图像语义表示 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | CNN-IMDB |
| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
| 介绍 | 从一个英文语句获得类别及其概率 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | LSTM-IMDB |
| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
| 介绍 | 从一个英文语句获得类别及其概率 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | BOW-IMDB |
| 下载链接 | https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imdb |
| 介绍 | 从一个英文语句获得类别及其概率 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | Jieba-LAC |
| 下载链接 | https://paddle-serving.bj.bcebos.com/lac/lac_model.tar.gz |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/lac |
| 介绍 | 获取中文语句的分词 |
| Key | Value |
| :----------------- | :----------------------------------------------------------- |
| 模型名 | DNN-CTR |
| 下载链接 | https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz |
| 客户端/服务端代码 | https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/criteo_ctr |
| 介绍 | 从项目的特征向量中获得点击概率 |
<h2 align="center">Paddle Serving的核心功能</h2>
- 与Paddle训练紧密连接,绝大部分Paddle模型可以 **一键部署**.
- 支持 **工业级的服务能力** 例如模型管理,在线加载,在线A/B测试等.
- 支持 **分布式键值对索引** 助力于大规模稀疏特征作为模型输入.
- 支持客户端和服务端之间 **高并发和高效通信**.
- 支持 **多种编程语言** 开发客户端,例如Golang,C++和Python.
<h2 align="center">文档</h2>
......@@ -265,11 +170,13 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
- [如何开发一个新的Web Service?](doc/NEW_WEB_SERVICE_CN.md)
- [如何在Paddle Serving使用Go Client?](doc/IMDB_GO_CLIENT_CN.md)
- [如何编译PaddleServing?](doc/COMPILE_CN.md)
- [如何使用uWSGI部署Web Service](doc/UWSGI_DEPLOY_CN.md)
- [如何实现模型文件热加载](doc/HOT_LOADING_IN_SERVING_CN.md)
### 关于Paddle Serving性能
- [如何测试Paddle Serving性能?](python/examples/util/)
- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU版Benchmarks](doc/BENCHMARKING.md)
- [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
......
......@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "1.7.1")
SET(PADDLE_VERSION "1.7.2")
if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
......
......@@ -23,6 +23,11 @@ add_subdirectory(pdcodegen)
add_subdirectory(sdk-cpp)
endif()
if (APP)
add_subdirectory(configure)
endif()
if(CLIENT)
add_subdirectory(general-client)
endif()
......
if (SERVER OR CLIENT)
LIST(APPEND protofiles
${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
......@@ -28,6 +29,7 @@ FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
install(FILES ${inc}
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
endif()
py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
......@@ -51,6 +53,14 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
endif()
if (APP)
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (SERVER)
py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
......
......@@ -43,6 +43,7 @@ message EngineDesc {
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
};
// model_toolkit conf
......
......@@ -83,9 +83,6 @@ func JsonReq(method, requrl string, timeout int, kv *map[string]string,
}
func GetHdfsMeta(src string) (master, ugi, path string, err error) {
//src = "hdfs://root:rootpasst@st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
//src = "hdfs://st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
ugiBegin := strings.Index(src, "//")
ugiPos := strings.LastIndex(src, "@")
if ugiPos != -1 && ugiBegin != -1 {
......
if(CLIENT)
add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
......@@ -69,15 +69,27 @@ class ModelRes {
const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
return _int64_value_map[name];
}
std::vector<int64_t>&& get_int64_by_name_with_rv(const std::string& name) {
return std::move(_int64_value_map[name]);
}
const std::vector<float>& get_float_by_name(const std::string& name) {
return _float_value_map[name];
}
const std::vector<int>& get_shape(const std::string& name) {
std::vector<float>&& get_float_by_name_with_rv(const std::string& name) {
return std::move(_float_value_map[name]);
}
const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
const std::vector<int>& get_lod(const std::string& name) {
std::vector<int>&& get_shape_by_name_with_rv(const std::string& name) {
return std::move(_shape_map[name]);
}
const std::vector<int>& get_lod_by_name(const std::string& name) {
return _lod_map[name];
}
std::vector<int>&& get_lod_by_name_with_rv(const std::string& name) {
return std::move(_lod_map[name]);
}
void set_engine_name(const std::string& engine_name) {
_engine_name = engine_name;
}
......@@ -121,17 +133,33 @@ class PredictorRes {
const std::string& name) {
return _models[model_idx].get_int64_by_name(name);
}
std::vector<int64_t>&& get_int64_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_int64_by_name_with_rv(name));
}
const std::vector<float>& get_float_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_float_by_name(name);
}
const std::vector<int>& get_shape(const int model_idx,
const std::string& name) {
return _models[model_idx].get_shape(name);
std::vector<float>&& get_float_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_float_by_name_with_rv(name));
}
const std::vector<int>& get_shape_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_shape_by_name(name);
}
const std::vector<int>&& get_shape_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_shape_by_name_with_rv(name));
}
const std::vector<int>& get_lod_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_lod_by_name(name);
}
const std::vector<int>& get_lod(const int model_idx,
const std::string& name) {
return _models[model_idx].get_lod(name);
const std::vector<int>&& get_lod_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_lod_by_name_with_rv(name));
}
void add_model_res(ModelRes&& res) {
_engine_names.push_back(res.engine_name());
......
......@@ -258,9 +258,10 @@ int PredictorClient::batch_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
......@@ -279,9 +280,9 @@ int PredictorClient::batch_predict(
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
......@@ -345,7 +346,7 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
VLOG(2) << "batch size: " << batch_size;
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
......@@ -462,7 +463,7 @@ int PredictorClient::numpy_predict(
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
for (ssize_t l = 0; l < int_array.shape(3); l++) {
tensor->add_float_data(int_array(i, j, k, l));
tensor->add_int64_data(int_array(i, j, k, l));
}
}
}
......@@ -474,7 +475,7 @@ int PredictorClient::numpy_predict(
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
tensor->add_float_data(int_array(i, j, k));
tensor->add_int64_data(int_array(i, j, k));
}
}
}
......@@ -484,7 +485,7 @@ int PredictorClient::numpy_predict(
auto int_array = int_feed[vec_idx].unchecked<2>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
tensor->add_float_data(int_array(i, j));
tensor->add_int64_data(int_array(i, j));
}
}
break;
......@@ -492,7 +493,7 @@ int PredictorClient::numpy_predict(
case 1: {
auto int_array = int_feed[vec_idx].unchecked<1>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
tensor->add_float_data(int_array(i));
tensor->add_int64_data(int_array(i));
}
break;
}
......@@ -536,9 +537,9 @@ int PredictorClient::numpy_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
......@@ -557,9 +558,10 @@ int PredictorClient::numpy_predict(
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
......
......@@ -32,24 +32,41 @@ PYBIND11_MODULE(serving_client, m) {
.def(py::init())
.def("get_int64_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_int64_by_name(model_idx, name);
},
py::return_value_policy::reference)
// see more: https://github.com/pybind/pybind11/issues/1042
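// The capsule takes ownership of the heap-allocated vector and frees it when
// the returned numpy array is garbage-collected, so the data is handed to
// Python without an extra copy.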
std::vector<int64_t> *ptr = new std::vector<int64_t>(
std::move(self.get_int64_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int64_t> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_float_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_float_by_name(model_idx, name);
},
py::return_value_policy::reference)
std::vector<float> *ptr = new std::vector<float>(
std::move(self.get_float_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<float> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_shape(model_idx, name);
},
py::return_value_policy::reference)
std::vector<int> *ptr = new std::vector<int>(
std::move(self.get_shape_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_lod",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_lod(model_idx, name);
},
py::return_value_policy::reference)
std::vector<int> *ptr = new std::vector<int>(
std::move(self.get_lod_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
.def("get_engine_names",
[](PredictorRes &self) { return self.get_engine_names(); });
......@@ -100,7 +117,8 @@ PYBIND11_MODULE(serving_client, m) {
fetch_name,
predict_res_batch,
pid);
})
},
py::call_guard<py::gil_scoped_release>())
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
......
......@@ -131,7 +131,7 @@ int GeneralReaderOp::inference() {
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
if (model_config->_is_lod_feed[i]) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "var[" << i << "] is lod_tensor";
......@@ -153,6 +153,7 @@ int GeneralReaderOp::inference() {
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
int tensor_size = 0;
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = 0;
......@@ -162,15 +163,28 @@ int GeneralReaderOp::inference() {
data_len = tensor.float_data_size();
}
VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "current len: " << cur_len;
out->at(i).lod[0].push_back(cur_len + data_len);
VLOG(2) << "new len: " << cur_len + data_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
out->at(i).lod[0].push_back(cur_len + sample_len);
VLOG(2) << "new len: " << cur_len + sample_len;
}
out->at(i).data.Resize(tensor_size * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back()};
for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
}
if (out->at(i).shape.size() == 1) {
out->at(i).shape.push_back(1);
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
......
......@@ -15,8 +15,10 @@
#include "core/general-server/op/general_response_op.h"
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <utility>
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
......@@ -86,17 +88,20 @@ int GeneralResponseOp::inference() {
// To get the order of model return values
output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "out[" << idx << "] is lod_tensor";
VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
<< " is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
} else {
VLOG(2) << "out[" << idx << "] is tensor";
VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
<< " is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
......@@ -111,6 +116,8 @@ int GeneralResponseOp::inference() {
cap *= in->at(idx).shape[j];
}
if (in->at(idx).dtype == paddle::PaddleDType::INT64) {
VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
<< "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
if (model_config->_is_lod_fetch[idx]) {
FetchInst *fetch_p = output->mutable_insts(0);
......@@ -127,8 +134,11 @@ int GeneralResponseOp::inference() {
fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
}
}
VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
var_idx++;
} else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) {
VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
<< "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
if (model_config->_is_lod_fetch[idx]) {
FetchInst *fetch_p = output->mutable_insts(0);
......@@ -145,6 +155,7 @@ int GeneralResponseOp::inference() {
fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
}
}
VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
var_idx++;
}
}
......
......@@ -35,6 +35,7 @@ class InferEngineCreationParams {
InferEngineCreationParams() {
_path = "";
_enable_memory_optimization = false;
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
}
......@@ -45,10 +46,16 @@ class InferEngineCreationParams {
_enable_memory_optimization = enable_memory_optimization;
}
void set_enable_ir_optimization(bool enable_ir_optimization) {
_enable_ir_optimization = enable_ir_optimization;
}
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
......@@ -68,6 +75,7 @@ class InferEngineCreationParams {
<< "model_path = " << _path << ", "
<< "enable_memory_optimization = " << _enable_memory_optimization
<< ", "
<< "enable_ir_optimization = " << _enable_ir_optimization << ", "
<< "static_optimization = " << _static_optimization << ", "
<< "force_update_static_cache = " << _force_update_static_cache;
}
......@@ -75,6 +83,7 @@ class InferEngineCreationParams {
private:
std::string _path;
bool _enable_memory_optimization;
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
};
......@@ -150,6 +159,11 @@ class ReloadableInferEngine : public InferEngine {
force_update_static_cache = conf.force_update_static_cache();
}
if (conf.has_enable_ir_optimization()) {
_infer_engine_params.set_enable_ir_optimization(
conf.enable_ir_optimization());
}
_infer_engine_params.set_path(_model_data_path);
if (enable_memory_optimization) {
_infer_engine_params.set_enable_memory_optimization(true);
......
......@@ -22,23 +22,23 @@ namespace baidu {
namespace paddle_serving {
namespace sdk_cpp {
#define PARSE_CONF_ITEM(conf, item, name, fail) \
do { \
if (conf.has_##name()) { \
item.set(conf.name()); \
} else { \
LOG(ERROR) << "Not found key in configue: " << #name; \
} \
#define PARSE_CONF_ITEM(conf, item, name, fail) \
do { \
if (conf.has_##name()) { \
item.set(conf.name()); \
} else { \
VLOG(2) << "Not found key in configue: " << #name; \
} \
} while (0)
#define ASSIGN_CONF_ITEM(dest, src, fail) \
do { \
if (!src.init) { \
LOG(ERROR) << "Cannot assign an unintialized item: " << #src \
<< " to dest: " << #dest; \
return fail; \
} \
dest = src.value; \
#define ASSIGN_CONF_ITEM(dest, src, fail) \
do { \
if (!src.init) { \
VLOG(2) << "Cannot assign an unintialized item: " << #src \
<< " to dest: " << #dest; \
return fail; \
} \
dest = src.value; \
} while (0)
template <typename T>
......
......@@ -21,7 +21,7 @@ The following Python code will process the data `test_data/part-0` and write to
[//file]:#process.py
``` python
from imdb_reader import IMDBDataset
from paddle_serving_app.reader import IMDBDataset
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource('imdb.vocab')
......@@ -39,7 +39,7 @@ Here, we [use docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/R
First, start the BOW server, which enables the `8000` port:
``` shell
docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it bow-server bash
pip install paddle-serving-server
python -m paddle_serving_server.serve --model model --port 8000 >std.log 2>err.log &
......@@ -49,7 +49,7 @@ exit
Similarly, start the LSTM server, which enables the `9000` port:
```bash
docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it lstm-server bash
pip install paddle-serving-server
python -m paddle_serving_server.serve --model model --port 9000 >std.log 2>err.log &
......@@ -78,7 +78,7 @@ with open('processed.data') as f:
feed = {"words": word_ids}
fetch = ["acc", "cost", "prediction"]
[fetch_map, tag] = client.predict(feed=feed, fetch=fetch, need_variant_tag=True)
if (float(fetch_map["prediction"][1]) - 0.5) * (float(label[0]) - 0.5) > 0:
if (float(fetch_map["prediction"][0][1]) - 0.5) * (float(label[0]) - 0.5) > 0:
cnt[tag]['acc'] += 1
cnt[tag]['total'] += 1
......@@ -88,7 +88,7 @@ with open('processed.data') as f:
In the code, the function `client.add_variant(tag, clusters, variant_weight)` is to add a variant with label `tag` and flow weight `variant_weight`. In this example, a BOW variant with label of `bow` and flow weight of `10`, and an LSTM variant with label of `lstm` and a flow weight of `90` are added. The flow on the client side will be distributed to two variants according to the ratio of `10:90`.
When making prediction on the client side, if the parameter `need_variant_tag=True` is specified, the response will contains the variant tag corresponding to the distribution flow.
When making prediction on the client side, if the parameter `need_variant_tag=True` is specified, the response will contain the variant tag corresponding to the distribution flow.
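A minimal sketch of registering the two variants described above on the client side (the config path and endpoints are assumptions consistent with the servers started earlier):
``` python
from paddle_serving_client import Client

client = Client()
client.load_client_config("imdb_bow_client_conf/serving_client_conf.prototxt")
# Route roughly 10% of traffic to the BOW server and 90% to the LSTM server.
client.add_variant("bow", ["127.0.0.1:8000"], 10)
client.add_variant("lstm", ["127.0.0.1:9000"], 90)
client.connect()
```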
### Expected Results
......
......@@ -20,7 +20,7 @@ sh get_data.sh
下面Python代码将处理`test_data/part-0`的数据,写入`processed.data`文件中。
```python
from imdb_reader import IMDBDataset
from paddle_serving_app.reader import IMDBDataset
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource('imdb.vocab')
......@@ -38,7 +38,7 @@ with open('test_data/part-0') as fin:
首先启动BOW Server,该服务启用`8000`端口:
```bash
docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -dit -v $PWD/imdb_bow_model:/model -p 8000:8000 --name bow-server hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it bow-server bash
pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple
python -m paddle_serving_server.serve --model model --port 8000 >std.log 2>err.log &
......@@ -48,7 +48,7 @@ exit
同理启动LSTM Server,该服务启用`9000`端口:
```bash
docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -dit -v $PWD/imdb_lstm_model:/model -p 9000:9000 --name lstm-server hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it lstm-server bash
pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple
python -m paddle_serving_server.serve --model model --port 9000 >std.log 2>err.log &
......@@ -76,7 +76,7 @@ with open('processed.data') as f:
feed = {"words": word_ids}
fetch = ["acc", "cost", "prediction"]
[fetch_map, tag] = client.predict(feed=feed, fetch=fetch, need_variant_tag=True)
if (float(fetch_map["prediction"][1]) - 0.5) * (float(label[0]) - 0.5) > 0:
if (float(fetch_map["prediction"][0][1]) - 0.5) * (float(label[0]) - 0.5) > 0:
cnt[tag]['acc'] += 1
cnt[tag]['total'] += 1
......
......@@ -13,10 +13,10 @@ import paddlehub as hub
model_name = "bert_chinese_L-12_H-768_A-12"
module = hub.Module(model_name)
inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask", "pooled_output", "sequence_output"]
feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask"]
fetch_keys = ["pooled_output", "sequence_output"]
feed_dict = dict(zip(feed_keys, [inputs[x] for x in feed_keys]))
fetch_dict = dict(zip(fetch_keys, [outputs[x]] for x in fetch_keys))
fetch_dict = dict(zip(fetch_keys, [outputs[x] for x in fetch_keys]))
import paddle_serving_client.io as serving_io
serving_io.save_model("bert_seq20_model", "bert_seq20_client", feed_dict, fetch_dict, program)
......
......@@ -9,14 +9,18 @@
- Golang: 1.9.2 and later
- Git:2.17.1 and later
- CMake:3.2.2 and later
- Python:2.7.2 and later
- Python:2.7.2 and later / 3.6 and later
It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you:
- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
- CPU: `hub.baidubce.com/paddlepaddle/serving:latest-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:latest-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
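For instance, a compilation container could be started from the CPU image like this (the container name is arbitrary):
``` shell
# A sketch: start and enter a CPU compilation container.
docker run -it --name serving-devel hub.baidubce.com/paddlepaddle/serving:latest-devel bash
```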
This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python 3, just adjust the Python options of cmake.
This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python options of cmake:
- Set `DPYTHON_INCLUDE_DIR` to `$PYTHONROOT/include/python3.6m/`
- Set `DPYTHON_LIBRARIES` to `$PYTHONROOT/lib64/libpython3.6.so`
- Set `DPYTHON_EXECUTABLE` to `$PYTHONROOT/bin/python3.6`
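For example, a Python 3.6 CPU server build might be configured as follows (the build directory layout and the `SERVER` switch are assumptions based on the rest of this document):
``` shell
mkdir server-build-py36 && cd server-build-py36
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
      -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \
      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
      -DSERVER=ON ..
make -j10
```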
## Get Code
......@@ -32,6 +36,8 @@ cd Serving && git submodule update --init --recursive
export PYTHONROOT=/usr/
```
In the default centos7 image we provide, the Python path is `/usr/bin/python`. If you want to use our centos6 image, you need to set it to `export PYTHONROOT=/usr/local/python2.7/`.
## Compile Server
### Integrated CPU version paddle inference library
......@@ -54,6 +60,8 @@ make -j10
execute `make install` to put targets under directory `./output`
**Attention:** After the compilation is successful, you need to set the path of `SERVING_BIN`. See [Note](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md#Note) for details.
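For example (a sketch; the exact path depends on your build directory layout):
``` shell
# Assumption: BUILD_DIR points at the server build directory created above.
export SERVING_BIN=${BUILD_DIR}/core/general-server/serving
```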
## Compile Client
``` shell
......
......@@ -9,14 +9,18 @@
- Golang: 1.9.2及以上
- Git:2.17.1及以上
- CMake:3.2.2及以上
- Python:2.7.2及以上
- Python:2.7.2及以上 / 3.6及以上
推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境:
- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
- CPU: `hub.baidubce.com/paddlepaddle/serving:latest-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:latest-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译,只需要调整cmake的Python相关选项即可。
本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译,只需要调整cmake的Python相关选项即可:
- 将`DPYTHON_INCLUDE_DIR`设置为`$PYTHONROOT/include/python3.6m/`
- 将`DPYTHON_LIBRARIES`设置为`$PYTHONROOT/lib64/libpython3.6.so`
- 将`DPYTHON_EXECUTABLE`设置为`$PYTHONROOT/bin/python3.6`
## 获取代码
......@@ -32,6 +36,8 @@ cd Serving && git submodule update --init --recursive
export PYTHONROOT=/usr/
```
我们提供默认Centos7的Python路径为`/usr/bin/python`,如果您要使用我们的Centos6镜像,需要将其设置为`export PYTHONROOT=/usr/local/python2.7/`
## 编译Server部分
### 集成CPU版本Paddle Inference Library
......@@ -54,6 +60,8 @@ make -j10
执行`make install`可以把目标产出放在`./output`目录下。
**注意:** 编译成功后,需要设置`SERVING_BIN`路径,详见后面的[注意事项](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项)
## 编译Client部分
``` shell
......
......@@ -26,7 +26,7 @@ serving_io.save_model("serving_model", "client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
代码示例中,`{"words": data}`和`{"prediction": prediction}`分别指定了模型的输入和输出,`"words"`和`"prediction"`是输入和输出变量的别名,设计别名的目的是为了使开发者能够记忆自己训练模型的输入输出对应的字段。`data`和`prediction`则是Paddle训练过程中的[`Variable`](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Variable_cn.html#variable),通常代表张量([Tensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/Tensor_cn.html#tensor))或变长张量([LodTensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor))。调用保存命令后,会按照用户指定的`"serving_model"`和`"client_conf"`生成两个目录,内容如下:
``` shell
.
├── client_conf
......
......@@ -46,7 +46,7 @@ In this example, the production model is uploaded to HDFS in `product_path` fold
### Product model
Run the following Python code products model in `product_path` folder. Every 60 seconds, the package file of Boston house price prediction model `uci_housing.tar.gz` will be generated and uploaded to the path of HDFS `/`. After uploading, the timestamp file `donefile` will be updated and uploaded to the path of HDFS `/`.
Run the following Python code in the `product_path` folder to produce models (you need to modify the Hadoop-related parameters before running). Every 60 seconds, the package file of the Boston house price prediction model `uci_housing.tar.gz` will be generated and uploaded to the HDFS path `/`. After uploading, the timestamp file `donefile` will be updated and uploaded to the HDFS path `/`.
```python
import os
......@@ -82,9 +82,14 @@ exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
def push_to_hdfs(local_file_path, remote_path):
hadoop_bin = '/hadoop-3.1.2/bin/hadoop'
os.system('{} fs -put -f {} {}'.format(
hadoop_bin, local_file_path, remote_path))
afs = 'afs://***.***.***.***:***' # User needs to change
uci = '***,***' # User needs to change
hadoop_bin = '/path/to/hadoop/bin' # User needs to change
prefix = '{} fs -Dfs.default.name={} -Dhadoop.job.ugi={}'.format(hadoop_bin, afs, uci)
os.system('{} -rmr {}/{}'.format(
prefix, remote_path, local_file_path))
os.system('{} -put {} {}'.format(
prefix, local_file_path, remote_path))
name = "uci_housing"
for pass_id in range(30):
......
......@@ -46,7 +46,7 @@ Paddle Serving提供了一个自动监控脚本,远端地址更新模型后会
### 生产模型
`product_path`下运行下面的Python代码生产模型,每隔 60 秒会产出 Boston 房价预测模型的打包文件`uci_housing.tar.gz`并上传至hdfs的`/`路径下,上传完毕后更新时间戳文件`donefile`并上传至hdfs的`/`路径下。
`product_path`下运行下面的Python代码生产模型(运行前需要修改hadoop相关的参数),每隔 60 秒会产出 Boston 房价预测模型的打包文件`uci_housing.tar.gz`并上传至hdfs的`/`路径下,上传完毕后更新时间戳文件`donefile`并上传至hdfs的`/`路径下。
```python
import os
......@@ -82,9 +82,14 @@ exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
def push_to_hdfs(local_file_path, remote_path):
hadoop_bin = '/hadoop-3.1.2/bin/hadoop'
os.system('{} fs -put -f {} {}'.format(
hadoop_bin, local_file_path, remote_path))
afs = 'afs://***.***.***.***:***' # User needs to change
uci = '***,***' # User needs to change
hadoop_bin = '/path/to/hadoop/bin' # User needs to change
prefix = '{} fs -Dfs.default.name={} -Dhadoop.job.ugi={}'.format(hadoop_bin, afs, uci)
os.system('{} -rmr {}/{}'.format(
prefix, remote_path, local_file_path))
os.system('{} -put {} {}'.format(
prefix, local_file_path, remote_path))
name = "uci_housing"
for pass_id in range(30):
......
# Latest Wheel Packages
## CPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py2-none-any.whl
```
## GPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py2-none-any.whl
```
## Client
### Python 3.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp37-none-manylinux1_x86_64.whl
```
### Python 3.6
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp36-none-manylinux1_x86_64.whl
```
### Python 2.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp27-none-manylinux1_x86_64.whl
```
## App
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py2-none-any.whl
```
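After downloading a wheel (for example with `wget`), it can be installed directly with pip; the file name below follows the CPU server Python 2 entry above:
```
pip install paddle_serving_server-0.3.0-py2-none-any.whl
```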
# Performance Optimization
([简体中文](./PERFORMANCE_OPTIM_CN.md)|English)
Due to different model structures, different prediction services consume different amounts of computing resources. For an online prediction service, a model that needs fewer computing resources spends a larger share of its time on communication; such a service is called communication-intensive. A model that needs more computing resources spends more of its time on inference computation; such a service is called computation-intensive.
For a prediction service, the easiest way to determine the type of service is to look at the time ratio. Paddle Serving provides [Timeline tool](../python/examples/util/README_CN.md), which can intuitively display the time spent in each stage of the prediction service.
For communication-intensive prediction services, requests can be aggregated, and within a limit that can tolerate delay, multiple prediction requests can be combined into a batch for prediction.
For computation-intensive prediction services, you can use GPU prediction services instead of CPU prediction services, or increase the number of graphics cards for GPU prediction services.
Under the same conditions, the communication time of the HTTP prediction service provided by Paddle Serving is longer than that of the RPC prediction service, so for communication-intensive services, please give priority to using RPC communication.
Parameters for performance optimization:
| Parameters | Type | Default | Description |
| ---------- | ---- | ------- | ------------------------------------------------------------ |
| mem_optim | bool | False | Enable memory / graphic memory optimization |
| ir_optim | bool | False | Enable analysis and optimization of calculation graph, including OP fusion, etc. |
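As a sketch, both switches can be enabled when launching a server from the command line (the model path is an assumption, and passing the parameters as boolean flags is inferred from the table above):
```
python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --mem_optim --ir_optim
```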
# 性能优化
由于模型结构的不同,在执行预测时不同的预测对计算资源的消耗也不相同,对于在线的预测服务来说,对计算资源要求较少的模型,通信的时间成本占比就会较高,称为通信密集型服务,对计算资源要求较多的模型,推理计算的时间成本较高,称为计算密集型服务。对于这两种服务类型,可以根据实际需求采取不同的方式进行优化
(简体中文|[English](./PERFORMANCE_OPTIM.md))
由于模型结构的不同,在执行预测时不同的预测服务对计算资源的消耗也不相同。对于在线的预测服务来说,对计算资源要求较少的模型,通信的时间成本占比就会较高,称为通信密集型服务,对计算资源要求较多的模型,推理计算的时间成本较高,称为计算密集型服务。对于这两种服务类型,可以根据实际需求采取不同的方式进行优化
对于一个预测服务来说,想要判断属于哪种类型,最简单的方法就是看时间占比,Paddle Serving提供了[Timeline工具](../python/examples/util/README_CN.md),可以直观的展现预测服务中各阶段的耗时。
......@@ -10,4 +12,9 @@
在相同条件下,Paddle Serving提供的HTTP预测服务的通信时间是大于RPC预测服务的,因此对于通信密集型的服务请优先考虑使用RPC的通信方式。
对于模型较大,预测服务内存或显存占用较多的情况,可以通过将--mem_optim选项设置为True来开启内存/显存优化。
性能优化相关参数:
| 参数 | 类型 | 默认值 | 含义 |
| --------- | ---- | ------ | -------------------------------- |
| mem_optim | bool | False | 开启内存/显存优化 |
| ir_optim | bool | False | 开启计算图分析优化,包括OP融合等 |
......@@ -17,7 +17,7 @@ You can get images in two ways:
1. Pull image directly
```bash
docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
docker pull hub.baidubce.com/paddlepaddle/serving:latest
```
2. Building image based on dockerfile
......@@ -25,13 +25,13 @@ You can get images in two ways:
Create a new folder and copy [Dockerfile](../tools/Dockerfile) to this folder, and run the following command:
```bash
docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0 .
docker build -t hub.baidubce.com/paddlepaddle/serving:latest .
```
### Create container
```bash
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
......@@ -53,12 +53,6 @@ pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple
### Test example
Before running the GPU version of the Server side code, you need to set the `CUDA_VISIBLE_DEVICES` environment variable to specify which GPUs the prediction service uses. The following example specifies two GPUs with indexes 0 and 1:
```bash
export CUDA_VISIBLE_DEVICES=0,1
```
Get the trained Boston house price prediction model by the following command:
```bash
......@@ -71,13 +65,13 @@ tar -xzf uci_housing.tar.gz
Running on the Server side (inside the container):
```bash
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci &>std.log 2>err.log &
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci >std.log 2>err.log &
```
Running on the Client side (inside or outside the container):
```bash
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
```
- Test RPC service
......@@ -85,7 +79,7 @@ tar -xzf uci_housing.tar.gz
Running on the Server side (inside the container):
```bash
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 &>std.log 2>err.log &
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 >std.log 2>err.log &
```
Run the following Python code on the Client side (inside or outside the container; the `paddle-serving-client` package needs to be installed):
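A minimal sketch of such a client (the feed/fetch names follow the curl example above; the script shipped with the example may differ):
```python
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

# One sample of the 13 Boston housing features, same values as in the curl example
x = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584,
     0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
print(fetch_map["price"])
```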
......@@ -115,7 +109,7 @@ You can also get images in two ways:
1. Pull image directly
```bash
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
```
2. Building image based on dockerfile
......@@ -123,13 +117,13 @@ You can also get images in two ways:
Create a new folder and copy [Dockerfile.gpu](../tools/Dockerfile.gpu) to this folder, and run the following command:
```bash
nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu .
nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:latest-gpu .
```
### Create container
```bash
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
......@@ -176,7 +170,7 @@ tar -xzf uci_housing.tar.gz
Running on the Client side (inside or outside the container):
```bash
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
```
- Test RPC service
......
......@@ -17,7 +17,7 @@ Docker(GPU版本需要在GPU机器上安装nvidia-docker)
1. 直接拉取镜像
```bash
docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0
docker pull hub.baidubce.com/paddlepaddle/serving:latest
```
2. 基于Dockerfile构建镜像
......@@ -25,13 +25,13 @@ Docker(GPU版本需要在GPU机器上安装nvidia-docker)
建立新目录,复制[Dockerfile](../tools/Dockerfile)内容到该目录下Dockerfile文件。执行
```bash
docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0 .
docker build -t hub.baidubce.com/paddlepaddle/serving:latest .
```
### 创建容器并进入
```bash
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0
docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest
docker exec -it test bash
```
......@@ -65,13 +65,13 @@ tar -xzf uci_housing.tar.gz
在Server端(容器内)运行:
```bash
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci &>std.log 2>err.log &
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci >std.log 2>err.log &
```
在Client端(容器内或容器外)运行:
```bash
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
```
- 测试RPC服务
......@@ -79,7 +79,7 @@ tar -xzf uci_housing.tar.gz
在Server端(容器内)运行:
```bash
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 &>std.log 2>err.log &
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 >std.log 2>err.log &
```
在Client端(容器内或容器外,需要安装`paddle-serving-client`包)运行下面Python代码:
......@@ -107,7 +107,7 @@ GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版
1. 直接拉取镜像
```bash
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
```
2. 基于Dockerfile构建镜像
......@@ -115,13 +115,13 @@ GPU版本与CPU版本基本一致,只有部分接口命名的差别(GPU版
建立新目录,复制[Dockerfile.gpu](../tools/Dockerfile.gpu)内容到该目录下Dockerfile文件。执行
```bash
nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu .
nvidia-docker build -t hub.baidubce.com/paddlepaddle/serving:latest-gpu .
```
### 创建容器并进入
```bash
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu
nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
nvidia-docker exec -it test bash
```
......@@ -168,7 +168,7 @@ tar -xzf uci_housing.tar.gz
在Client端(容器内或容器外)运行:
```bash
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
```
- 测试RPC服务
......
## How to save a servable model of Paddle Serving?
# How to save a servable model of Paddle Serving?
([简体中文](./SAVE_CN.md)|English)
- Currently, paddle serving provides a save_model interface for users to access, the interface is similar with `save_inference_model` of Paddle.
## Save from training or prediction script
Currently, Paddle Serving provides a `save_model` interface for users; it is similar to Paddle's `save_inference_model`.
``` python
import paddle_serving_client.io as serving_io
serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. Serving has a
dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. `{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name
`imdb_model` is the server-side model with serving configurations. `imdb_client_conf` holds the client RPC configurations.
Serving keeps a dictionary of `Feed` and `Fetch` variables for the client to assign. In the example, `{"words": data}` is the feed dict that specifies the input of the saved inference model, and `{"prediction": prediction}` is the fetch dict that specifies its output. An alias name can be defined for feed and fetch variables. An example of how to use alias names is as follows:
``` python
from paddle_serving_client import Client
......@@ -29,3 +31,19 @@ for line in sys.stdin:
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
```
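For reference, a self-contained client sketch (the config path and the sample word ids are illustrative; only the alias names `words` and `prediction` are fixed by the `save_model` call above):
```python
from paddle_serving_client import Client

client = Client()
# illustrative path: the directory written as imdb_client_conf by save_model above
client.load_client_config("imdb_client_conf/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

# "words" and "prediction" are the alias names chosen in the feed/fetch dicts of save_model
word_ids = [8, 233, 52, 601]  # an already tokenized sample; tokenization is model specific
fetch_map = client.predict(feed={"words": word_ids}, fetch=["prediction"])
print(fetch_map["prediction"])
```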
## Export from saved model files
If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's `inference_model_to_serving` API to convert them into model files that can be used by Paddle Serving.
```python
import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None )
```
- dirname (str) - Path of the saved model files. The program file and parameter files are saved in this directory.
- serving_server (str, optional) - The path for the model files and configuration files of the server. Default: "serving_server".
- serving_client (str, optional) - The path for the configuration files of the client. Default: "serving_client".
- model_filename (str, optional) - The name of the file that stores the inference program. If it is None, the default filename `__model__` will be used. Default: None.
- params_filename (str, optional) - The name of the file that stores all parameters. It is only used when all parameters were saved in a single binary file. If parameters were saved in separate files, set it to None. Default: None.
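A short usage sketch (the `inference_model` directory is assumed to have been written by `save_inference_model` beforehand):
```python
import paddle_serving_client.io as serving_io

# Convert an existing Paddle inference model into Serving's server and client formats
serving_io.inference_model_to_serving(
    "inference_model",                # illustrative: directory produced by save_inference_model
    serving_server="serving_server",  # output: model and configuration for the server side
    serving_client="serving_client")  # output: configuration for the client side
```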
## 怎样保存用于Paddle Serving的模型?
# 怎样保存用于Paddle Serving的模型?
(简体中文|[English](./SAVE.md))
- 目前,Paddle Serving提供了一个save_model接口供用户访问,该接口与Paddle的`save_inference_model`类似。
## 从训练或预测脚本中保存
目前,Paddle Serving提供了一个save_model接口供用户访问,该接口与Paddle的`save_inference_model`类似。
``` python
import paddle_serving_client.io as serving_io
......@@ -10,7 +11,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。 Serving有一个 提供给用户存放Feed和Fetch变量信息的字典。 在示例中,`{words”:data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction":projection}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下:
imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。
Serving有一个提供给用户存放Feed和Fetch变量信息的字典。在示例中,`{"words": data}` 是用于指定已保存推理模型输入的feed字典,`{"prediction": prediction}` 是用于指定其输出的fetch字典。可以为feed和fetch变量定义别名,使用别名的示例如下:
``` python
from paddle_serving_client import Client
......@@ -29,3 +32,19 @@ for line in sys.stdin:
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
```
## 从已保存的模型文件中导出
如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型,则可以通过Paddle Serving的`inference_model_to_serving`接口转换成可用于Paddle Serving的模型文件。
```python
import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None)
```
- dirname (str) – 需要转换的模型文件存储路径,Program结构文件和参数文件均保存在此目录。
- serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为serving_server。
- serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为serving_client。
- model_filename (str,可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 `__model__` 作为默认的文件名。默认值为None。
- params_filename (str,可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为None。默认值为None。
......@@ -350,12 +350,12 @@ In the above command, the first parameter is the saved server-side model and con
After starting the HTTP prediction service, you can make prediction with a single command:
```
curl -H "Content-Type: application/json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
```
When the inference process is normal, the prediction probability is returned, as shown below.
```
{"prediction": [0.5592559576034546,0.44074398279190063]}
{"result":{"prediction":[[0.4389057457447052,0.561094343662262]]}}
```
**Note**: Training results may vary slightly between runs, so the probabilities predicted by your trained model may not match the example exactly.
......@@ -353,12 +353,12 @@ python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
启动完HTTP预测服务,即可通过一行命令进行预测:
```
curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
```
预测流程正常时,会返回预测概率,示例如下。
```
{"prediction":[0.5592559576034546,0.44074398279190063]}
{"result":{"prediction":[[0.4389057457447052,0.561094343662262]]}}
```
**注意**:每次模型训练的效果可能略有不同,使用训练出的模型预测概率数值可能与示例不一致。
# 使用uwsgi启动HTTP预测服务
# Deploy HTTP service with uWSGI
在提供的fit_a_line示例中,启动HTTP预测服务后会看到有以下信息:
([简体中文](./UWSGI_DEPLOY_CN.md)|English)
In fit_a_line example, after starting the HTTP prediction service, you will see the following information:
```shell
web service address:
......@@ -13,46 +15,31 @@ http://10.127.3.150:9393/uci/prediction
* Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
```
这里会提示启动的HTTP服务是开发模式,并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发,实际部署过程中配合需要WSGI(Web Server Gateway Interface)使用。
Here you will see a warning that the HTTP service was started in development mode and should not be used for production deployment.
The service launched directly by Flask is not stable enough to withstand a large number of concurrent requests, so in actual deployment a WSGI (Web Server Gateway Interface) server should be used instead.
下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
Next, we will show how to use the [uWSGI](https://github.com/unbit/uwsgi) module to deploy HTTP prediction services for production environments.
编写HTTP服务脚本
```python
#uwsgi_service.py
from paddle_serving_server.web_service import WebService
from flask import Flask, request
#配置预测服务
#Define prediction service
uci_service = WebService(name = "uci")
uci_service.load_model_config("./uci_housing_model")
uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
uci_service.run_server()
#配置flask服务
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
global uci_service
uci_service._launch_web_service()
service_name = "/" + uci_service.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return uci_service.get_prediction(request)
#run方法用于直接调试中直接启动服务
if __name__ == "__main__":
app_instance.run()
uci_service.run_rpc_service()
#Get flask application
app_instance = uci_service.get_app_instance()
```
使用uwsgi启动HTTP服务
Start service with uWSGI
```bash
uwsgi --http :9000 --wsgi-file uwsgi_service.py --callable app_instance --processes 4
uwsgi --http :9393 --module uwsgi_service:app_instance
```
使用--processes参数可以指定服务的进程数,请注意目前Serving HTTP 服务暂时不支持多线程的方式使用。
Use the --processes parameter to specify the number of service processes.
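For example, a sketch that runs the same service with four worker processes:
```bash
uwsgi --http :9393 --module uwsgi_service:app_instance --processes 4
```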
更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
For more information about uWSGI, please refer to [uWSGI documentation](https://uwsgi-docs.readthedocs.io/en/latest/)
# 使用uwsgi启动HTTP预测服务
(简体中文|[English](./UWSGI_DEPLOY.md))
在提供的fit_a_line示例中,启动HTTP预测服务后会看到有以下信息:
```shell
web service address:
http://10.127.3.150:9393/uci/prediction
* Serving Flask app "serve" (lazy loading)
* Environment: production
WARNING: This is a development server. Do not use it in a production deployment.
Use a production WSGI server instead.
* Debug mode: off
* Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
```
这里会提示启动的HTTP服务是开发模式,并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发,实际部署过程中配合需要WSGI(Web Server Gateway Interface)使用。
下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
编写HTTP服务脚本
```python
#uwsgi_service.py
from paddle_serving_server.web_service import WebService
#配置预测服务
uci_service = WebService(name = "uci")
uci_service.load_model_config("./uci_housing_model")
uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
uci_service.run_rpc_service()
#获取flask服务
app_instance = uci_service.get_app_instance()
```
使用uwsgi启动HTTP服务
```bash
uwsgi --http :9393 --module uwsgi_service:app_instance
```
使用--processes参数可以指定服务的进程数。
更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
......@@ -194,6 +194,12 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -198,6 +198,12 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -19,6 +19,8 @@ endif()
if (CLIENT)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py
${CMAKE_CURRENT_BINARY_DIR}/python_tag.py)
endif()
if (APP)
......@@ -43,7 +45,8 @@ if (APP)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
......@@ -52,6 +55,7 @@ add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
......
......@@ -71,28 +71,3 @@ set environmental variable to specify which gpus are used, the command above mea
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
### Benchmark
Model: bert_chinese_L-12_H-768_A-12
GPU: V100 * 1
CUDA/cuDNN version: CUDA 9.2, cuDNN 7.1.4
In the test, the 10,000 samples in the sample data are duplicated into 100,000 samples. Each client thread sends 1/(number of threads) of the samples. The batch size is 1, max_seq_len is 20 (not 128 as described above), and the time unit is seconds.
When the number of client threads is 4, the prediction speed can reach 432 samples per second.
Because a single GPU can only perform serial calculations internally, increasing the number of client threads can only reduce the idle time of the GPU. Therefore, after the number of threads reaches 4, the increase in the number of threads does not improve the prediction speed.
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ----- | ------ | ---- | ------- | ------ |
| 1 | 3.05 | 290.54 | 0.37 | 239.15 | 6.43 | 0.71 | 365.63 |
| 4 | 0.85 | 213.66 | 0.091 | 200.39 | 1.62 | 0.2 | 231.45 |
| 8 | 0.42 | 223.12 | 0.043 | 110.99 | 0.8 | 0.098 | 232.05 |
| 12 | 0.32 | 225.26 | 0.029 | 73.87 | 0.53 | 0.078 | 231.45 |
| 16 | 0.23 | 227.26 | 0.022 | 55.61 | 0.4 | 0.056 | 231.9 |
The following is the bar chart of latency versus the number of client threads:
![bert benchmark](../../../doc/bert-benchmark-batch-size-1.png)
......@@ -67,27 +67,3 @@ head data-c.txt | python bert_client.py --model bert_seq128_client/serving_clien
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
### Benchmark
模型:bert_chinese_L-12_H-768_A-12
设备:GPU V100 * 1
环境:CUDA 9.2,cudnn 7.1.4
测试中将样例数据中的1W个样本复制为10W个样本,每个client线程发送线程数分之一个样本,batch size为1,max_seq_len为20(而不是上面的128),时间单位为秒.
在client线程数为4时,预测速度可以达到432样本每秒。
由于单张GPU内部只能串行计算,client线程增多只能减少GPU的空闲时间,因此在线程数达到4之后,线程数增多对预测速度没有提升。
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ----- | ------ | ---- | ------- | ------ |
| 1 | 3.05 | 290.54 | 0.37 | 239.15 | 6.43 | 0.71 | 365.63 |
| 4 | 0.85 | 213.66 | 0.091 | 200.39 | 1.62 | 0.2 | 231.45 |
| 8 | 0.42 | 223.12 | 0.043 | 110.99 | 0.8 | 0.098 | 232.05 |
| 12 | 0.32 | 225.26 | 0.029 | 73.87 | 0.53 | 0.078 | 231.45 |
| 16 | 0.23 | 227.26 | 0.022 | 55.61 | 0.4 | 0.056 | 231.9 |
总耗时变化规律如下:
![bert benchmark](../../../doc/bert-benchmark-batch-size-1.png)
......@@ -22,11 +22,8 @@ import time
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
from batching import pad_batch_data
import tokenization
import requests
import json
from bert_reader import BertReader
from paddle_serving_app.reader import ChineseBertReader
args = benchmark_args()
......@@ -45,8 +42,7 @@ def single_func(idx, resource):
latency_list = []
if args.request == "rpc":
reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
reader = ChineseBertReader({"max_seq_len": 128})
fetch = ["pooled_output"]
client = Client()
client.load_client_config(args.model)
......@@ -78,7 +74,10 @@ def single_func(idx, resource):
elif args.request == "http":
raise ("not implemented")
end = time.time()
return [[end - start], latency_list]
if latency_flags:
return [[end - start], latency_list]
else:
return [[end - start]]
if __name__ == '__main__':
......@@ -86,7 +85,7 @@ if __name__ == '__main__':
endpoint_list = [
"127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
]
turns = 1000
turns = 10
start = time.time()
result = multi_thread_runner.run(
single_func, args.thread, {"endpoint": endpoint_list,
......
......@@ -3,25 +3,25 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim False --ir_optim True 2> elog > stdlog &
sleep 5
#warm up
$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
python3 benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
for thread_num in 4 8 16
do
for batch_size in 1 4 16 64 256
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "model name :" $1
echo "thread num :" $thread_num
echo "batch size :" $batch_size
echo "=================Done===================="
echo "model name :$1" >> profile_log_$1
echo "batch size :$batch_size" >> profile_log_$1
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log_$1
python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
tail -n 8 profile >> profile_log_$1
echo "" >> profile_log_$1
done
......
......@@ -25,7 +25,7 @@ from paddlehub.common.logger import logger
import socket
from paddle_serving_client import Client
from paddle_serving_client.utils import benchmark_args
from paddle_serving_app import ChineseBertReader
from paddle_serving_app.reader import ChineseBertReader
args = benchmark_args()
......
......@@ -14,19 +14,22 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from bert_reader import BertReader
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
class BertService(WebService):
def load(self):
self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed={}, fetch=[]):
feed_res = [{
"words": self.reader.process(ins["words"].encode("utf-8"))
} for ins in feed]
def preprocess(self, feed=[], fetch=[]):
feed_res = [
self.reader.process(ins["words"].encode("utf-8")) for ins in feed
]
return feed_res, fetch
......@@ -37,5 +40,5 @@ gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
bert_service.set_gpus(gpu_ids)
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), device="gpu")
bert_service.run_server()
bert_service.run_flask()
bert_service.run_rpc_service()
bert_service.run_web_service()
......@@ -12,37 +12,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import requests
import base64
import json
import time
import os
from paddle_serving_client import Client
from paddle_serving_app.reader import ChineseBertReader
import sys
py_version = sys.version_info[0]
client = Client()
client.load_client_config("./bert_seq32_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])
reader = ChineseBertReader({"max_seq_len": 32})
fetch = ["sequence_10", "sequence_12", "pooled_output"]
expected_shape = {
"sequence_10": (4, 32, 768),
"sequence_12": (4, 32, 768),
"pooled_output": (4, 768)
}
batch_size = 4
feed_batch = []
def predict(image_path, server):
if py_version == 2:
image = base64.b64encode(open(image_path).read())
for line in sys.stdin:
feed = reader.process(line)
if len(feed_batch) < batch_size:
feed_batch.append(feed)
else:
image = base64.b64encode(open(image_path, "rb").read()).decode("utf-8")
req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
r = requests.post(
server, data=req, headers={"Content-Type": "application/json"})
try:
print(r.json()["result"]["score"])
except ValueError:
print(r.text)
return r
if __name__ == "__main__":
server = "http://127.0.0.1:9393/image/prediction"
image_list = os.listdir("./image_data/n01440764/")
start = time.time()
for img in image_list:
image_file = "./image_data/n01440764/" + img
res = predict(image_file, server)
end = time.time()
print(end - start)
fetch_map = client.predict(feed=feed_batch, fetch=fetch)
feed_batch = []
for var_name in fetch:
if fetch_map[var_name].shape != expected_shape[var_name]:
print("fetch var {} shape error.".format(var_name))
sys.exit(1)
# Cascade RCNN model on Paddle Serving
([简体中文](./README_CN.md)|English)
### Get The Cascade RCNN Model
```
sh get_data.sh
```
If you want to have more detection models, please refer to [Paddle Detection Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
### Start the service
```
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
```
### Perform prediction
```
python test_client.py
```
The image with bounding boxes and the JSON result will be saved in the `output` folder.
# 使用Paddle Serving部署Cascade RCNN模型
(简体中文|[English](./README.md))
## 获得Cascade RCNN模型
```
sh get_data.sh
```
如果你想要更多的检测模型,请参考[Paddle检测模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
### 启动服务
```
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
```
### 执行预测
```
python test_client.py
```
客户端已经为图片做好了后处理,在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
tar -zxvf lac_model_jieba_web.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
......@@ -2,16 +2,6 @@
([简体中文](./README_CN.md)|English)
### Compile Source Code
in the root directory of this git project
```
mkdir build_server
cd build_server
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
make -j10
make install -j10
```
### Get Sample Dataset
go to directory `python/examples/criteo_ctr_with_cube`
......@@ -31,7 +21,9 @@ the model will be in ./ctr_server_model_kv and ./ctr_client_config.
### Start Sparse Parameter Indexing Service
```
cp ../../../build_server/output/bin/cube* ./cube/
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh &
```
......
## 带稀疏参数索引服务的CTR预测服务
(简体中文|[English](./README.md))
### 编译源代码
在本项目的根目录下,执行
```
mkdir build_server
cd build_server
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
make -j10
make install -j10
```
### 获取样例数据
进入目录 `python/examples/criteo_ctr_with_cube`
```
......@@ -29,7 +19,9 @@ mv models/data ./cube/
### 启动稀疏参数索引服务
```
cp ../../../build_server/output/bin/cube* ./cube/
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh &
```
......
# Image Segmentation
## Get Model
```
python -m paddle_serving_app.package --get_model deeplabv3
tar -xzvf deeplabv3.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494
```
### Client Prediction
```
python deeplabv3_client.py
```
# 图像分割
## 获取模型
```
python -m paddle_serving_app.package --get_model deeplabv3
tar -xzvf deeplabv3.tar.gz
```
## RPC 服务
### 启动服务端
```
python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494
```
### 客户端预测
```
python deeplabv3_client.py
......@@ -12,30 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, Transpose, BGR2RGB, SegPostprocess
import sys
import cv2
import base64
import numpy as np
from paddle_serving_app import ImageReader
client = Client()
client.load_client_config("deeplabv3_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9494"])
class ImageService(WebService):
def preprocess(self, feed={}, fetch=[]):
reader = ImageReader()
feed_batch = []
for ins in feed:
if "image" not in ins:
raise ("feed data error!")
sample = base64.b64decode(ins["image"])
img = reader.process_image(sample)
feed_batch.append({"image": img})
return feed_batch, fetch
preprocess = Sequential(
[File2Image(), Resize(
(512, 512), interpolation=cv2.INTER_LINEAR)])
postprocess = SegPostprocess(2)
image_service = ImageService(name="image")
image_service.load_model_config(sys.argv[1])
image_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
image_service.run_server()
image_service.run_flask()
filename = "N0060.jpg"
im = preprocess(filename)
fetch_map = client.predict(feed={"image": im}, fetch=["output"])
fetch_map["filename"] = filename
postprocess(fetch_map)
......@@ -12,8 +12,8 @@ If you want to have more detection models, please refer to [Paddle Detection Mod
### Start the service
```
tar xf faster_rcnn_model.tar.gz
mv faster_rcnn_model/pddet *.
GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_id 0
mv faster_rcnn_model/pddet* .
GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_ids 0
```
### Perform prediction
......
......@@ -13,7 +13,7 @@ wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml
```
tar xf faster_rcnn_model.tar.gz
mv faster_rcnn_model/pddet* ./
GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_id 0
GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_ids 0
```
### 执行预测
......
background
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
......@@ -13,21 +13,29 @@
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys
import os
import time
from paddle_serving_app.reader.pddet import Detection
import numpy as np
py_version = sys.version_info[0]
preprocess = Sequential([
File2Image(), BGR2RGB(), Div(255.0),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
Resize(640, 640), Transpose((2, 0, 1))
])
feed_var_names = ['image', 'im_shape', 'im_info']
fetch_var_names = ['multiclass_nms']
pddet = Detection(config_path=sys.argv[2], output_dir="./output")
feed_dict = pddet.preprocess(feed_var_names, sys.argv[3])
postprocess = RCNNPostprocess("label_list.txt", "output")
client = Client()
client.load_client_config(sys.argv[1])
client.connect(['127.0.0.1:9494'])
fetch_map = client.predict(feed=feed_dict, fetch=fetch_var_names)
outs = fetch_map.values()
pddet.postprocess(fetch_map, fetch_var_names)
im = preprocess(sys.argv[3])
fetch_map = client.predict(
feed={
"image": im,
"im_info": np.array(list(im.shape[1:]) + [1.0]),
"im_shape": np.array(list(im.shape[1:]) + [1.0])
},
fetch=["multiclass_nms"])
fetch_map["image"] = sys.argv[3]
postprocess(fetch_map)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
import paddle
def single_func(idx, resource):
client = Client()
client.load_client_config(
"./uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9293", "127.0.0.1:9292"])
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584,
0.6283, 0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(1000):
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
if fetch_map is None:
return [[None]]
return [[0]]
multi_thread_runner = MultiThreadRunner()
thread_num = 4
result = multi_thread_runner.run(single_func, thread_num, {})
if None in result[0]:
exit(1)
......@@ -8,34 +8,42 @@ The example uses the ResNet50_vd model to perform the imagenet 1000 classificati
```
sh get_model.sh
```
### HTTP Infer
### Install preprocess module
```
pip install paddle_serving_app
```
### HTTP Service
launch server side
```
python image_classification_service.py ResNet50_vd_model workdir 9393 #cpu inference service
python resnet50_web_service.py ResNet50_vd_model cpu 9696 #cpu inference service
```
```
python image_classification_service_gpu.py ResNet50_vd_model workdir 9393 #gpu inference service
python resnet50_web_service.py ResNet50_vd_model gpu 9696 #gpu inference service
```
Send an inference request from the client
```
python image_http_client.py
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction
```
### RPC Infer
### RPC Service
Launch the server side
```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9393 #cpu inference service
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu inference service
```
```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0 #gpu inference service
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
```
Send an inference request from the client
```
python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*the port of server side in this example is 9393, the sample data used by client side is in the folder ./data. These parameter can be modified in practice*
*the port of the server side in this example is 9696*
......@@ -8,34 +8,42 @@
```
sh get_model.sh
```
### 执行HTTP预测服务
### 安装数据预处理模块
```
pip install paddle_serving_app
```
### HTTP服务
启动server端
```
python image_classification_service.py ResNet50_vd_model workdir 9393 #cpu预测服务
python resnet50_web_service.py ResNet50_vd_model cpu 9696 #cpu预测服务
```
```
python image_classification_service_gpu.py ResNet50_vd_model workdir 9393 #gpu预测服务
python resnet50_web_service.py ResNet50_vd_model gpu 9696 #gpu预测服务
```
client端进行预测
发送HTTP POST请求
```
python image_http_client.py
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction
```
### 执行RPC预测服务
### RPC服务
启动server端
```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9393 #cpu预测服务
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu预测服务
```
```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0 #gpu预测服务
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
```
client端进行预测
```
python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*server端示例中服务端口为9393端口,client端示例中数据来自./data文件夹,server端地址为本地9393端口,可根据实际情况更改脚本。*
*server端示例中服务端口为9696端口*
......@@ -19,15 +19,22 @@ from __future__ import unicode_literals, absolute_import
import os
import sys
import time
import requests
import json
import base64
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import requests
import json
from image_reader import ImageReader
from paddle_serving_app.reader import Sequential, URL2Image, Resize
from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
args = benchmark_args()
seq_preprocess = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
def single_func(idx, resource):
file_list = []
......@@ -36,6 +43,10 @@ def single_func(idx, resource):
img_list = []
for i in range(1000):
img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
profile_flags = False
if "FLAGS_profile_client" in os.environ and os.environ[
"FLAGS_profile_client"]:
profile_flags = True
if args.request == "rpc":
reader = ImageReader()
fetch = ["score"]
......@@ -46,16 +57,36 @@ def single_func(idx, resource):
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
i_start = time.time()
for bi in range(args.batch_size):
img = reader.process_image(img_list[i])
img = img.reshape(-1)
img = seq_preprocess(img_list[i])
feed_batch.append({"image": img})
i_end = time.time()
if profile_flags:
print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}".
format(os.getpid(),
int(round(i_start * 1000000)),
int(round(i_end * 1000000))))
result = client.predict(feed=feed_batch, fetch=fetch)
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
raise ("no batch predict for http")
py_version = 2
server = "http://" + resource["endpoint"][idx % len(resource[
"endpoint"])] + "/image/prediction"
start = time.time()
for i in range(1000):
if py_version == 2:
image = base64.b64encode(
open("./image_data/n01440764/" + file_list[i]).read())
else:
image = base64.b64encode(open("./image_data/n01440764/" + file_list[i], "rb").read()).decode("utf-8")
req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
r = requests.post(
server, data=req, headers={"Content-Type": "application/json"})
end = time.time()
return [[end - start]]
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
class ImageReader():
def __init__(self):
self.image_mean = [0.485, 0.456, 0.406]
self.image_std = [0.229, 0.224, 0.225]
self.image_shape = [3, 224, 224]
self.resize_short_size = 256
self.interpolation = None
def resize_short(self, img, target_size, interpolation=None):
"""resize image
Args:
img: image data
target_size: resize short target size
interpolation: interpolation mode
Returns:
resized image data
"""
percent = float(target_size) / min(img.shape[0], img.shape[1])
resized_width = int(round(img.shape[1] * percent))
resized_height = int(round(img.shape[0] * percent))
if interpolation:
resized = cv2.resize(
img, (resized_width, resized_height),
interpolation=interpolation)
else:
resized = cv2.resize(img, (resized_width, resized_height))
return resized
def crop_image(self, img, target_size, center):
"""crop image
Args:
img: images data
target_size: crop target size
center: crop mode
Returns:
img: cropped image data
"""
height, width = img.shape[:2]
size = target_size
if center == True:
w_start = (width - size) // 2
h_start = (height - size) // 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img[h_start:h_end, w_start:w_end, :]
return img
def process_image(self, sample):
""" process_image """
mean = self.image_mean
std = self.image_std
crop_size = self.image_shape[1]
data = np.fromstring(sample, np.uint8)
img = cv2.imdecode(data, cv2.IMREAD_COLOR)
if img is None:
print("img is None, pass it.")
return None
if crop_size > 0:
target_size = self.resize_short_size
img = self.resize_short(
img, target_size, interpolation=self.interpolation)
img = self.crop_image(img, target_size=crop_size, center=True)
img = img[:, :, ::-1]
img = img.astype('float32').transpose((2, 0, 1)) / 255
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean
img /= img_std
return img
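A short usage sketch of this reader (the image path is illustrative); it turns raw image bytes into the normalized CHW array that is fed to the server as `{"image": img}`:
```python
if __name__ == "__main__":
    reader = ImageReader()
    with open("daisy.jpg", "rb") as f:  # illustrative local image file
        img = reader.process_image(f.read())
    print(img.shape)  # (3, 224, 224), float32, normalized with ImageNet mean/std
```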
tench, Tinca tinca,
goldfish, Carassius auratus,
great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias,
tiger shark, Galeocerdo cuvieri,
hammerhead, hammerhead shark,
electric ray, crampfish, numbfish, torpedo,
stingray,
cock,
hen,
ostrich, Struthio camelus,
brambling, Fringilla montifringilla,
goldfinch, Carduelis carduelis,
house finch, linnet, Carpodacus mexicanus,
junco, snowbird,
indigo bunting, indigo finch, indigo bird, Passerina cyanea,
robin, American robin, Turdus migratorius,
bulbul,
jay,
magpie,
chickadee,
water ouzel, dipper,
kite,
bald eagle, American eagle, Haliaeetus leucocephalus,
vulture,
great grey owl, great gray owl, Strix nebulosa,
European fire salamander, Salamandra salamandra,
common newt, Triturus vulgaris,
eft,
spotted salamander, Ambystoma maculatum,
axolotl, mud puppy, Ambystoma mexicanum,
bullfrog, Rana catesbeiana,
tree frog, tree-frog,
tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui,
loggerhead, loggerhead turtle, Caretta caretta,
leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea,
mud turtle,
terrapin,
box turtle, box tortoise,
banded gecko,
common iguana, iguana, Iguana iguana,
American chameleon, anole, Anolis carolinensis,
whiptail, whiptail lizard,
agama,
frilled lizard, Chlamydosaurus kingi,
alligator lizard,
Gila monster, Heloderma suspectum,
green lizard, Lacerta viridis,
African chameleon, Chamaeleo chamaeleon,
Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis,
African crocodile, Nile crocodile, Crocodylus niloticus,
American alligator, Alligator mississipiensis,
triceratops,
thunder snake, worm snake, Carphophis amoenus,
ringneck snake, ring-necked snake, ring snake,
hognose snake, puff adder, sand viper,
green snake, grass snake,
king snake, kingsnake,
garter snake, grass snake,
water snake,
vine snake,
night snake, Hypsiglena torquata,
boa constrictor, Constrictor constrictor,
rock python, rock snake, Python sebae,
Indian cobra, Naja naja,
green mamba,
sea snake,
horned viper, cerastes, sand viper, horned asp, Cerastes cornutus,
diamondback, diamondback rattlesnake, Crotalus adamanteus,
sidewinder, horned rattlesnake, Crotalus cerastes,
trilobite,
harvestman, daddy longlegs, Phalangium opilio,
scorpion,
black and gold garden spider, Argiope aurantia,
barn spider, Araneus cavaticus,
garden spider, Aranea diademata,
black widow, Latrodectus mactans,
tarantula,
wolf spider, hunting spider,
tick,
centipede,
black grouse,
ptarmigan,
ruffed grouse, partridge, Bonasa umbellus,
prairie chicken, prairie grouse, prairie fowl,
peacock,
quail,
partridge,
African grey, African gray, Psittacus erithacus,
macaw,
sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita,
lorikeet,
coucal,
bee eater,
hornbill,
hummingbird,
jacamar,
toucan,
drake,
red-breasted merganser, Mergus serrator,
goose,
black swan, Cygnus atratus,
tusker,
echidna, spiny anteater, anteater,
platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus,
wallaby, brush kangaroo,
koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus,
wombat,
jellyfish,
sea anemone, anemone,
brain coral,
flatworm, platyhelminth,
nematode, nematode worm, roundworm,
conch,
snail,
slug,
sea slug, nudibranch,
chiton, coat-of-mail shell, sea cradle, polyplacophore,
chambered nautilus, pearly nautilus, nautilus,
Dungeness crab, Cancer magister,
rock crab, Cancer irroratus,
fiddler crab,
king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica,
American lobster, Northern lobster, Maine lobster, Homarus americanus,
spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish,
crayfish, crawfish, crawdad, crawdaddy,
hermit crab,
isopod,
white stork, Ciconia ciconia,
black stork, Ciconia nigra,
spoonbill,
flamingo,
little blue heron, Egretta caerulea,
American egret, great white heron, Egretta albus,
bittern,
crane,
limpkin, Aramus pictus,
European gallinule, Porphyrio porphyrio,
American coot, marsh hen, mud hen, water hen, Fulica americana,
bustard,
ruddy turnstone, Arenaria interpres,
red-backed sandpiper, dunlin, Erolia alpina,
redshank, Tringa totanus,
dowitcher,
oystercatcher, oyster catcher,
pelican,
king penguin, Aptenodytes patagonica,
albatross, mollymawk,
grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus,
killer whale, killer, orca, grampus, sea wolf, Orcinus orca,
dugong, Dugong dugon,
sea lion,
Chihuahua,
Japanese spaniel,
Maltese dog, Maltese terrier, Maltese,
Pekinese, Pekingese, Peke,
Shih-Tzu,
Blenheim spaniel,
papillon,
toy terrier,
Rhodesian ridgeback,
Afghan hound, Afghan,
basset, basset hound,
beagle,
bloodhound, sleuthhound,
bluetick,
black-and-tan coonhound,
Walker hound, Walker foxhound,
English foxhound,
redbone,
borzoi, Russian wolfhound,
Irish wolfhound,
Italian greyhound,
whippet,
Ibizan hound, Ibizan Podenco,
Norwegian elkhound, elkhound,
otterhound, otter hound,
Saluki, gazelle hound,
Scottish deerhound, deerhound,
Weimaraner,
Staffordshire bullterrier, Staffordshire bull terrier,
American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier,
Bedlington terrier,
Border terrier,
Kerry blue terrier,
Irish terrier,
Norfolk terrier,
Norwich terrier,
Yorkshire terrier,
wire-haired fox terrier,
Lakeland terrier,
Sealyham terrier, Sealyham,
Airedale, Airedale terrier,
cairn, cairn terrier,
Australian terrier,
Dandie Dinmont, Dandie Dinmont terrier,
Boston bull, Boston terrier,
miniature schnauzer,
giant schnauzer,
standard schnauzer,
Scotch terrier, Scottish terrier, Scottie,
Tibetan terrier, chrysanthemum dog,
silky terrier, Sydney silky,
soft-coated wheaten terrier,
West Highland white terrier,
Lhasa, Lhasa apso,
flat-coated retriever,
curly-coated retriever,
golden retriever,
Labrador retriever,
Chesapeake Bay retriever,
German short-haired pointer,
vizsla, Hungarian pointer,
English setter,
Irish setter, red setter,
Gordon setter,
Brittany spaniel,
clumber, clumber spaniel,
English springer, English springer spaniel,
Welsh springer spaniel,
cocker spaniel, English cocker spaniel, cocker,
Sussex spaniel,
Irish water spaniel,
kuvasz,
schipperke,
groenendael,
malinois,
briard,
kelpie,
komondor,
Old English sheepdog, bobtail,
Shetland sheepdog, Shetland sheep dog, Shetland,
collie,
Border collie,
Bouvier des Flandres, Bouviers des Flandres,
Rottweiler,
German shepherd, German shepherd dog, German police dog, alsatian,
Doberman, Doberman pinscher,
miniature pinscher,
Greater Swiss Mountain dog,
Bernese mountain dog,
Appenzeller,
EntleBucher,
boxer,
bull mastiff,
Tibetan mastiff,
French bulldog,
Great Dane,
Saint Bernard, St Bernard,
Eskimo dog, husky,
malamute, malemute, Alaskan malamute,
Siberian husky,
dalmatian, coach dog, carriage dog,
affenpinscher, monkey pinscher, monkey dog,
basenji,
pug, pug-dog,
Leonberg,
Newfoundland, Newfoundland dog,
Great Pyrenees,
Samoyed, Samoyede,
Pomeranian,
chow, chow chow,
keeshond,
Brabancon griffon,
Pembroke, Pembroke Welsh corgi,
Cardigan, Cardigan Welsh corgi,
toy poodle,
miniature poodle,
standard poodle,
Mexican hairless,
timber wolf, grey wolf, gray wolf, Canis lupus,
white wolf, Arctic wolf, Canis lupus tundrarum,
red wolf, maned wolf, Canis rufus, Canis niger,
coyote, prairie wolf, brush wolf, Canis latrans,
dingo, warrigal, warragal, Canis dingo,
dhole, Cuon alpinus,
African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus,
hyena, hyaena,
red fox, Vulpes vulpes,
kit fox, Vulpes macrotis,
Arctic fox, white fox, Alopex lagopus,
grey fox, gray fox, Urocyon cinereoargenteus,
tabby, tabby cat,
tiger cat,
Persian cat,
Siamese cat, Siamese,
Egyptian cat,
cougar, puma, catamount, mountain lion, painter, panther, Felis concolor,
lynx, catamount,
leopard, Panthera pardus,
snow leopard, ounce, Panthera uncia,
jaguar, panther, Panthera onca, Felis onca,
lion, king of beasts, Panthera leo,
tiger, Panthera tigris,
cheetah, chetah, Acinonyx jubatus,
brown bear, bruin, Ursus arctos,
American black bear, black bear, Ursus americanus, Euarctos americanus,
ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus,
sloth bear, Melursus ursinus, Ursus ursinus,
mongoose,
meerkat, mierkat,
tiger beetle,
ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle,
ground beetle, carabid beetle,
long-horned beetle, longicorn, longicorn beetle,
leaf beetle, chrysomelid,
dung beetle,
rhinoceros beetle,
weevil,
fly,
bee,
ant, emmet, pismire,
grasshopper, hopper,
cricket,
walking stick, walkingstick, stick insect,
cockroach, roach,
mantis, mantid,
cicada, cicala,
leafhopper,
lacewing, lacewing fly,
"dragonfly, darning needle, devils darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
damselfly,
admiral,
ringlet, ringlet butterfly,
monarch, monarch butterfly, milkweed butterfly, Danaus plexippus,
cabbage butterfly,
sulphur butterfly, sulfur butterfly,
lycaenid, lycaenid butterfly,
starfish, sea star,
sea urchin,
sea cucumber, holothurian,
wood rabbit, cottontail, cottontail rabbit,
hare,
Angora, Angora rabbit,
hamster,
porcupine, hedgehog,
fox squirrel, eastern fox squirrel, Sciurus niger,
marmot,
beaver,
guinea pig, Cavia cobaya,
sorrel,
zebra,
hog, pig, grunter, squealer, Sus scrofa,
wild boar, boar, Sus scrofa,
warthog,
hippopotamus, hippo, river horse, Hippopotamus amphibius,
ox,
water buffalo, water ox, Asiatic buffalo, Bubalus bubalis,
bison,
ram, tup,
bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis,
ibex, Capra ibex,
hartebeest,
impala, Aepyceros melampus,
gazelle,
Arabian camel, dromedary, Camelus dromedarius,
llama,
weasel,
mink,
polecat, fitch, foulmart, foumart, Mustela putorius,
black-footed ferret, ferret, Mustela nigripes,
otter,
skunk, polecat, wood pussy,
badger,
armadillo,
three-toed sloth, ai, Bradypus tridactylus,
orangutan, orang, orangutang, Pongo pygmaeus,
gorilla, Gorilla gorilla,
chimpanzee, chimp, Pan troglodytes,
gibbon, Hylobates lar,
siamang, Hylobates syndactylus, Symphalangus syndactylus,
guenon, guenon monkey,
patas, hussar monkey, Erythrocebus patas,
baboon,
macaque,
langur,
colobus, colobus monkey,
proboscis monkey, Nasalis larvatus,
marmoset,
capuchin, ringtail, Cebus capucinus,
howler monkey, howler,
titi, titi monkey,
spider monkey, Ateles geoffroyi,
squirrel monkey, Saimiri sciureus,
Madagascar cat, ring-tailed lemur, Lemur catta,
indri, indris, Indri indri, Indri brevicaudatus,
Indian elephant, Elephas maximus,
African elephant, Loxodonta africana,
lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens,
giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca,
barracouta, snoek,
eel,
coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch,
rock beauty, Holocanthus tricolor,
anemone fish,
sturgeon,
gar, garfish, garpike, billfish, Lepisosteus osseus,
lionfish,
puffer, pufferfish, blowfish, globefish,
abacus,
abaya,
"academic gown, academic robe, judges robe",
accordion, piano accordion, squeeze box,
acoustic guitar,
aircraft carrier, carrier, flattop, attack aircraft carrier,
airliner,
airship, dirigible,
altar,
ambulance,
amphibian, amphibious vehicle,
analog clock,
apiary, bee house,
apron,
ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin,
assault rifle, assault gun,
backpack, back pack, knapsack, packsack, rucksack, haversack,
bakery, bakeshop, bakehouse,
balance beam, beam,
balloon,
ballpoint, ballpoint pen, ballpen, Biro,
Band Aid,
banjo,
bannister, banister, balustrade, balusters, handrail,
barbell,
barber chair,
barbershop,
barn,
barometer,
barrel, cask,
barrow, garden cart, lawn cart, wheelbarrow,
baseball,
basketball,
bassinet,
bassoon,
bathing cap, swimming cap,
bath towel,
bathtub, bathing tub, bath, tub,
beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon,
beacon, lighthouse, beacon light, pharos,
beaker,
bearskin, busby, shako,
beer bottle,
beer glass,
bell cote, bell cot,
bib,
bicycle-built-for-two, tandem bicycle, tandem,
bikini, two-piece,
binder, ring-binder,
binoculars, field glasses, opera glasses,
birdhouse,
boathouse,
bobsled, bobsleigh, bob,
bolo tie, bolo, bola tie, bola,
bonnet, poke bonnet,
bookcase,
bookshop, bookstore, bookstall,
bottlecap,
bow,
bow tie, bow-tie, bowtie,
brass, memorial tablet, plaque,
brassiere, bra, bandeau,
breakwater, groin, groyne, mole, bulwark, seawall, jetty,
breastplate, aegis, egis,
broom,
bucket, pail,
buckle,
bulletproof vest,
bullet train, bullet,
butcher shop, meat market,
cab, hack, taxi, taxicab,
caldron, cauldron,
candle, taper, wax light,
cannon,
canoe,
can opener, tin opener,
cardigan,
car mirror,
carousel, carrousel, merry-go-round, roundabout, whirligig,
"carpenters kit, tool kit",
carton,
car wheel,
cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM,
cassette,
cassette player,
castle,
catamaran,
CD player,
cello, violoncello,
cellular telephone, cellular phone, cellphone, cell, mobile phone,
chain,
chainlink fence,
chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour,
chain saw, chainsaw,
chest,
chiffonier, commode,
chime, bell, gong,
china cabinet, china closet,
Christmas stocking,
church, church building,
cinema, movie theater, movie theatre, movie house, picture palace,
cleaver, meat cleaver, chopper,
cliff dwelling,
cloak,
clog, geta, patten, sabot,
cocktail shaker,
coffee mug,
coffeepot,
coil, spiral, volute, whorl, helix,
combination lock,
computer keyboard, keypad,
confectionery, confectionary, candy store,
container ship, containership, container vessel,
convertible,
corkscrew, bottle screw,
cornet, horn, trumpet, trump,
cowboy boot,
cowboy hat, ten-gallon hat,
cradle,
crane,
crash helmet,
crate,
crib, cot,
Crock Pot,
croquet ball,
crutch,
cuirass,
dam, dike, dyke,
desk,
desktop computer,
dial telephone, dial phone,
diaper, nappy, napkin,
digital clock,
digital watch,
dining table, board,
dishrag, dishcloth,
dishwasher, dish washer, dishwashing machine,
disk brake, disc brake,
dock, dockage, docking facility,
dogsled, dog sled, dog sleigh,
dome,
doormat, welcome mat,
drilling platform, offshore rig,
drum, membranophone, tympan,
drumstick,
dumbbell,
Dutch oven,
electric fan, blower,
electric guitar,
electric locomotive,
entertainment center,
envelope,
espresso maker,
face powder,
feather boa, boa,
file, file cabinet, filing cabinet,
fireboat,
fire engine, fire truck,
fire screen, fireguard,
flagpole, flagstaff,
flute, transverse flute,
folding chair,
football helmet,
forklift,
fountain,
fountain pen,
four-poster,
freight car,
French horn, horn,
frying pan, frypan, skillet,
fur coat,
garbage truck, dustcart,
gasmask, respirator, gas helmet,
gas pump, gasoline pump, petrol pump, island dispenser,
goblet,
go-kart,
golf ball,
golfcart, golf cart,
gondola,
gong, tam-tam,
gown,
grand piano, grand,
greenhouse, nursery, glasshouse,
grille, radiator grille,
grocery store, grocery, food market, market,
guillotine,
hair slide,
hair spray,
half track,
hammer,
hamper,
hand blower, blow dryer, blow drier, hair dryer, hair drier,
hand-held computer, hand-held microcomputer,
handkerchief, hankie, hanky, hankey,
hard disc, hard disk, fixed disk,
harmonica, mouth organ, harp, mouth harp,
harp,
harvester, reaper,
hatchet,
holster,
home theater, home theatre,
honeycomb,
hook, claw,
hoopskirt, crinoline,
horizontal bar, high bar,
horse cart, horse-cart,
hourglass,
iPod,
iron, smoothing iron,
"jack-o-lantern",
jean, blue jean, denim,
jeep, landrover,
jersey, T-shirt, tee shirt,
jigsaw puzzle,
jinrikisha, ricksha, rickshaw,
joystick,
kimono,
knee pad,
knot,
lab coat, laboratory coat,
ladle,
lampshade, lamp shade,
laptop, laptop computer,
lawn mower, mower,
lens cap, lens cover,
letter opener, paper knife, paperknife,
library,
lifeboat,
lighter, light, igniter, ignitor,
limousine, limo,
liner, ocean liner,
lipstick, lip rouge,
Loafer,
lotion,
loudspeaker, speaker, speaker unit, loudspeaker system, speaker system,
"loupe, jewelers loupe",
lumbermill, sawmill,
magnetic compass,
mailbag, postbag,
mailbox, letter box,
maillot,
maillot, tank suit,
manhole cover,
maraca,
marimba, xylophone,
mask,
matchstick,
maypole,
maze, labyrinth,
measuring cup,
medicine chest, medicine cabinet,
megalith, megalithic structure,
microphone, mike,
microwave, microwave oven,
military uniform,
milk can,
minibus,
miniskirt, mini,
minivan,
missile,
mitten,
mixing bowl,
mobile home, manufactured home,
Model T,
modem,
monastery,
monitor,
moped,
mortar,
mortarboard,
mosque,
mosquito net,
motor scooter, scooter,
mountain bike, all-terrain bike, off-roader,
mountain tent,
mouse, computer mouse,
mousetrap,
moving van,
muzzle,
nail,
neck brace,
necklace,
nipple,
notebook, notebook computer,
obelisk,
oboe, hautboy, hautbois,
ocarina, sweet potato,
odometer, hodometer, mileometer, milometer,
oil filter,
organ, pipe organ,
oscilloscope, scope, cathode-ray oscilloscope, CRO,
overskirt,
oxcart,
oxygen mask,
packet,
paddle, boat paddle,
paddlewheel, paddle wheel,
padlock,
paintbrush,
"pajama, pyjama, pjs, jammies",
palace,
panpipe, pandean pipe, syrinx,
paper towel,
parachute, chute,
parallel bars, bars,
park bench,
parking meter,
passenger car, coach, carriage,
patio, terrace,
pay-phone, pay-station,
pedestal, plinth, footstall,
pencil box, pencil case,
pencil sharpener,
perfume, essence,
Petri dish,
photocopier,
pick, plectrum, plectron,
pickelhaube,
picket fence, paling,
pickup, pickup truck,
pier,
piggy bank, penny bank,
pill bottle,
pillow,
ping-pong ball,
pinwheel,
pirate, pirate ship,
pitcher, ewer,
"plane, carpenters plane, woodworking plane",
planetarium,
plastic bag,
plate rack,
plow, plough,
"plunger, plumbers helper",
Polaroid camera, Polaroid Land camera,
pole,
police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria,
poncho,
pool table, billiard table, snooker table,
pop bottle, soda bottle,
pot, flowerpot,
"potters wheel",
power drill,
prayer rug, prayer mat,
printer,
prison, prison house,
projectile, missile,
projector,
puck, hockey puck,
punching bag, punch bag, punching ball, punchball,
purse,
quill, quill pen,
quilt, comforter, comfort, puff,
racer, race car, racing car,
racket, racquet,
radiator,
radio, wireless,
radio telescope, radio reflector,
rain barrel,
recreational vehicle, RV, R.V.,
reel,
reflex camera,
refrigerator, icebox,
remote control, remote,
restaurant, eating house, eating place, eatery,
revolver, six-gun, six-shooter,
rifle,
rocking chair, rocker,
rotisserie,
rubber eraser, rubber, pencil eraser,
rugby ball,
rule, ruler,
running shoe,
safe,
safety pin,
saltshaker, salt shaker,
sandal,
sarong,
sax, saxophone,
scabbard,
scale, weighing machine,
school bus,
schooner,
scoreboard,
screen, CRT screen,
screw,
screwdriver,
seat belt, seatbelt,
sewing machine,
shield, buckler,
shoe shop, shoe-shop, shoe store,
shoji,
shopping basket,
shopping cart,
shovel,
shower cap,
shower curtain,
ski,
ski mask,
sleeping bag,
slide rule, slipstick,
sliding door,
slot, one-armed bandit,
snorkel,
snowmobile,
snowplow, snowplough,
soap dispenser,
soccer ball,
sock,
solar dish, solar collector, solar furnace,
sombrero,
soup bowl,
space bar,
space heater,
space shuttle,
spatula,
speedboat,
"spider web, spiders web",
spindle,
sports car, sport car,
spotlight, spot,
stage,
steam locomotive,
steel arch bridge,
steel drum,
stethoscope,
stole,
stone wall,
stopwatch, stop watch,
stove,
strainer,
streetcar, tram, tramcar, trolley, trolley car,
stretcher,
studio couch, day bed,
stupa, tope,
submarine, pigboat, sub, U-boat,
suit, suit of clothes,
sundial,
sunglass,
sunglasses, dark glasses, shades,
sunscreen, sunblock, sun blocker,
suspension bridge,
swab, swob, mop,
sweatshirt,
swimming trunks, bathing trunks,
swing,
switch, electric switch, electrical switch,
syringe,
table lamp,
tank, army tank, armored combat vehicle, armoured combat vehicle,
tape player,
teapot,
teddy, teddy bear,
television, television system,
tennis ball,
thatch, thatched roof,
theater curtain, theatre curtain,
thimble,
thresher, thrasher, threshing machine,
throne,
tile roof,
toaster,
tobacco shop, tobacconist shop, tobacconist,
toilet seat,
torch,
totem pole,
tow truck, tow car, wrecker,
toyshop,
tractor,
trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi,
tray,
trench coat,
tricycle, trike, velocipede,
trimaran,
tripod,
triumphal arch,
trolleybus, trolley coach, trackless trolley,
trombone,
tub, vat,
turnstile,
typewriter keyboard,
umbrella,
unicycle, monocycle,
upright, upright piano,
vacuum, vacuum cleaner,
vase,
vault,
velvet,
vending machine,
vestment,
viaduct,
violin, fiddle,
volleyball,
waffle iron,
wall clock,
wallet, billfold, notecase, pocketbook,
wardrobe, closet, press,
warplane, military plane,
washbasin, handbasin, washbowl, lavabo, wash-hand basin,
washer, automatic washer, washing machine,
water bottle,
water jug,
water tower,
whiskey jug,
whistle,
wig,
window screen,
window shade,
Windsor tie,
wine bottle,
wing,
wok,
wooden spoon,
wool, woolen, woollen,
worm fence, snake fence, snake-rail fence, Virginia fence,
wreck,
yawl,
yurt,
web site, website, internet site, site,
comic book,
crossword puzzle, crossword,
street sign,
traffic light, traffic signal, stoplight,
book jacket, dust cover, dust jacket, dust wrapper,
menu,
plate,
guacamole,
consomme,
hot pot, hotpot,
trifle,
ice cream, icecream,
ice lolly, lolly, lollipop, popsicle,
French loaf,
bagel, beigel,
pretzel,
cheeseburger,
hotdog, hot dog, red hot,
mashed potato,
head cabbage,
broccoli,
cauliflower,
zucchini, courgette,
spaghetti squash,
acorn squash,
butternut squash,
cucumber, cuke,
artichoke, globe artichoke,
bell pepper,
cardoon,
mushroom,
Granny Smith,
strawberry,
orange,
lemon,
fig,
pineapple, ananas,
banana,
jackfruit, jak, jack,
custard apple,
pomegranate,
hay,
carbonara,
chocolate sauce, chocolate syrup,
dough,
meat loaf, meatloaf,
pizza, pizza pie,
potpie,
burrito,
red wine,
espresso,
cup,
eggnog,
alp,
bubble,
cliff, drop, drop-off,
coral reef,
geyser,
lakeside, lakeshore,
promontory, headland, head, foreland,
sandbar, sand bar,
seashore, coast, seacoast, sea-coast,
valley, vale,
volcano,
ballplayer, baseball player,
groom, bridegroom,
scuba diver,
rapeseed,
daisy,
"yellow ladys slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
corn,
acorn,
hip, rose hip, rosehip,
buckeye, horse chestnut, conker,
coral fungus,
agaric,
gyromitra,
stinkhorn, carrion fungus,
earthstar,
hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa,
bolete,
ear, spike, capitulum,
toilet tissue, toilet paper, bathroom tissue
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, Resize
from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
import time
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9696"])
label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
label_dict[label_idx] = line.strip()
label_idx += 1
seq = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
start = time.time()
image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
for i in range(10):
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
prob = max(fetch_map["score"][0])
label = label_dict[fetch_map["score"][0].tolist().index(prob)].strip(
).replace(",", "")
print("prediction: {}, probability: {}".format(label, prob))
end = time.time()
print(end - start)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
if len(sys.argv) != 4:
print("python resnet50_web_service.py model device port")
sys.exit(-1)
device = sys.argv[2]
if device == "cpu":
from paddle_serving_server.web_service import WebService
else:
from paddle_serving_server_gpu.web_service import WebService
class ImageService(WebService):
def init_imagenet_setting(self):
self.seq = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose(
(2, 0, 1)), Div(255), Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225], True)
])
self.label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
self.label_dict[label_idx] = line.strip()
label_idx += 1
def preprocess(self, feed=[], fetch=[]):
feed_batch = []
for ins in feed:
if "image" not in ins:
                raise ValueError("feed data error!")
img = self.seq(ins["image"])
feed_batch.append({"image": img})
return feed_batch, fetch
def postprocess(self, feed=[], fetch=[], fetch_map={}):
score_list = fetch_map["score"]
result = {"label": [], "prob": []}
for score in score_list:
max_score = max(score)
result["label"].append(self.label_dict[score.index(max_score)]
.strip().replace(",", ""))
result["prob"].append(max_score)
return result
image_service = ImageService(name="image")
image_service.load_model_config(sys.argv[1])
image_service.init_imagenet_setting()
if device == "gpu":
image_service.set_gpus("0,1")
image_service.prepare_server(
workdir="workdir", port=int(sys.argv[3]), device=device)
image_service.run_rpc_service()
image_service.run_web_service()
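# Example invocation (the model directory and port below are hypothetical; the
# script expects the arguments shown in its usage message: model, device, port):
#   python resnet50_web_service.py ResNet50_vd_model cpu 9696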
......@@ -30,27 +30,3 @@ python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
```
### Benchmark
CPU :Intel(R) Xeon(R) Gold 6271 CPU @ 2.60GHz * 48
Model :[CNN](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imdb/nets.py)
server thread num : 16
In this test, the client sends a total of 25,000 test samples. The bar chart given later shows the latency of a single thread, in seconds; it shows that multi-threaded prediction is much more efficient than single-threaded prediction, with an 8.7x improvement at 16 threads.
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.09 | 28.79 | 0.094 | 20.59 | 0.047 | 0.034 | 31.41 |
| 4 | 0.22 | 7.41 | 0.023 | 5.01 | 0.011 | 0.0098 | 8.01 |
| 8 | 0.11 | 4.7 | 0.012 | 2.61 | 0.0062 | 0.0049 | 5.01 |
| 12 | 0.081 | 4.69 | 0.0078 | 1.72 | 0.0042 | 0.0035 | 4.91 |
| 16 | 0.058 | 3.46 | 0.0061 | 1.32 | 0.0033 | 0.003 | 3.63 |
| 20 | 0.049 | 3.77 | 0.0047 | 1.03 | 0.0025 | 0.0022 | 3.91 |
| 24 | 0.041 | 3.86 | 0.0039 | 0.85 | 0.002 | 0.0017 | 3.98 |
The thread-latency bar chart is as follows:
![total cost](../../../doc/imdb-benchmark-server-16.png)
......@@ -29,27 +29,3 @@ python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
```
### Benchmark
Device: Intel(R) Xeon(R) Gold 6271 CPU @ 2.60GHz * 48
Model: [CNN](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imdb/nets.py)
server thread num : 16
In this test, the client sends a total of 25,000 test samples. The data in the chart are the latency of a single thread, in seconds. Client-side multi-threaded prediction is clearly faster than single-threaded prediction; at 16 threads it is 8.7 times as fast as a single thread.
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.09 | 28.79 | 0.094 | 20.59 | 0.047 | 0.034 | 31.41 |
| 4 | 0.22 | 7.41 | 0.023 | 5.01 | 0.011 | 0.0098 | 8.01 |
| 8 | 0.11 | 4.7 | 0.012 | 2.61 | 0.0062 | 0.0049 | 5.01 |
| 12 | 0.081 | 4.69 | 0.0078 | 1.72 | 0.0042 | 0.0035 | 4.91 |
| 16 | 0.058 | 3.46 | 0.0061 | 1.32 | 0.0033 | 0.003 | 3.63 |
| 20 | 0.049 | 3.77 | 0.0047 | 1.03 | 0.0025 | 0.0022 | 3.91 |
| 24 | 0.041 | 3.86 | 0.0039 | 0.85 | 0.002 | 0.0017 | 3.98 |
The total prediction latency varies as follows:
![total cost](../../../doc/imdb-benchmark-server-16.png)
......@@ -16,7 +16,7 @@
import sys
import time
import requests
from imdb_reader import IMDBDataset
from paddle_serving_app.reader import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
......@@ -37,26 +37,39 @@ def single_func(idx, resource):
client.load_client_config(args.model)
client.connect([args.endpoint])
for i in range(1000):
if args.batch_size == 1:
word_ids, label = imdb_dataset.get_words_and_label(line)
fetch_map = client.predict(
feed={"words": word_ids}, fetch=["prediction"])
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
word_ids, label = imdb_dataset.get_words_and_label(dataset[
bi])
feed_batch.append({"words": word_ids})
result = client.predict(feed=feed_batch, fetch=["prediction"])
if result is None:
                    raise RuntimeError("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
for fn in filelist:
fin = open(fn)
for line in fin:
word_ids, label = imdb_dataset.get_words_and_label(line)
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
data={"words": word_ids,
"fetch": ["prediction"]})
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
feed_batch.append({"words": dataset[bi]})
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
json={"feed": feed_batch,
"fetch": ["prediction"]})
if r.status_code != 200:
print('HTTP status code -ne 200')
                raise RuntimeError("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
avg_cost = 0
for cost in result[0]:
avg_cost += cost
print("total cost {} s of each thread".format(avg_cost / args.thread))
rm profile_log
for thread_num in 1 2 4 8 16
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import time
import requests
from imdb_reader import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
args = benchmark_args()
def single_func(idx, resource):
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource("./imdb.vocab")
dataset = []
with open("./test_data/part-0") as fin:
for line in fin:
dataset.append(line.strip())
start = time.time()
if args.request == "rpc":
client = Client()
client.load_client_config(args.model)
client.connect([args.endpoint])
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
word_ids, label = imdb_dataset.get_words_and_label(dataset[
bi])
feed_batch.append({"words": word_ids})
result = client.predict(feed=feed_batch, fetch=["prediction"])
if result is None:
                    raise RuntimeError("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
feed_batch.append({"words": dataset[bi]})
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
json={"feed": feed_batch,
"fetch": ["prediction"]})
if r.status_code != 200:
print('HTTP status code -ne 200')
                raise RuntimeError("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
avg_cost = 0
for cost in result[0]:
avg_cost += cost
print("total cost {} s of each thread".format(avg_cost / args.thread))
rm profile_log
for thread_num in 1 2 4 8 16
do
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
......@@ -13,7 +13,7 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from imdb_reader import IMDBDataset
from paddle_serving_app.reader import IMDBDataset
import sys
client = Client()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
import sys
import subprocess
from multiprocessing import Pool
import time
def batch_predict(batch_size=4):
client = Client()
client.load_client_config(conf_file)
client.connect(["127.0.0.1:9292"])
fetch = ["acc", "cost", "prediction"]
feed_batch = []
for line in sys.stdin:
group = line.strip().split()
words = [int(x) for x in group[1:int(group[0])]]
label = [int(group[-1])]
feed = {"words": words, "label": label}
feed_batch.append(feed)
if len(feed_batch) == batch_size:
fetch_batch = client.batch_predict(
feed_batch=feed_batch, fetch=fetch)
for i in range(batch_size):
print("{} {}".format(fetch_batch[i]["prediction"][1],
feed_batch[i]["label"][0]))
feed_batch = []
if len(feed_batch) > 0:
fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
for i in range(len(feed_batch)):
print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
"label"][0]))
if __name__ == '__main__':
conf_file = sys.argv[1]
batch_size = int(sys.argv[2])
batch_predict(batch_size)
......@@ -14,7 +14,7 @@
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
from paddle_serving_app.reader import IMDBDataset
import sys
......@@ -37,5 +37,5 @@ imdb_service.load_model_config(sys.argv[1])
imdb_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
imdb_service.prepare_dict({"dict_file_path": sys.argv[4]})
imdb_service.run_server()
imdb_service.run_flask()
imdb_service.run_rpc_service()
imdb_service.run_web_service()
......@@ -2,28 +2,27 @@
([简体中文](./README_CN.md)|English)
### Get model files and sample data
### Get Model
```
sh get_data.sh
python -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz
```
The downloaded package contains the LAC model config along with the LAC dictionary.
#### Start RPC inference service
```
python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292
python -m paddle_serving_server.serve --model lac_model/ --port 9292
```
### RPC Infer
```
echo "我爱北京天安门" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/
echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt
```
it will get the segmentation result
It will get the segmentation result.
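For the RPC example above, the client output looks something like `word_seg: 我|爱|北京|天安门`, since lac_client.py in this repository joins the segmented words with `|`.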
### Start HTTP inference service
```
python lac_web_service.py jieba_server_model/ lac_workdir 9292
python lac_web_service.py lac_model/ lac_workdir 9292
```
### HTTP Infer
......
......@@ -2,28 +2,27 @@
(简体中文|[English](./README.md))
### Get model and dictionary files
### Get Model
```
sh get_data.sh
python -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz
```
The downloaded package contains the LAC model and the dictionary files needed for LAC prediction.
#### Start RPC inference service
```
python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292
python -m paddle_serving_server.serve --model lac_model/ --port 9292
```
### RPC Inference
```
echo "我爱北京天安门" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/
echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt
```
This gives the word segmentation result.
### Start HTTP inference service
```
python lac_web_service.py jieba_server_model/ lac_workdir 9292
python lac_web_service.py lac_model/ lac_workdir 9292
```
### HTTP Inference
......
......@@ -16,7 +16,7 @@
import sys
import time
import requests
from lac_reader import LACReader
from paddle_serving_app.reader import LACReader
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
......@@ -25,7 +25,7 @@ args = benchmark_args()
def single_func(idx, resource):
reader = LACReader("lac_dict")
reader = LACReader()
start = time.time()
if args.request == "rpc":
client = Client()
......
......@@ -15,7 +15,7 @@
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from lac_reader import LACReader
from paddle_serving_app.reader import LACReader
import sys
import os
import io
......@@ -24,7 +24,7 @@ client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9292"])
reader = LACReader(sys.argv[2])
reader = LACReader()
for line in sys.stdin:
if len(line) <= 0:
continue
......@@ -32,4 +32,7 @@ for line in sys.stdin:
if len(feed_data) <= 0:
continue
fetch_map = client.predict(feed={"words": feed_data}, fetch=["crf_decode"])
print(fetch_map)
begin = fetch_map['crf_decode.lod'][0]
end = fetch_map['crf_decode.lod'][1]
segs = reader.parse_result(line, fetch_map["crf_decode"][begin:end])
print("word_seg: " + "|".join(str(words) for words in segs))
......@@ -14,8 +14,10 @@
from paddle_serving_client import Client
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
py_version = sys.version_info[0]
if py_version == 2:
reload(sys)
sys.setdefaultencoding('utf-8')
import os
import io
......
......@@ -14,12 +14,12 @@
from paddle_serving_server.web_service import WebService
import sys
from lac_reader import LACReader
from paddle_serving_app.reader import LACReader
class LACService(WebService):
def load_reader(self):
self.reader = LACReader("lac_dict")
self.reader = LACReader()
def preprocess(self, feed={}, fetch=[]):
feed_batch = []
......@@ -47,5 +47,5 @@ lac_service.load_model_config(sys.argv[1])
lac_service.load_reader()
lac_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
lac_service.run_server()
lac_service.run_flask()
lac_service.run_rpc_service()
lac_service.run_web_service()
# Image Classification
## Get Model
```
python -m paddle_serving_app.package --get_model mobilenet_v2_imagenet
tar -xzvf mobilenet_v2_imagenet.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
```
python mobilenet_tutorial.py
```
# Image Classification
## Get Model
```
python -m paddle_serving_app.package --get_model mobilenet_v2_imagenet
tar -xzvf mobilenet_v2_imagenet.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
```
python mobilenet_tutorial.py
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize
from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
client = Client()
client.load_client_config(
"mobilenet_v2_imagenet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["feature_map"])
print(fetch_map["feature_map"].reshape(-1))
# OCR
## Get Model
```
python -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server.serve --model ocr_rec_model --port 9292
```
### Client Prediction
```
python test_ocr_rec_client.py
```
......@@ -12,23 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from image_reader import ImageReader
from paddle_serving_client import Client
import time
from paddle_serving_app.reader import OCRReader
import cv2
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])
reader = ImageReader()
client.load_client_config("ocr_rec_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])
start = time.time()
for i in range(1000):
with open("./data/n01440764_10026.JPEG", "rb") as f:
img = f.read()
img = reader.process_image(img)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
end = time.time()
print(end - start)
#print(fetch_map["score"])
image_file_list = ["./test_rec.jpg"]
img = cv2.imread(image_file_list[0])
ocr_reader = OCRReader()
feed = {"image": ocr_reader.preprocess([img])}
fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
fetch_map = client.predict(feed=feed, fetch=fetch)
rec_res = ocr_reader.postprocess(fetch_map)
print(image_file_list[0])
print(rec_res[0][0])
# Image Classification
## Get Model
```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
```
python resnet50_v2_tutorial.py
```
# Image Classification
## Get Model
```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
```
python resnet50_v2_tutorial.py
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import Debugger
import sys
debugger = Debugger()
debugger.load_model_config(sys.argv[1], gpu=True)
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = debugger.predict(feed={"image": img}, fetch=["feature_map"])
print(fetch_map["feature_map"].reshape(-1))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
client = Client()
client.load_client_config(
"resnet_v2_50_imagenet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"].reshape(-1))
# Chinese sentence sentiment classification
# Chinese Sentence Sentiment Classification
([简体中文](./README_CN.md)|English)
## Get model files and sample data
## Get Model
```
sh get_data.sh
python -m paddle_serving_app.package --get_model senta_bilstm
python -m paddle_serving_app.package --get_model lac
tar -xzvf senta_bilstm.tar.gz
tar -xzvf lac.tar.gz
```
## Start http service
## Start HTTP Service
```
python senta_web_service.py senta_bilstm_model/ workdir 9292
python -m paddle_serving_server.serve --model lac_model --port 9300
python senta_web_service.py
```
In the Chinese sentiment classification task, the Chinese word segmentation needs to be done through [LAC task] (../lac). Set model path by ```lac_model_path``` and dictionary path by ```lac_dict_path```.
In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task. The LAC prediction service is deployed on the CPU, and the sentiment classification task is deployed on the GPU, which can be changed according to the actual situation.
In the Chinese sentiment classification task, the Chinese word segmentation needs to be done through the [LAC task](../lac).
In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task.
## Client prediction
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9292/senta/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9393/senta/prediction
```
# Chinese Sentence Sentiment Classification
(简体中文|[English](./README.md))
## Get model files and sample data
## Get model files
```
sh get_data.sh
python -m paddle_serving_app.package --get_model senta_bilstm
python -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz
tar -xzvf senta_bilstm.tar.gz
```
## Start HTTP Service
```
python senta_web_service.py senta_bilstm_model/ workdir 9292
python -m paddle_serving_server.serve --model lac_model --port 9300
python senta_web_service.py
```
In the Chinese sentiment classification task, Chinese word segmentation must first be done through the [LAC task](../lac). In the script, the LAC model path is set with the ```lac_model_path``` argument and the LAC dictionary path with the ```lac_dict_path``` argument.
In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task. The LAC prediction service is deployed on the CPU and the sentiment classification task on the GPU, which can be changed as needed.
In the Chinese sentiment classification task, Chinese word segmentation must first be done through the [LAC task](../lac).
In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task.
## Client prediction
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9292/senta/prediction
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9393/senta/prediction
```
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/senta_bilstm.tar.gz --no-check-certificate
tar -xzvf senta_bilstm.tar.gz
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/lac_model.tar.gz --no-check-certificate
tar -xzvf lac_model.tar.gz
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/lac.tar.gz --no-check-certificate
tar -xzvf lac.tar.gz
wget https://paddle-serving.bj.bcebos.com/reader/lac/lac_dict.tar.gz --no-check-certificate
tar -xzvf lac_dict.tar.gz
wget https://paddle-serving.bj.bcebos.com/reader/senta/vocab.txt --no-check-certificate
#encoding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -12,97 +13,49 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client
from paddle_serving_app import LACReader, SentaReader
import numpy as np
from paddle_serving_app.reader import LACReader, SentaReader
import os
import io
import sys
import subprocess
from multiprocessing import Process, Queue
#senta_web_service.py
from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client
from paddle_serving_app.reader import LACReader, SentaReader
class SentaService(WebService):
def set_config(
self,
lac_model_path,
lac_dict_path,
senta_dict_path, ):
self.lac_model_path = lac_model_path
self.lac_client_config_path = lac_model_path + "/serving_server_conf.prototxt"
self.lac_dict_path = lac_dict_path
self.senta_dict_path = senta_dict_path
self.show = False
def show_detail(self, show=False):
self.show = show
def start_lac_service(self):
if not os.path.exists('./lac_serving'):
os.mkdir("./lac_serving")
os.chdir('./lac_serving')
self.lac_port = self.port + 100
r = os.popen(
"python -m paddle_serving_server.serve --model {} --port {} &".
format("../" + self.lac_model_path, self.lac_port))
os.chdir('..')
def init_lac_service(self):
ps = Process(target=self.start_lac_service())
ps.start()
#self.init_lac_client()
def lac_predict(self, feed_data):
self.init_lac_client()
lac_result = self.lac_client.predict(
feed={"words": feed_data}, fetch=["crf_decode"])
self.lac_client.release()
return lac_result
def init_lac_client(self):
class SentaService(WebService):
    # Initialize the client for the LAC model prediction service
def init_lac_client(self, lac_port, lac_client_config):
self.lac_reader = LACReader()
self.senta_reader = SentaReader()
self.lac_client = Client()
self.lac_client.load_client_config(self.lac_client_config_path)
self.lac_client.connect(["127.0.0.1:{}".format(self.lac_port)])
def init_lac_reader(self):
self.lac_reader = LACReader(self.lac_dict_path)
def init_senta_reader(self):
self.senta_reader = SentaReader(vocab_path=self.senta_dict_path)
self.lac_client.load_client_config(lac_client_config)
self.lac_client.connect(["127.0.0.1:{}".format(lac_port)])
    # Define the preprocessing of the senta prediction service; call order: LAC reader -> LAC model prediction -> result postprocessing -> senta reader
def preprocess(self, feed=[], fetch=[]):
feed_data = self.lac_reader.process(feed[0]["words"])
if self.show:
print("---- lac reader ----")
print(feed_data)
lac_result = self.lac_predict(feed_data)
if self.show:
print("---- lac out ----")
print(lac_result)
segs = self.lac_reader.parse_result(feed[0]["words"],
lac_result["crf_decode"])
if self.show:
print("---- lac parse ----")
print(segs)
feed_data = self.senta_reader.process(segs)
if self.show:
print("---- senta reader ----")
print("feed_data", feed_data)
return [{"words": feed_data}], fetch
feed_data = [{
"words": self.lac_reader.process(x["words"])
} for x in feed]
lac_result = self.lac_client.predict(
feed=feed_data, fetch=["crf_decode"])
feed_batch = []
result_lod = lac_result["crf_decode.lod"]
for i in range(len(feed)):
segs = self.lac_reader.parse_result(
feed[i]["words"],
lac_result["crf_decode"][result_lod[i]:result_lod[i + 1]])
feed_data = self.senta_reader.process(segs)
feed_batch.append({"words": feed_data})
return feed_batch, fetch
senta_service = SentaService(name="senta")
#senta_service.show_detail(True)
senta_service.set_config(
lac_model_path="./lac_model",
lac_dict_path="./lac_dict",
senta_dict_path="./vocab.txt")
senta_service.load_model_config(sys.argv[1])
senta_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
senta_service.init_lac_reader()
senta_service.init_senta_reader()
senta_service.init_lac_service()
senta_service.run_server()
senta_service.run_flask()
senta_service.load_model_config("senta_bilstm_model")
senta_service.prepare_server(workdir="workdir")
senta_service.init_lac_client(
lac_port=9300, lac_client_config="lac_model/serving_server_conf.prototxt")
senta_service.run_rpc_service()
senta_service.run_web_service()
# Image Segmentation
## Get Model
```
python -m paddle_serving_app.package --get_model unet
tar -xzvf unet.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494
```
### Client Prediction
```
python seg_client.py
```
# Image Segmentation
## Get Model
```
python -m paddle_serving_app.package --get_model unet
tar -xzvf unet.tar.gz
```
## RPC Service
### Start Service
```
python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494
```
### Client Prediction
```
python seg_client.py
```
......@@ -12,31 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, Transpose, BGR2RGB, SegPostprocess
import sys
import cv2
import base64
import numpy as np
from paddle_serving_app import ImageReader
from paddle_serving_server_gpu.web_service import WebService
client = Client()
client.load_client_config("unet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9494"])
class ImageService(WebService):
def preprocess(self, feed={}, fetch=[]):
reader = ImageReader()
feed_batch = []
for ins in feed:
if "image" not in ins:
raise ("feed data error!")
sample = base64.b64decode(ins["image"])
img = reader.process_image(sample)
feed_batch.append({"image": img})
return feed_batch, fetch
preprocess = Sequential(
[File2Image(), Resize(
(512, 512), interpolation=cv2.INTER_LINEAR)])
postprocess = SegPostprocess(2)
image_service = ImageService(name="image")
image_service.load_model_config(sys.argv[1])
image_service.set_gpus("0,1")
image_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="gpu")
image_service.run_server()
image_service.run_flask()
filename = "N0060.jpg"
im = preprocess(filename)
fetch_map = client.predict(feed={"image": im}, fetch=["output"])
fetch_map["filename"] = filename
postprocess(fetch_map)
([简体中文](./README_CN.md)|English)
paddle_serving_app is a tool component of the Paddle Serving framework. It includes functions such as pre-trained model download and data preprocessing methods,
making it convenient for users to quickly test and deploy model examples, analyze the performance of prediction services, and debug model prediction services.
## Install
```shell
pip install paddle_serving_app
```
## Get model list
```shell
python -m paddle_serving_app.package --list_model
```
## Download pre-trained model
```shell
python -m paddle_serving_app.package --get_model senta_bilstm
```
11 pre-trained models are built into paddle_serving_app, covering 6 kinds of prediction tasks.
The model files can be used directly for deployment, and the `--tutorial` argument can be added to obtain the corresponding deployment method (a sketch follows the table below).
| Prediction task | Model name |
| ------------ | ------------------------------------------------ |
| SentimentAnalysis | 'senta_bilstm', 'senta_bow', 'senta_cnn' |
| SemanticRepresentation | 'ernie' |
| ChineseWordSegmentation | 'lac' |
| ObjectDetection | 'faster_rcnn' |
| ImageSegmentation | 'unet', 'deeplabv3','deeplabv3+cityscapes' |
| ImageClassification | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
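A hedged sketch of the `--tutorial` argument mentioned above (whether it is combined with `--get_model` in a single command is an assumption; only the flag names come from this document):

```shell
# hypothetical combination of flags: print the deployment tutorial for a model
python -m paddle_serving_app.package --get_model senta_bilstm --tutorial
```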
## Data preprocess API
paddle_serving_app provides a variety of data preprocessing methods for prediction tasks in the field of CV and NLP.
- class ChineseBertReader
Preprocessing for Chinese semantic representation task.
- `__init__(vocab_file, max_seq_len=20)`
- vocab_file(str): Path of dictionary file.
- max_seq_len(int, optional): The length of the sample after processing. The excess part will be truncated, and the insufficient part will be padded with 0. Default 20.
- `process(line)`
- line(str): Text input.
[example](../examples/bert/bert_client.py)
- class LACReader
Preprocessing for Chinese word segmentation task.
- `__init__(dict_floder)`
- dict_floder(str): Path of dictionary file.
- `process(sent)`
- sent(str): Text input.
- `parse_result`
- words(str): Original text input.
- crf_decode(np.array):CRF code predicted by model.
[example](../examples/lac/lac_web_service.py)
- class SentaReader
- `__init__(vocab_path)`
- vocab_path(str): Path of dictionary file.
- `process(cols)`
- cols(str): Word segmentation result.
[example](../examples/senta/senta_web_service.py)
- The image preprocessing methods are more flexible than those above and can be composed from the following classes, [example](../examples/imagenet/resnet50_rpc_client.py); a short composition sketch follows this list.
- class Sequential
- `__init__(transforms)`
- transforms(list):List of image preprocessing classes
- `__call__(img)`
- img: The input of image preprocessing. The data type is related to the first preprocessing method in transforms.
- class File2Image
- `__call__(img_path)`
- img_path(str):Path of image file.
- class URL2Image
- `__call__(img_url)`
- img_url(str):url of image file.
- class Normalize
- `__init__(mean,std)`
- mean(float):Mean
- std(float):Variance
- `__call__(img)`
- img(np.array):Image data in (C,H,W) channels.
- class CenterCrop
- `__init__(size)`
- size(list/int): Expected size after cropping; a list should contain the expected height and width, an int returns a square crop with side length size.
- `__call__(img)`
- img(np.array):Image data.
- class Resize
- `__init__(size, max_size=2147483647, interpolation=None)`
- size(list/int):The expected image size, when the input is a list type, it needs to contain the expected length and width. When the input is int type, the short side will be set to the length of size, and the long side will be scaled proportionally.
- `__call__(img)`
- img(numpy array):Image data.
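The classes above can be chained with Sequential. A minimal sketch that mirrors the resnet/mobilenet examples in this repository (the image file name is a placeholder):

```python
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize

# read a local file, resize and center-crop to 224x224, reorder channels,
# scale to [0, 1] and normalize with ImageNet mean/std
seq = Sequential([
    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
img = seq("daisy.jpg")  # (C, H, W) array ready to feed to a client or Debugger
```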
## Timeline tools
The Timeline tool can be used to visualize the start and end times of the various stages of the prediction service, such as data preparation, client-side waiting, and server-side ops.
This makes it easy to analyze where time is spent in the prediction service, and to optimize it in a targeted manner on that basis.
### How to use
1. Before making predictions on the client side, turn on per-stage timing in the Paddle Serving framework via environment variables. Timeline information will then be printed in the log.
```shell
export FLAGS_profile_client=1 # Turn on timeline function of client
export FLAGS_profile_server=1 # Turn on timeline function of server
```
2. Run the prediction and redirect the client-side log to a file, for example one named profile.
3. Export the information in the log file into a trace file.
```shell
python -m paddle_serving_app.trace --profile_file profile --trace_file trace
```
4. Open `chrome://tracing/` in the Chrome browser and load the trace file generated in the previous step through the load button to visualize the timing of each stage of the prediction service. A consolidated command sketch of steps 1-3 follows this list.
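Putting steps 1-3 together, a hedged end-to-end sketch (the client script and its arguments are placeholders borrowed from the imdb benchmark in this repository; only the environment variables and the trace command are fixed by this document):

```shell
export FLAGS_profile_client=1   # turn on client-side timing
export FLAGS_profile_server=1   # turn on server-side timing
# run any client script and capture its log into a file named profile
python benchmark.py --thread 4 --model serving_client_conf.prototxt --request rpc > profile 2>&1
# convert the log into a trace file, then load it in chrome://tracing/
python -m paddle_serving_app.trace --profile_file profile --trace_file trace
```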
The next figure shows the timeline of the GPU prediction service of the [bert example](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert).
The server starts the service on 4 GPU cards, the client starts 4 request processes, and the batch size is 1.
In the figure, bert_pre represents the client-side data preprocessing stage, and client_infer represents the stage from sending the prediction request to receiving the result.
The process in the figure is the client process number, and the second line of each process shows the timeline of each server op.
![timeline](../../doc/timeline-example.png)
## Debug tools
The inference op of Paddle Serving is implemented based on Paddle inference lib.
Before deploying the prediction service, you may need to check the input and output of the prediction service or check the resource consumption.
Therefore, a local prediction tool is built into the paddle_serving_app, which is used in the same way as sending a request to the server through the client.
Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction.
```python
from paddle_serving_app.local_predict import Debugger
import numpy as np
debugger = Debugger()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
```
(简体中文|[English](./README.md))
paddle_serving_app is a tool component of the Paddle Serving framework. It includes functions such as pre-trained model download and data preprocessing methods, making it convenient for users to quickly try out and deploy model examples, analyze prediction-service performance, and debug model prediction services.
## Install
```shell
pip install paddle_serving_app
```
## Get model list
```shell
python -m paddle_serving_app.package --list_model
```
## Download pre-trained model
```shell
python -m paddle_serving_app.package --get_model senta_bilstm
```
11 pre-trained models are built into paddle_serving_app, covering 6 kinds of prediction tasks. The downloaded model files can be used directly for deployment; add the `--tutorial` argument to get the corresponding deployment method.
| Prediction task | Model name |
| ------------ | ------------------------------------------------ |
| Chinese sentiment analysis | 'senta_bilstm', 'senta_bow', 'senta_cnn' |
| Semantic understanding | 'ernie' |
| Chinese word segmentation | 'lac' |
| Object detection | 'faster_rcnn' |
| Image segmentation | 'unet', 'deeplabv3', 'deeplabv3+cityscapes' |
| Image classification | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
## Data preprocessing API
paddle_serving_app provides a variety of common data preprocessing methods for model tasks in the CV and NLP fields.
- class ChineseBertReader
Preprocessing for the Chinese semantic understanding task.
- `__init__(vocab_file, max_seq_len=20)`
- vocab_file(str): Path of the dictionary file.
- max_seq_len(int, optional): Length of the sample after processing; the excess part is truncated and the insufficient part is padded with 0. Default 20.
- `process(line)`
- line(str): Input text
[Example](../examples/bert/bert_client.py)
- class LACReader: preprocessing for Chinese word segmentation
- `__init__(dict_floder)`
- dict_floder(str): Directory of dictionary files
- `process(sent)`
- sent(str): Input text
- `parse_result`
- words(str): Original text
- crf_decode(np.array): CRF code in the model prediction result
[Example](../examples/lac/lac_web_service.py)
- class SentaReader
- `__init__(vocab_path)`
- vocab_path(str): Directory of dictionary files
- `process(cols)`
- cols(str): Text after word segmentation
[Example](../examples/senta/senta_web_service.py)
- 图像的预处理方法相比于上述的方法更加灵活多变,可以通过以下的多个类进行组合,[参考示例](../examples/imagenet/resnet50_rpc_client.py)
- class Sequentia
- `__init__(transforms)`
- transforms(list):图像预处理方法类的列表
- `__call__(img)`
- img:图像处理的输入,具体类型与transforms中的第一个预处理方法有关
- class File2Image
- `__call__(img_path)`
- img_path(str):图像文件路径
- class URL2Image
- `__call__(img_url)`
- img_url(str):图像url
- class Normalize
- `__init__(mean,std)`
- mean(float):均值
- std(float):方差
- `__call__(img)`
- img(np.array):(C,H,W)排列的图像数据
- class CenterCrop
- `__init__(size)`
- size(list/int):预期的裁剪后的大小,list类型时需要包含预期的长和宽,int类型时会返回边长为size的正方形图片
- `__call__(img)`
- img(np.array):输入图像
- class Resize
- `__init__(size, max_size=2147483647, interpolation=None)`
- size(list/int):预期的图像大小,list类型时需要包含预期的长和宽,int类型时,短边会设置为size的长度,长边按比例缩放
- `__call__(img)`
- img(numpy array):输入图像
## Timeline tools
The Timeline tool visualizes the start and end times of stages such as data preparation, client waiting, and server-side ops in the prediction service, making it easy to analyze where time is spent and to optimize the service accordingly.
### How to use
1. Before the client makes predictions, turn on per-stage timing logs in the Paddle Serving framework via environment variables.
```shell
export FLAGS_profile_client=1 # Turn on per-stage timing on the client side
export FLAGS_profile_server=1 # Turn on per-stage timing on the server side
```
2. Run the prediction and redirect the client-side log to a file, for example one named profile.
3. Export the information in the log file into a trace file.
```shell
python -m paddle_serving_app.trace --profile_file profile --trace_file trace
```
4. Open `chrome://tracing/` in the Chrome browser and load the trace file generated in the previous step through the load button to visualize the timing of each stage of the prediction service.
The figure below shows the per-stage timeline of the GPU prediction service of the [bert example](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert), with the server using 4 GPU cards, the client starting 4 processes, and batch size 1.
In the figure, bert_pre is the client-side data preprocessing stage, client_infer is the stage from sending the prediction request to receiving the result, process is the client process number, and the second line of each process shows the timeline of each server op.
![timeline](../../doc/timeline-example.png)
## Debug tools
The server-side inference op of the Paddle Serving framework uses the Paddle inference library. Before deploying a prediction service, you may need to check its inputs and outputs or inspect resource consumption. Therefore a local prediction tool is built into paddle_serving_app, used in the same way as sending requests to the server through the client.
Taking the [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code runs local prediction.
```python
from paddle_serving_app.local_predict import Debugger
import numpy as np
debugger = Debugger()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
```
......@@ -11,8 +11,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .reader.chinese_bert_reader import ChineseBertReader
from .reader.image_reader import ImageReader
from .reader.lac_reader import LACReader
from .reader.senta_reader import SentaReader
from .models import ServingModels
# -*- coding: utf-8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import os
import google.protobuf.text_format
import numpy as np
import argparse
import paddle.fluid as fluid
from .proto import general_model_config_pb2 as m_config
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor
import logging
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
class Debugger(object):
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
self.feed_types_ = {}
self.fetch_types_ = {}
self.feed_shapes_ = {}
self.feed_names_to_idx_ = {}
self.fetch_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
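    # Parse serving_server_conf.prototxt under model_path to recover feed/fetch metadata,
    # then build a Paddle AnalysisConfig-based predictor (CPU by default, GPU when gpu=True).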
def load_model_config(self, model_path, gpu=False, profile=True, cpu_num=1):
client_config = "{}/serving_server_conf.prototxt".format(model_path)
model_conf = m_config.GeneralModelConfig()
f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
config = AnalysisConfig(model_path)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.feed_names_to_idx_ = {}
self.fetch_names_to_idx_ = {}
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if not gpu:
config.disable_gpu()
else:
config.enable_use_gpu(100, 0)
if profile:
config.enable_profile()
config.set_cpu_math_library_num_threads(cpu_num)
config.switch_ir_optim(False)
self.predictor = create_paddle_predictor(config)
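    # Run local prediction: feed is a dict (or list of dicts) keyed by feed var alias
    # names; returns a dict of numpy arrays keyed by the requested fetch names.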
def predict(self, feed=None, fetch=None):
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, list):
feed_batch = feed
else:
raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = []
float_slot_batch = []
int_feed_names = []
float_feed_names = []
int_shape = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
return {}
inputs = []
for name in self.feed_names_:
if isinstance(feed[name], list):
feed[name] = np.array(feed[name]).reshape(self.feed_shapes_[
name])
if self.feed_types_[name] == 0:
feed[name] = feed[name].astype("int64")
else:
feed[name] = feed[name].astype("float32")
inputs.append(PaddleTensor(feed[name][np.newaxis, :]))
outputs = self.predictor.run(inputs)
fetch_map = {}
for name in fetch:
fetch_map[name] = outputs[self.fetch_names_to_idx_[
name]].as_ndarray()
return fetch_map
......@@ -20,69 +20,41 @@ from collections import OrderedDict
class ServingModels(object):
def __init__(self):
self.model_dict = OrderedDict()
#senta
for key in [
"senta_bilstm", "senta_bow", "senta_cnn", "senta_gru",
"senta_lstm"
]:
self.model_dict[
key] = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/" + key + ".tar.gz"
#image classification
for key in [
"alexnet_imagenet",
"darknet53-imagenet",
"densenet121_imagenet",
"densenet161_imagenet",
"densenet169_imagenet",
"densenet201_imagenet",
"densenet264_imagenet"
"dpn107_imagenet",
"dpn131_imagenet",
"dpn68_imagenet",
"dpn92_imagenet",
"dpn98_imagenet",
"efficientnetb0_imagenet",
"efficientnetb1_imagenet",
"efficientnetb2_imagenet",
"efficientnetb3_imagenet",
"efficientnetb4_imagenet",
"efficientnetb5_imagenet",
"efficientnetb6_imagenet",
"googlenet_imagenet",
"inception_v4_imagenet",
"inception_v2_imagenet",
"nasnet_imagenet",
"pnasnet_imagenet",
"resnet_v2_101_imagenet",
"resnet_v2_151_imagenet",
"resnet_v2_18_imagenet",
"resnet_v2_34_imagenet",
"resnet_v2_50_imagenet",
"resnext101_32x16d_wsl",
"resnext101_32x32d_wsl",
"resnext101_32x48d_wsl",
"resnext101_32x8d_wsl",
"resnext101_32x4d_imagenet",
"resnext101_64x4d_imagenet",
"resnext101_vd_32x4d_imagenet",
"resnext101_vd_64x4d_imagenet",
"resnext152_64x4d_imagenet",
"resnext152_vd_64x4d_imagenet",
"resnext50_64x4d_imagenet",
"resnext50_vd_32x4d_imagenet",
"resnext50_vd_64x4d_imagenet",
"se_resnext101_32x4d_imagenet",
"se_resnext50_32x4d_imagenet",
"shufflenet_v2_imagenet",
"vgg11_imagenet",
"vgg13_imagenet",
"vgg16_imagenet",
"vgg19_imagenet",
"xception65_imagenet",
"xception71_imagenet",
]:
self.model_dict[
key] = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/" + key + ".tar.gz"
self.model_dict[
"SentimentAnalysis"] = ["senta_bilstm", "senta_bow", "senta_cnn"]
self.model_dict["SemanticRepresentation"] = ["ernie"]
self.model_dict["ChineseWordSegmentation"] = ["lac"]
self.model_dict["ObjectDetection"] = ["faster_rcnn"]
self.model_dict["ImageSegmentation"] = [
"unet", "deeplabv3", "deeplabv3+cityscapes"
]
self.model_dict["ImageClassification"] = [
"resnet_v2_50_imagenet", "mobilenet_v2_imagenet"
]
self.model_dict["OCR"] = ["ocr_rec"]
image_class_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/"
image_seg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageSegmentation/"
object_detection_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/"
ocr_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/"
senta_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/"
semantic_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/"
wordseg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/"
self.url_dict = {}
def pack_url(model_dict, key, url):
for i, value in enumerate(model_dict[key]):
self.url_dict[model_dict[key][i]] = url + model_dict[key][
i] + ".tar.gz"
pack_url(self.model_dict, "SentimentAnalysis", senta_url)
pack_url(self.model_dict, "SemanticRepresentation", semantic_url)
pack_url(self.model_dict, "ChineseWordSegmentation", wordseg_url)
pack_url(self.model_dict, "ObjectDetection", object_detection_url)
pack_url(self.model_dict, "ImageSegmentation", image_seg_url)
pack_url(self.model_dict, "ImageClassification", image_class_url)
pack_url(self.model_dict, "OCR", ocr_url)
#SemanticModel
for key in [
......@@ -101,13 +73,17 @@ class ServingModels(object):
key] = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/" + key + ".tar.gz"
def get_model_list(self):
return (self.model_dict.keys())
return self.model_dict
def download(self, model_name):
if model_name in self.model_dict:
url = self.model_dict[model_name]
if model_name in self.url_dict:
url = self.url_dict[model_name]
r = os.system('wget ' + url + ' --no-check-certificate')
def get_tutorial(self, model_name):
if model_name in self.url_dict:
return "Tutorial of {} to be added".format(model_name)
if __name__ == "__main__":
models = ServingModels()
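    # A minimal lookup-and-download sketch: "senta_bilstm" is one of the
    # SentimentAnalysis entries registered above; download() fetches the
    # corresponding .tar.gz into the current directory via wget.
    print(models.get_model_list())
    models.download("senta_bilstm")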
......
......@@ -20,6 +20,7 @@ Usage:
"""
import argparse
import sys
from .models import ServingModels
......@@ -29,6 +30,8 @@ def parse_args(): # pylint: disable=doc-string-missing
"--get_model", type=str, default="", help="Download a specific model")
parser.add_argument(
'--list_model', nargs='*', default=None, help="List Models")
parser.add_argument(
'--tutorial', type=str, default="", help="Get running command")
return parser.parse_args()
......@@ -36,25 +39,40 @@ if __name__ == "__main__":
args = parse_args()
if args.list_model is not None:
model_handle = ServingModels()
model_names = model_handle.get_model_list()
for key in model_names:
print(key)
model_dict = model_handle.get_model_list()
# Task level model list
# Text Classification, Semantic Representation
# Image Classification, Object Detection, Image Segmentation
for key in model_dict:
print("-----------------------------------------------")
print("{}: {}".format(key, " | ".join(model_dict[key])))
elif args.get_model != "":
model_handle = ServingModels()
model_names = model_handle.get_model_list()
if args.get_model not in model_names:
model_dict = model_handle.url_dict
if args.get_model not in model_dict:
print(
"Your model name does not exist in current model list, stay tuned"
)
sys.exit(0)
model_handle.download(args.get_model)
elif args.tutorial != "":
model_handle = ServingModels()
model_dict = model_handle.url_dict
if args.tutorial not in model_dict:
print(
"Your model name does not exist in current model list, stay tuned"
)
sys.exit(0)
tutorial_str = model_handle.get_tutorial(args.tutorial)
print(tutorial_str)
else:
print("Wrong argument")
print("""
Usage:
Download a package for serving directly
Example:
python -m paddle_serving_app.models --get senta_bilstm
python -m paddle_serving_app.models --get_model senta_bilstm
python -m paddle_serving_app.models --list_model
""")
pass
......@@ -11,3 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .chinese_bert_reader import ChineseBertReader
from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize
from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB
from .image_reader import RCNNPostprocess, SegPostprocess, PadStride
from .lac_reader import LACReader
from .senta_reader import SentaReader
from .imdb_reader import IMDBDataset
from .ocr_reader import OCRReader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def transpose(img, transpose_target):
img = img.transpose(transpose_target)
return img
def normalize(img, mean, std, channel_first):
# need to optimize here
if channel_first:
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
else:
img_mean = np.array(mean).reshape((1, 1, 3))
img_std = np.array(std).reshape((1, 1, 3))
img -= img_mean
img /= img_std
return img
def crop(img, target_size, center):
height, width = img.shape[:2]
size = target_size
if center == True:
w_start = (width - size) // 2
h_start = (height - size) // 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img[h_start:h_end, w_start:w_end, :]
return img
def resize(img, target_size, max_size=2147483647, interpolation=None):
if isinstance(target_size, tuple):
resized_width = min(target_size[0], max_size)
resized_height = min(target_size[1], max_size)
else:
im_max_size = max(img.shape[0], img.shape[1])
percent = float(target_size) / min(img.shape[0], img.shape[1])
if np.round(percent * im_max_size) > max_size:
percent = float(max_size) / float(im_max_size)
resized_width = int(round(img.shape[1] * percent))
resized_height = int(round(img.shape[0] * percent))
if interpolation:
resized = cv2.resize(
img, (resized_width, resized_height), interpolation=interpolation)
else:
resized = cv2.resize(img, (resized_width, resized_height))
return resized
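# A small self-contained sketch chaining the helpers above on a dummy image
# (the 224/256 sizes and the ImageNet-style mean/std are illustrative values only).
if __name__ == "__main__":
    dummy = np.zeros((240, 320, 3), dtype="float32")
    out = resize(dummy, 256)                          # shorter edge -> 256
    out = crop(out, 224, center=True)                 # central 224x224 patch
    out = normalize(out, [0.485, 0.456, 0.406],
                    [0.229, 0.224, 0.225], channel_first=False)
    out = transpose(out, (2, 0, 1))                   # HWC -> CHW
    print(out.shape)                                  # (3, 224, 224)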
......@@ -11,9 +11,498 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import os
import numpy as np
import base64
import sys
from . import functional as F
from PIL import Image, ImageDraw
import json
_cv2_interpolation_to_str = {cv2.INTER_LINEAR: "cv2.INTER_LINEAR", None: "None"}
py_version = sys.version_info[0]
if py_version == 2:
import urllib
else:
import urllib.request as urllib
def generate_colormap(num_classes):
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
return color_map
class SegPostprocess(object):
def __init__(self, class_num):
self.class_num = class_num
def __call__(self, image_with_result):
if "filename" not in image_with_result:
raise ("filename should be specified in postprocess")
img_name = image_with_result["filename"]
ori_img = cv2.imread(img_name, -1)
ori_shape = ori_img.shape
mask = None
for key in image_with_result:
if ".lod" in key or "filename" in key:
continue
mask = image_with_result[key]
if mask is None:
raise ("segment mask should be specified in postprocess")
mask = mask.astype("uint8")
mask_png = mask.reshape((512, 512, 1))
#score_png = mask_png[:, :, np.newaxis]
score_png = mask_png
score_png = np.concatenate([score_png] * 3, axis=2)
color_map = generate_colormap(self.class_num)
for i in range(score_png.shape[0]):
for j in range(score_png.shape[1]):
score_png[i, j] = color_map[score_png[i, j, 0]]
ext_pos = img_name.rfind(".")
img_name_fix = img_name[:ext_pos] + "_" + img_name[ext_pos + 1:]
mask_save_name = img_name_fix + "_mask.png"
cv2.imwrite(mask_save_name, mask_png)
vis_result_name = img_name_fix + "_result.png"
result_png = score_png
result_png = cv2.resize(
result_png,
ori_shape[:2],
fx=0,
fy=0,
interpolation=cv2.INTER_CUBIC)
cv2.imwrite(vis_result_name, result_png)
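# Hedged usage sketch (not executed here): the postprocessor expects the raw
# fetch map of a segmentation model plus the original image path, e.g.
#   seg_postprocess = SegPostprocess(class_num=2)
#   seg_postprocess({"filename": "daisy.jpg", "mask_output": mask_array})
# where "daisy.jpg" must exist on disk, "mask_output" stands for the model's
# real fetch name and mask_array is a 512x512 class-id map; the call writes
# *_mask.png and *_result.png next to the input image.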
class RCNNPostprocess(object):
def __init__(self, label_file, output_dir):
self.output_dir = output_dir
self.label_file = label_file
self.label_list = []
with open(label_file) as fin:
for line in fin:
self.label_list.append(line.strip())
self.clsid2catid = {i: i for i in range(len(self.label_list))}
self.catid2name = {i: name for i, name in enumerate(self.label_list)}
def _offset_to_lengths(self, lod):
offset = lod[0]
lengths = [offset[i + 1] - offset[i] for i in range(len(offset) - 1)]
return [lengths]
def _bbox2out(self, results, clsid2catid, is_bbox_normalized=False):
xywh_res = []
for t in results:
bboxes = t['bbox'][0]
lengths = t['bbox'][1][0]
if bboxes is None or bboxes.shape == (1, 1):
continue
k = 0
for i in range(len(lengths)):
num = lengths[i]
for j in range(num):
dt = bboxes[k]
clsid, score, xmin, ymin, xmax, ymax = dt.tolist()
catid = (clsid2catid[int(clsid)])
if is_bbox_normalized:
xmin, ymin, xmax, ymax = \
self.clip_bbox([xmin, ymin, xmax, ymax])
w = xmax - xmin
h = ymax - ymin
im_shape = t['im_shape'][0][i].tolist()
im_height, im_width = int(im_shape[0]), int(im_shape[1])
xmin *= im_width
ymin *= im_height
w *= im_width
h *= im_height
else:
w = xmax - xmin + 1
h = ymax - ymin + 1
bbox = [xmin, ymin, w, h]
coco_res = {
'category_id': catid,
'bbox': bbox,
'score': score
}
xywh_res.append(coco_res)
k += 1
return xywh_res
def _get_bbox_result(self, fetch_map, fetch_name, clsid2catid):
result = {}
is_bbox_normalized = False
output = fetch_map[fetch_name]
lod = [fetch_map[fetch_name + '.lod']]
lengths = self._offset_to_lengths(lod)
np_data = np.array(output)
result['bbox'] = (np_data, lengths)
result['im_id'] = np.array([[0]])
bbox_results = self._bbox2out([result], clsid2catid, is_bbox_normalized)
return bbox_results
def color_map(self, num_classes):
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = np.array(color_map).reshape(-1, 3)
return color_map
def draw_bbox(self, image, catid2name, bboxes, threshold, color_list):
"""
draw bbox on image
"""
draw = ImageDraw.Draw(image)
for dt in np.array(bboxes):
catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
if score < threshold:
continue
xmin, ymin, w, h = bbox
xmax = xmin + w
ymax = ymin + h
color = tuple(color_list[catid])
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=2,
fill=color)
# draw label
text = "{} {:.2f}".format(catid2name[catid], score)
tw, th = draw.textsize(text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
return image
def visualize(self, infer_img, bbox_results, catid2name, num_classes):
image = Image.open(infer_img).convert('RGB')
color_list = self.color_map(num_classes)
image = self.draw_bbox(image, self.catid2name, bbox_results, 0.5,
color_list)
image_path = os.path.split(infer_img)[-1]
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
out_path = os.path.join(self.output_dir, image_path)
image.save(out_path, quality=95)
def __call__(self, image_with_bbox):
fetch_name = ""
for key in image_with_bbox:
if key == "image":
continue
if ".lod" in key:
continue
fetch_name = key
bbox_result = self._get_bbox_result(image_with_bbox, fetch_name,
self.clsid2catid)
if os.path.isdir(self.output_dir) is False:
os.mkdir(self.output_dir)
self.visualize(image_with_bbox["image"], bbox_result, self.catid2name,
len(self.label_list))
if os.path.isdir(self.output_dir) is False:
os.mkdir(self.output_dir)
bbox_file = os.path.join(self.output_dir, 'bbox.json')
with open(bbox_file, 'w') as f:
json.dump(bbox_result, f, indent=4)
def __repr__(self):
return self.__class__.__name__ + "label_file: {1}, output_dir: {2}".format(
self.label_file, self.output_dir)
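# Hedged usage sketch (not executed here): "label_list.txt" and the fetch map
# below are placeholders. The callable expects the original image path under
# the key "image" plus the detection output and its ".lod" offsets, e.g.
#   postprocess = RCNNPostprocess("label_list.txt", "output")
#   postprocess({"image": "test.jpg",
#                "multiclass_nms": boxes,        # (N, 6): [clsid, score, x1, y1, x2, y2]
#                "multiclass_nms.lod": lod_array})
# It draws boxes scoring above 0.5 into ./output and dumps bbox.json there.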
class Sequential(object):
"""
Args:
sequence (sequence of ``Transform`` objects): list of transforms to chain.
This API references some of the design pattern of torchvision
Users can simply use this API in training as well
Example:
>>> image_reader.Sequential([
>>> transforms.CenterCrop(10),
>>> ])
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img):
for t in self.transforms:
img = t(img)
return img
def __repr__(self):
format_string_ = self.__class__.__name__ + '('
for t in self.transforms:
format_string_ += '\n'
format_string_ += ' {0}'.format(t)
format_string_ += '\n)'
return format_string_
class RGB2BGR(object):
def __init__(self):
pass
def __call__(self, img):
return img[:, :, ::-1]
def __repr__(self):
return self.__class__.__name__ + "()"
class BGR2RGB(object):
def __init__(self):
pass
def __call__(self, img):
return img[:, :, ::-1]
def __repr__(self):
return self.__class__.__name__ + "()"
class File2Image(object):
def __init__(self):
pass
def __call__(self, img_path):
if py_version == 2:
fin = open(img_path)
else:
fin = open(img_path, "rb")
sample = fin.read()
data = np.fromstring(sample, np.uint8)
img = cv2.imdecode(data, cv2.IMREAD_COLOR)
'''
img = cv2.imread(img_path, -1)
channels = img.shape[2]
ori_h = img.shape[0]
ori_w = img.shape[1]
'''
return img
def __repr__(self):
return self.__class__.__name__ + "()"
class URL2Image(object):
def __init__(self):
pass
def __call__(self, img_url):
resp = urllib.urlopen(img_url)
sample = resp.read()
data = np.fromstring(sample, np.uint8)
img = cv2.imdecode(data, cv2.IMREAD_COLOR)
return img
def __repr__(self):
return self.__class__.__name__ + "()"
class Base64ToImage(object):
def __init__(self):
pass
def __call__(self, img_base64):
img = base64.b64decode(img_base64)
return img
def __repr__(self):
return self.__class__.__name__ + "()"
class Div(object):
""" divide by some float number """
def __init__(self, value):
self.value = value
def __call__(self, img):
"""
Args:
img (numpy array): (int8 numpy array)
Returns:
img (numpy array): (float32 numpy array)
"""
img = img.astype('float32') / self.value
return img
def __repr__(self):
return self.__class__.__name__ + "({})".format(self.value)
class Normalize(object):
"""Normalize a tensor image with mean and standard deviation.
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
will normalize each channel of the input numpy array, i.e.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
.. note::
Unlike the torchvision original, this transform works on numpy arrays and
modifies the input array in place.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
"""
def __init__(self, mean, std, channel_first=False):
self.mean = mean
self.std = std
self.channel_first = channel_first
def __call__(self, img):
"""
Args:
img (numpy array): (C, H, W) to be normalized.
Returns:
Tensor: Normalized Tensor image.
"""
return F.normalize(img, self.mean, self.std, self.channel_first)
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean,
self.std)
class Lambda(object):
"""Apply a user-defined lambda as a transform.
Adapted from
https://github.com/pytorch/vision/blob/master/torchvision/transforms/transforms.py#L301
Args:
lambd (function): Lambda/function to be used for transform.
"""
def __init__(self, lambd):
assert callable(lambd), repr(type(lambd)
.__name__) + " object is not callable"
self.lambd = lambd
def __call__(self, img):
return self.lambd(img)
def __repr__(self):
return self.__class__.__name__ + '()'
class CenterCrop(object):
"""Crops the given Image at the center.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
"""
def __init__(self, size):
self.size = size
def __call__(self, img):
"""
Args:
img (numpy array): Image to be cropped.
Returns:
numpy array Image: Cropped image.
"""
return F.crop(img, self.size, True)
def __repr__(self):
return self.__class__.__name__ + '(size={0})'.format(self.size)
class Resize(object):
"""Resize the input numpy array Image to the given size.
Args:
size (sequence or int): Desired output size. If size is a sequence like
(h, w), output size will be matched to this. If size is an int,
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to
(size * height / width, size)
interpolation (int, optional): Desired interpolation. Default is
``None``
"""
def __init__(self, size, max_size=2147483647, interpolation=None):
self.size = size
self.max_size = max_size
self.interpolation = interpolation
def __call__(self, img):
return F.resize(img, self.size, self.max_size, self.interpolation)
def __repr__(self):
return self.__class__.__name__ + '(size={0}, max_size={1}, interpolation={2})'.format(
self.size, self.max_size,
_cv2_interpolation_to_str[self.interpolation])
class PadStride(object):
def __init__(self, stride):
self.coarsest_stride = stride
def __call__(self, img):
coarsest_stride = self.coarsest_stride
if coarsest_stride == 0:
return img
im_c, im_h, im_w = img.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = img
im_info = {}
im_info['resize_shape'] = padding_im.shape[1:]
return padding_im
class Transpose(object):
def __init__(self, transpose_target):
self.transpose_target = transpose_target
def __call__(self, img):
return F.transpose(img, self.transpose_target)
def __repr__(self):
format_string = self.__class__.__name__ + \
"({})".format(self.transpose_target)
return format_string
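# Hedged composition sketch (not executed here): the transform objects above
# are designed to be chained with Sequential, e.g. a detection-style
# preprocessing pipeline might look like
#   preprocess = Sequential([
#       File2Image(), BGR2RGB(), Div(255.0),
#       Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
#       Transpose((2, 0, 1)), PadStride(32)
#   ])
#   img = preprocess("test.jpg")
# where "test.jpg" and the stride of 32 are illustrative values only.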
class ImageReader():
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class IMDBDataset(dg.MultiSlotDataGenerator):
def load_resource(self, dictfile):
self._vocab = {}
wid = 0
if py_version == 2:
with open(dictfile) as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas
def get_words_and_label(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
" ").strip()
label = [int(line.split('|')[-1])]
words = [x for x in self._pattern.split(send) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size):
def local_iter():
for fname in infer_filelist:
with open(fname, "r") as fin:
for line in fin:
feas, label = self.get_words_and_label(line)
yield feas, label
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter():
feas, label = self.get_words_and_label(line)
yield ("words", feas), ("label", label)
return data_iter
if __name__ == "__main__":
imdb = IMDBDataset()
imdb.load_resource("imdb.vocab")
imdb.run_from_stdin()
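# Expected input format (hedged, inferred from get_words_and_label above):
# each line is the review text followed by "|" and an integer label, e.g.
#   this movie is great | 1
# run_from_stdin() then emits ("words", word_ids), ("label", [1]) style samples.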
......@@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
py_version = sys.version_info[0]
if py_version == 2:
reload(sys)
sys.setdefaultencoding('utf-8')
import os
import io
......@@ -47,10 +48,16 @@ def load_kv_dict(dict_path,
class LACReader(object):
"""data reader"""
def __init__(self, dict_folder):
def __init__(self, dict_folder=""):
# read dict
#basepath = os.path.abspath(__file__)
#folder = os.path.dirname(basepath)
if dict_folder == "":
dict_folder = "lac_dict"
if not os.path.exists(dict_folder):
r = os.system(
"wget https://paddle-serving.bj.bcebos.com/reader/lac/lac_dict.tar.gz --no-check-certificate && tar -xzvf lac_dict.tar.gz"
)
word_dict_path = os.path.join(dict_folder, "word.dic")
label_dict_path = os.path.join(dict_folder, "tag.dic")
replace_dict_path = os.path.join(dict_folder, "q2b.dic")
......@@ -104,6 +111,10 @@ class LACReader(object):
return word_ids
def parse_result(self, words, crf_decode):
try:
words = unicode(words, "utf-8")
except:
pass
tags = [self.id2label_dict[str(x[0])] for x in crf_decode]
sent_out = []
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import copy
import numpy as np
import math
import re
import string
import sys
import argparse
from paddle_serving_app.reader import Sequential, Resize, Transpose, Div, Normalize
class CharacterOps(object):
""" Convert between text-label and text-index """
def __init__(self, config):
self.character_type = config['character_type']
self.loss_type = config['loss_type']
if self.character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
elif self.character_type == "ch":
character_dict_path = config['character_dict_path']
self.character_str = ""
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n")
self.character_str += line
dict_character = list(self.character_str)
elif self.character_type == "en_sensitive":
# same with ASTER setting (use 94 char).
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
else:
self.character_str = None
assert self.character_str is not None, \
"Unsupported character type: {}".format(self.character_type)
self.beg_str = "sos"
self.end_str = "eos"
if self.loss_type == "attention":
dict_character = [self.beg_str, self.end_str] + dict_character
self.dict = {}
for i, char in enumerate(dict_character):
self.dict[char] = i
self.character = dict_character
def encode(self, text):
"""convert text-label into text-index.
input:
text: text labels of each image. [batch_size]
output:
text: concatenated text index for CTCLoss.
[sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
length: length of each text. [batch_size]
"""
if self.character_type == "en":
text = text.lower()
text_list = []
for char in text:
if char not in self.dict:
continue
text_list.append(self.dict[char])
text = np.array(text_list)
return text
def decode(self, text_index, is_remove_duplicate=False):
""" convert text-index into text-label. """
char_list = []
char_num = self.get_char_num()
if self.loss_type == "attention":
beg_idx = self.get_beg_end_flag_idx("beg")
end_idx = self.get_beg_end_flag_idx("end")
ignored_tokens = [beg_idx, end_idx]
else:
ignored_tokens = [char_num]
for idx in range(len(text_index)):
if text_index[idx] in ignored_tokens:
continue
if is_remove_duplicate:
if idx > 0 and text_index[idx - 1] == text_index[idx]:
continue
char_list.append(self.character[text_index[idx]])
text = ''.join(char_list)
return text
def get_char_num(self):
return len(self.character)
def get_beg_end_flag_idx(self, beg_or_end):
if self.loss_type == "attention":
if beg_or_end == "beg":
idx = np.array(self.dict[self.beg_str])
elif beg_or_end == "end":
idx = np.array(self.dict[self.end_str])
else:
assert False, "Unsupport type %s in get_beg_end_flag_idx"\
% beg_or_end
return idx
else:
err = "error in get_beg_end_flag_idx when using the loss %s"\
% (self.loss_type)
assert False, err
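# Hedged round-trip sketch (not executed here): for an English CTC model the
# config only needs the two keys read in __init__ above, e.g.
#   ops = CharacterOps({"character_type": "en", "loss_type": "ctc"})
#   ids = ops.encode("hello")                          # indices into "0-9a-z"
#   text = ops.decode(ids, is_remove_duplicate=False)  # -> "hello"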
class OCRReader(object):
def __init__(self):
args = self.parse_args()
image_shape = [int(v) for v in args.rec_image_shape.split(",")]
self.rec_image_shape = image_shape
self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num
char_ops_params = {}
char_ops_params["character_type"] = args.rec_char_type
char_ops_params["character_dict_path"] = args.rec_char_dict_path
char_ops_params['loss_type'] = 'ctc'
self.char_ops = CharacterOps(char_ops_params)
def parse_args(self):
parser = argparse.ArgumentParser()
parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=1)
parser.add_argument(
"--rec_char_dict_path", type=str, default="./ppocr_keys_v1.txt")
return parser.parse_args()
def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = self.rec_image_shape
if self.character_type == "ch":
imgW = int(32 * max_wh_ratio)
h = img.shape[0]
w = img.shape[1]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
seq = Sequential([
Resize(imgH, resized_w), Transpose((2, 0, 1)), Div(255),
Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5], True)
])
resized_image = seq(img)
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
def preprocess(self, img_list):
img_num = len(img_list)
norm_img_batch = []
max_wh_ratio = 0
for ino in range(img_num):
h, w = img_list[ino].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(img_num):
norm_img = self.resize_norm_img(img_list[ino], max_wh_ratio)
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
return norm_img_batch[0]
def postprocess(self, outputs):
rec_res = []
rec_idx_lod = outputs["ctc_greedy_decoder_0.tmp_0.lod"]
predict_lod = outputs["softmax_0.tmp_0.lod"]
rec_idx_batch = outputs["ctc_greedy_decoder_0.tmp_0"]
for rno in range(len(rec_idx_lod) - 1):
beg = rec_idx_lod[rno]
end = rec_idx_lod[rno + 1]
rec_idx_tmp = rec_idx_batch[beg:end, 0]
preds_text = self.char_ops.decode(rec_idx_tmp)
beg = predict_lod[rno]
end = predict_lod[rno + 1]
probs = outputs["softmax_0.tmp_0"][beg:end, :]
ind = np.argmax(probs, axis=1)
blank = probs.shape[1]
valid_ind = np.where(ind != (blank - 1))[0]
score = np.mean(probs[valid_ind, ind[valid_ind]])
rec_res.append([preds_text, score])
return rec_res
......@@ -14,10 +14,11 @@
import sys
import io
import os
class SentaReader():
def __init__(self, vocab_path, max_seq_len=20):
def __init__(self, vocab_path="", max_seq_len=20):
self.max_seq_len = max_seq_len
self.word_dict = self.load_vocab(vocab_path)
......@@ -25,6 +26,13 @@ class SentaReader():
"""
load the given vocabulary
"""
if vocab_path == "":
vocab_path = "senta_vocab.txt"
if not os.path.exists(vocab_path):
r = os.system(
" wget https://paddle-serving.bj.bcebos.com/reader/senta/senta_vocab.txt --no-check-certificate"
)
vocab = {}
with io.open(vocab_path, 'r', encoding='utf8') as f:
for line in f:
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from image_reader import File2Image
from image_reader import URL2Image
from image_reader import Sequential
from image_reader import Normalize
from image_reader import CenterCrop
from image_reader import Resize
seq = Sequential([
File2Image(), CenterCrop(30),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Resize((5, 5))
])
url = "daisy.jpg"
for x in range(100):
img = seq(url)
print(img.shape)
# -*- coding: utf-8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import json
import sys
import argparse
def parse_args():
parser = argparse.ArgumentParser("Convert profile log to trace")
parser.add_argument(
"--profile_file",
type=str,
default="",
required=True,
help="Profile log")
parser.add_argument(
"--trace_file", type=str, default="trace", help="Trace file")
return parser.parse_args()
def parse(pid_str, time_str, counter):
pid = pid_str.split(":")[1]
event_list = time_str.split(" ")
trace_list = []
for event in event_list:
name, ts = event.split(":")
name_list = name.split("_")
ph = "B" if (name_list[-1] == "0") else "E"
if len(name_list) == 2:
name = name_list[0]
else:
name = name_list[0] + "_" + name_list[1]
event_dict = {}
event_dict["name"] = name
event_dict["tid"] = 0
event_dict["pid"] = pid
event_dict["ts"] = ts
event_dict["ph"] = ph
trace_list.append(event_dict)
return trace_list
if __name__ == "__main__":
args = parse_args()
profile_file = args.profile_file
trace_file = args.trace_file
all_list = []
counter = 0
with open(profile_file) as f:
for line in f.readlines():
line = line.strip().split("\t")
if line[0] == "PROFILE":
trace_list = parse(line[1], line[2], counter)
counter += 1
for trace in trace_list:
all_list.append(trace)
trace = json.dumps(all_list, indent=2, separators=(',', ':'))
with open(trace_file, "w") as f:
f.write(trace)
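# Typical invocation (hedged sketch; the script file name "timeline.py" is
# assumed here): convert a profile log produced by the serving client/server
# into a JSON trace, e.g.
#   python timeline.py --profile_file profile --trace_file trace
# The event dicts built above follow the Chrome trace event format, so the
# output can be inspected in a Chrome-trace-compatible viewer.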
......@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving App version string """
serving_app_version = "0.0.1"
serving_app_version = "0.1.0"
......@@ -21,6 +21,7 @@ import google.protobuf.text_format
import numpy as np
import time
import sys
from .serving_client import PredictorRes
int_type = 0
float_type = 1
......@@ -60,13 +61,18 @@ class SDKConfig(object):
self.tag_list = []
self.cluster_list = []
self.variant_weight_list = []
self.rpc_timeout_ms = 20000
self.load_balance_strategy = "la"
def add_server_variant(self, tag, cluster, variant_weight):
self.tag_list.append(tag)
self.cluster_list.append(cluster)
self.variant_weight_list.append(variant_weight)
def gen_desc(self):
def set_load_banlance_strategy(self, strategy):
self.load_balance_strategy = strategy
def gen_desc(self, rpc_timeout_ms):
predictor_desc = sdk.Predictor()
predictor_desc.name = "general_model"
predictor_desc.service_name = \
......@@ -85,7 +91,7 @@ class SDKConfig(object):
self.sdk_desc.predictors.extend([predictor_desc])
self.sdk_desc.default_variant_conf.tag = "default"
self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000
self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = 20000
self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms
self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100
self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1
......@@ -108,11 +114,9 @@ class Client(object):
self.feed_names_ = []
self.fetch_names_ = []
self.client_handle_ = None
self.result_handle_ = None
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.rpath()
self.pid = os.getpid()
self.predictor_sdk_ = None
self.producers = []
......@@ -120,16 +124,10 @@ class Client(object):
self.profile_ = _Profiler()
self.all_numpy_input = True
self.has_numpy_input = False
def rpath(self):
lib_path = os.path.dirname(paddle_serving_client.__file__)
client_path = os.path.join(lib_path, 'serving_client.so')
lib_path = os.path.join(lib_path, 'lib')
os.system('patchelf --set-rpath {} {}'.format(lib_path, client_path))
self.rpc_timeout_ms = 20000
def load_client_config(self, path):
from .serving_client import PredictorClient
from .serving_client import PredictorRes
model_conf = m_config.GeneralModelConfig()
f = open(path, 'r')
model_conf = google.protobuf.text_format.Merge(
......@@ -139,7 +137,6 @@ class Client(object):
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.result_handle_ = PredictorRes()
self.client_handle_ = PredictorClient()
self.client_handle_.init(path)
if "FLAGS_max_body_size" not in os.environ:
......@@ -180,13 +177,19 @@ class Client(object):
self.predictor_sdk_.add_server_variant(tag, cluster,
str(variant_weight))
def set_rpc_timeout_ms(self, rpc_timeout):
if not isinstance(rpc_timeout, int):
raise ValueError("rpc_timeout must be int type.")
else:
self.rpc_timeout_ms = rpc_timeout
def connect(self, endpoints=None):
# check whether current endpoint is available
# init from client config
# create predictor here
if endpoints is None:
if self.predictor_sdk_ is None:
raise SystemExit(
raise ValueError(
"You must set the endpoints parameter or use add_variant function to create a variant."
)
else:
......@@ -197,7 +200,7 @@ class Client(object):
print(
"parameter endpoints({}) will not take effect, because you use the add_variant function.".
format(endpoints))
sdk_desc = self.predictor_sdk_.gen_desc()
sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms)
self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
))
......@@ -210,9 +213,15 @@ class Client(object):
def shape_check(self, feed, key):
if key in self.lod_tensor_set:
return
if len(feed[key]) != self.feed_tensor_len[key]:
raise SystemExit("The shape of feed tensor {} not match.".format(
if isinstance(feed[key],
list) and len(feed[key]) != self.feed_tensor_len[key]:
raise ValueError("The shape of feed tensor {} not match.".format(
key))
if type(feed[key]).__module__ == np.__name__ and np.size(feed[
key]) != self.feed_tensor_len[key]:
#raise SystemExit("The shape of feed tensor {} not match.".format(
# key))
pass
def predict(self, feed=None, fetch=None, need_variant_tag=False):
self.profile_.record('py_prepro_0')
......@@ -261,8 +270,8 @@ class Client(object):
for key in feed_i:
if key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] == int_type:
if i == 0:
int_feed_names.append(key)
......@@ -271,7 +280,6 @@ class Client(object):
else:
int_shape.append(self.feed_shapes_[key])
if isinstance(feed_i[key], np.ndarray):
#int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
......@@ -285,7 +293,6 @@ class Client(object):
else:
float_shape.append(self.feed_shapes_[key])
if isinstance(feed_i[key], np.ndarray):
#float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
float_slot.append(feed_i[key])
self.has_numpy_input = True
else:
......@@ -297,17 +304,19 @@ class Client(object):
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
result_batch = self.result_handle_
result_batch_handle = PredictorRes()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch, self.pid)
int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid)
elif self.has_numpy_input == False:
res = self.client_handle_.batch_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch, self.pid)
int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid)
else:
raise SystemExit(
raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type"
)
......@@ -318,28 +327,28 @@ class Client(object):
return None
multi_result_map = []
model_engine_names = result_batch.get_engine_names()
model_engine_names = result_batch_handle.get_engine_names()
for mi, engine_name in enumerate(model_engine_names):
result_map = {}
# result map needs to be a numpy array
for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int_type:
result_map[name] = result_batch.get_int64_by_name(mi, name)
shape = result_batch.get_shape(mi, name)
result_map[name] = np.array(result_map[name], dtype='int64')
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int64_by_name(
mi, name)
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
result_map["{}.lod".format(name)] = np.array(
result_batch.get_lod(mi, name))
result_map["{}.lod".format(
name)] = result_batch_handle.get_lod(mi, name)
elif self.fetch_names_to_type_[name] == float_type:
result_map[name] = result_batch.get_float_by_name(mi, name)
shape = result_batch.get_shape(mi, name)
result_map[name] = np.array(
result_map[name], dtype='float32')
result_map[name] = result_batch_handle.get_float_by_name(
mi, name)
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
result_map["{}.lod".format(name)] = np.array(
result_batch.get_lod(mi, name))
result_map["{}.lod".format(
name)] = result_batch_handle.get_lod(mi, name)
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
......@@ -357,7 +366,7 @@ class Client(object):
# When using the A/B test, the tag of variant needs to be returned
return ret if not need_variant_tag else [
ret, self.result_handle_.variant_tag()
ret, result_batch_handle.variant_tag()
]
def release(self):
......
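# Hedged end-to-end sketch of the client API touched by this change (model
# path, endpoint and feed/fetch names are placeholders):
#   client = Client()
#   client.load_client_config("serving_client_conf.prototxt")
#   client.set_rpc_timeout_ms(100000)
#   client.connect(["127.0.0.1:9292"])
#   fetch_map = client.predict(feed={"x": x_array}, fetch=["price"])
#   client.release()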
......@@ -33,7 +33,11 @@ def save_model(server_model_folder,
executor = Executor(place=CPUPlace())
feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
target_vars = list(fetch_var_dict.values())
target_vars = []
target_var_names = []
for key in sorted(fetch_var_dict.keys()):
target_vars.append(fetch_var_dict[key])
target_var_names.append(key)
save_inference_model(
server_model_folder,
......@@ -64,7 +68,7 @@ def save_model(server_model_folder,
feed_var.shape.extend(tmp_shape)
config.feed_var.extend([feed_var])
for key in fetch_var_dict:
for key in target_var_names:
fetch_var = model_conf.FetchVar()
fetch_var.alias_name = key
fetch_var.name = fetch_var_dict[key].name
......@@ -103,17 +107,21 @@ def save_model(server_model_folder,
fout.write(config.SerializeToString())
def inference_model_to_serving(infer_model, serving_client, serving_server):
def inference_model_to_serving(dirname,
serving_server="serving_server",
serving_client="serving_client",
model_filename=None,
params_filename=None):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_program, feed_target_names, fetch_targets = \
fluid.io.load_inference_model(dirname=infer_model, executor=exe)
fluid.io.load_inference_model(dirname=dirname, executor=exe, model_filename=model_filename, params_filename=params_filename)
feed_dict = {
x: inference_program.global_block().var(x)
for x in feed_target_names
}
fetch_dict = {x.name: x for x in fetch_targets}
save_model(serving_client, serving_server, feed_dict, fetch_dict,
save_model(serving_server, serving_client, feed_dict, fetch_dict,
inference_program)
feed_names = feed_dict.keys()
fetch_names = fetch_dict.keys()
......
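# Hedged conversion sketch: "./inference_model" is a placeholder for a
# directory produced by fluid.io.save_inference_model; the call writes the
# serving_server/ and serving_client/ configuration folders named in the
# signature above.
#   inference_model_to_serving("./inference_model",
#                              serving_server="serving_server",
#                              serving_client="serving_client")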
......@@ -38,7 +38,7 @@ def benchmark_args():
def show_latency(latency_list):
latency_array = np.array(latency_list)
info = ""
info = "latency:\n"
info += "mean :{} ms\n".format(np.mean(latency_array))
info += "median :{} ms\n".format(np.median(latency_array))
info += "80 percent :{} ms\n".format(np.percentile(latency_array, 80))
......
......@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.2.0"
serving_server_version = "0.2.0"
module_proto_version = "0.2.0"
serving_client_version = "0.3.0"
serving_server_version = "0.3.0"
module_proto_version = "0.3.0"
......@@ -23,6 +23,7 @@ import paddle_serving_server as paddle_serving_server
from .version import serving_server_version
from contextlib import closing
import collections
import fcntl
class OpMaker(object):
......@@ -127,6 +128,7 @@ class Server(object):
self.model_toolkit_conf = None
self.resource_conf = None
self.memory_optimization = False
self.ir_optimization = False
self.model_conf = None
self.workflow_fn = "workflow.prototxt"
self.resource_fn = "resource.prototxt"
......@@ -175,6 +177,9 @@ class Server(object):
def set_memory_optimize(self, flag=False):
self.memory_optimization = flag
def set_ir_optimize(self, flag=False):
self.ir_optimization = flag
def check_local_bin(self):
if "SERVING_BIN" in os.environ:
self.use_local_bin = True
......@@ -195,6 +200,7 @@ class Server(object):
engine.enable_batch_align = 0
engine.model_data_path = model_config_path
engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
......@@ -244,7 +250,7 @@ class Server(object):
workflow_oi_config_path = None
if isinstance(model_config_paths, str):
# If there is only one model path, use the default infer_op.
# Because there are several infer_op type, we need to find
# Because there are several infer_op type, we need to find
# it from workflow_conf.
default_engine_names = [
'general_infer_0', 'general_dist_kv_infer_0',
......@@ -269,7 +275,8 @@ class Server(object):
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
workflow_oi_config_path = self.model_config_paths.items()[0][1]
workflow_oi_config_path = list(self.model_config_paths.items())[0][
1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
......@@ -284,8 +291,8 @@ class Server(object):
# check config here
# print config here
def use_mkl(self):
self.mkl_flag = True
def use_mkl(self, flag):
self.mkl_flag = flag
def get_device_version(self):
avx_flag = False
......@@ -300,6 +307,10 @@ class Server(object):
else:
device_version = "serving-cpu-avx-openblas-"
else:
if mkl_flag:
print(
"Your CPU does not support AVX, server will running with noavx-openblas mode."
)
device_version = "serving-cpu-noavx-openblas-"
return device_version
......@@ -312,6 +323,10 @@ class Server(object):
bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
self.server_path = os.path.join(self.module_path, floder_name)
#acquire lock
version_file = open("{}/version.py".format(self.module_path), "r")
fcntl.flock(version_file, fcntl.LOCK_EX)
if not os.path.exists(self.server_path):
print('First-time run, downloading PaddleServing components ...')
r = os.system('wget ' + bin_url + ' --no-check-certificate')
......@@ -335,6 +350,8 @@ class Server(object):
format(self.module_path))
finally:
os.remove(tar_name)
#release lock
version_file.close()
os.chdir(self.cur_path)
self.bin_path = self.server_path + "/serving"
......
......@@ -20,7 +20,7 @@ Usage:
import os
import time
import argparse
import commands
import subprocess
import datetime
import shutil
import tarfile
......@@ -209,7 +209,7 @@ class HadoopMonitor(Monitor):
remote_filepath = os.path.join(path, filename)
cmd = '{} -ls {} 2>/dev/null'.format(self._cmd_prefix, remote_filepath)
_LOGGER.debug('check cmd: {}'.format(cmd))
[status, output] = commands.getstatusoutput(cmd)
[status, output] = subprocess.getstatusoutput(cmd)
_LOGGER.debug('resp: {}'.format(output))
if status == 0:
[_, _, _, _, _, mdate, mtime, _] = output.split('\n')[-1].split()
......
......@@ -41,6 +41,9 @@ def parse_args(): # pylint: disable=doc-string-missing
"--device", type=str, default="cpu", help="Type of device")
parser.add_argument(
"--mem_optim", type=bool, default=False, help="Memory optimize")
parser.add_argument(
"--ir_optim", type=bool, default=False, help="Graph optimize")
parser.add_argument("--use_mkl", type=bool, default=False, help="Use MKL")
parser.add_argument(
"--max_body_size",
type=int,
......@@ -57,7 +60,9 @@ def start_standard_model(): # pylint: disable=doc-string-missing
workdir = args.workdir
device = args.device
mem_optim = args.mem_optim
ir_optim = args.ir_optim
max_body_size = args.max_body_size
use_mkl = args.use_mkl
if model == "":
print("You must specify your serving model")
......@@ -78,6 +83,8 @@ def start_standard_model(): # pylint: disable=doc-string-missing
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.use_mkl(use_mkl)
server.set_max_body_size(max_body_size)
server.set_port(port)
......@@ -96,7 +103,7 @@ if __name__ == "__main__":
service.load_model_config(args.model)
service.prepare_server(
workdir=args.workdir, port=args.port, device=args.device)
service.run_server()
service.run_rpc_service()
app_instance = Flask(__name__)
......
......@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.2.0"
serving_server_version = "0.2.0"
module_proto_version = "0.2.0"
serving_client_version = "0.3.0"
serving_server_version = "0.3.0"
module_proto_version = "0.3.0"
......@@ -86,13 +86,13 @@ class WebService(object):
for key in fetch_map:
fetch_map[key] = fetch_map[key].tolist()
fetch_map = self.postprocess(
feed=feed, fetch=fetch, fetch_map=fetch_map)
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": fetch_map}
except ValueError:
result = {"result": "Request Value Error"}
return result
def run_server(self):
def run_rpc_service(self):
import socket
localIP = socket.gethostbyname(socket.gethostname())
print("web service address:")
......@@ -101,7 +101,6 @@ class WebService(object):
p_rpc = Process(target=self._launch_rpc_service)
p_rpc.start()
def run_flask(self):
app_instance = Flask(__name__)
@app_instance.before_first_request
......@@ -114,10 +113,16 @@ class WebService(object):
def run():
return self.get_prediction(request)
app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=4)
self.app_instance = app_instance
def run_web_service(self):
self.app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=1)
def get_app_instance(self):
return self.app_instance
def preprocess(self, feed=[], fetch=[]):
return feed, fetch
......
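# Hedged driver sketch for the split run_rpc_service / run_web_service flow
# (model path and port are placeholders; construction of the WebService
# subclass is omitted because it is not shown in this hunk):
#   service.load_model_config("serving_server_model")
#   service.prepare_server(workdir="workdir", port=9292, device="cpu")
#   service.run_rpc_service()   # spawns the RPC worker process
#   service.run_flask()         # builds the Flask app and stores it on the service
#   service.run_web_service()   # serves the stored app on the configured port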
......@@ -25,6 +25,7 @@ from .version import serving_server_version
from contextlib import closing
import argparse
import collections
import fcntl
def serve_args():
......@@ -47,6 +48,8 @@ def serve_args():
"--name", type=str, default="None", help="Default service name")
parser.add_argument(
"--mem_optim", type=bool, default=False, help="Memory optimize")
parser.add_argument(
"--ir_optim", type=bool, default=False, help="Graph optimize")
parser.add_argument(
"--max_body_size",
type=int,
......@@ -156,6 +159,7 @@ class Server(object):
self.model_toolkit_conf = None
self.resource_conf = None
self.memory_optimization = False
self.ir_optimization = False
self.model_conf = None
self.workflow_fn = "workflow.prototxt"
self.resource_fn = "resource.prototxt"
......@@ -204,6 +208,9 @@ class Server(object):
def set_memory_optimize(self, flag=False):
self.memory_optimization = flag
def set_ir_optimize(self, flag=False):
self.ir_optimization = flag
def check_local_bin(self):
if "SERVING_BIN" in os.environ:
self.use_local_bin = True
......@@ -240,6 +247,7 @@ class Server(object):
engine.enable_batch_align = 0
engine.model_data_path = model_config_path
engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
......@@ -313,7 +321,8 @@ class Server(object):
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
workflow_oi_config_path = self.model_config_paths.items()[0][1]
workflow_oi_config_path = list(self.model_config_paths.items())[0][
1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
......@@ -339,6 +348,11 @@ class Server(object):
download_flag = "{}/{}.is_download".format(self.module_path,
folder_name)
#acquire lock
version_file = open("{}/version.py".format(self.module_path), "r")
fcntl.flock(version_file, fcntl.LOCK_EX)
if os.path.exists(download_flag):
os.chdir(self.cur_path)
self.bin_path = self.server_path + "/serving"
......@@ -369,6 +383,8 @@ class Server(object):
format(self.module_path))
finally:
os.remove(tar_name)
#release lock
version_file.close()
os.chdir(self.cur_path)
self.bin_path = self.server_path + "/serving"
......
......@@ -20,7 +20,7 @@ Usage:
import os
import time
import argparse
import commands
import subprocess
import datetime
import shutil
import tarfile
......@@ -209,7 +209,7 @@ class HadoopMonitor(Monitor):
remote_filepath = os.path.join(path, filename)
cmd = '{} -ls {} 2>/dev/null'.format(self._cmd_prefix, remote_filepath)
_LOGGER.debug('check cmd: {}'.format(cmd))
[status, output] = commands.getstatusoutput(cmd)
[status, output] = subprocess.getstatusoutput(cmd)
_LOGGER.debug('resp: {}'.format(output))
if status == 0:
[_, _, _, _, _, mdate, mtime, _] = output.split('\n')[-1].split()
......
......@@ -35,6 +35,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
thread_num = args.thread
model = args.model
mem_optim = args.mem_optim
ir_optim = args.ir_optim
max_body_size = args.max_body_size
workdir = "{}_{}".format(args.workdir, gpuid)
......@@ -57,6 +58,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
server.load_model_config(model)
......@@ -116,7 +118,7 @@ if __name__ == "__main__":
web_service.set_gpus(gpu_ids)
web_service.prepare_server(
workdir=args.workdir, port=args.port, device=args.device)
web_service.run_server()
web_service.run_rpc_service()
app_instance = Flask(__name__)
......
......@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.2.0"
serving_server_version = "0.2.0"
module_proto_version = "0.2.0"
serving_client_version = "0.3.0"
serving_server_version = "0.3.0"
module_proto_version = "0.3.0"
......@@ -131,14 +131,13 @@ class WebService(object):
for key in fetch_map:
fetch_map[key] = fetch_map[key].tolist()
result = self.postprocess(
feed=feed, fetch=fetch, fetch_map=fetch_map)
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result}
result = {"result": fetch_map}
except ValueError:
result = {"result": "Request Value Error"}
return result
def run_server(self):
def run_rpc_service(self):
import socket
localIP = socket.gethostbyname(socket.gethostname())
print("web service address:")
......@@ -151,7 +150,6 @@ class WebService(object):
for p in server_pros:
p.start()
def run_flask(self):
app_instance = Flask(__name__)
@app_instance.before_first_request
......@@ -164,10 +162,16 @@ class WebService(object):
def run():
return self.get_prediction(request)
app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=4)
self.app_instance = app_instance
def run_web_service(self):
self.app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=1)
def get_app_instance(self):
return self.app_instance
def preprocess(self, feed=[], fetch=[]):
return feed, fetch
......
......@@ -42,10 +42,11 @@ if '${PACK}' == 'ON':
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'sentencepiece'
'six >= 1.10.0', 'sentencepiece', 'opencv-python', 'pillow'
]
packages=['paddle_serving_app',
'paddle_serving_app.proto',
'paddle_serving_app.reader',
'paddle_serving_app.utils',
'paddle_serving_app.models',
......@@ -54,6 +55,8 @@ packages=['paddle_serving_app',
package_data={}
package_dir={'paddle_serving_app':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app',
'paddle_serving_app.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto',
'paddle_serving_app.reader':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/reader',
'paddle_serving_app.utils':
......
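`opencv-python` and `pillow` become requirements because the reader and model helpers packaged above work on images. A hedged sketch of the kind of preprocessing pipeline they back; the class names are assumed from the `paddle_serving_app.reader` module and are not shown in this diff:

```python
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop

# Chain simple image transforms; decoding and resizing rely on OpenCV/Pillow,
# hence the new dependencies.
preprocess = Sequential([File2Image(), Resize(256), CenterCrop(224)])
img = preprocess("daisy.jpg")  # placeholder image path
```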
......@@ -26,7 +26,7 @@ from setuptools import setup
from paddle_serving_client.version import serving_client_version
from pkg_resources import DistributionNotFound, get_distribution
py_version = sys.version_info[0]
py_version = sys.version_info
def python_version():
return [int(v) for v in platform.python_version().split(".")]
......@@ -39,7 +39,12 @@ def find_package(pkgname):
return False
def copy_lib():
lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10'] if py_version == 2 else ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
if py_version[0] == 2:
lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 6:
lib_list = ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 7:
lib_list = ['libpython3.7m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
os.popen('mkdir -p paddle_serving_client/lib')
for lib in lib_list:
r = os.popen('whereis {}'.format(lib))
......
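Keeping the whole `sys.version_info` tuple (instead of only its first element) lets `copy_lib()` branch on the minor version and pick the matching `libpython`. A quick illustration:

```python
import sys

# sys.version_info indexes like a tuple: [0] is the major version, [1] the
# minor version, which is exactly what the new branches above compare.
print(sys.version_info[0], sys.version_info[1])  # e.g. "3 6" on Python 3.6
```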
......@@ -38,12 +38,9 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0',
'paddle_serving_client', 'flask >= 1.1.1'
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
]
if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"):
REQUIRED_PACKAGES.append("paddlepaddle")
packages=['paddle_serving_server',
'paddle_serving_server.proto']
......
......@@ -38,11 +38,9 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0',
'paddle_serving_client', 'flask >= 1.1.1'
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
]
if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"):
REQUIRED_PACKAGES.append("paddlepaddle")
packages=['paddle_serving_server_gpu',
'paddle_serving_server_gpu.proto']
......
......@@ -9,4 +9,6 @@ RUN yum -y install wget && \
yum -y install python3 python3-devel && \
yum clean all && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && rm get-pip.py
python get-pip.py && rm get-pip.py && \
localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
echo "export LANG=en_US.utf8" >> /root/.bashrc
......@@ -21,7 +21,7 @@ RUN yum -y install wget && \
wget https://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz && \
tar -zxf Python-2.7.5.tgz && \
cd Python-2.7.5 && \
./configure --prefix=/usr/local/python2.7 --enable-shared && \
./configure --prefix=/usr/local/python2.7 --enable-shared --enable-unicode=ucs4 && \
make all && make install && \
make clean && \
echo 'export PATH=/usr/local/python2.7/bin:$PATH' >> /root/.bashrc && \
......@@ -43,5 +43,7 @@ RUN yum -y install wget && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
yum -y install epel-release && yum -y install patchelf && \
yum clean all
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \
localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
echo "export LANG=en_US.utf8" >> /root/.bashrc
......@@ -21,7 +21,7 @@ RUN yum -y install wget && \
wget https://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz && \
tar -zxf Python-2.7.5.tgz && \
cd Python-2.7.5 && \
./configure --prefix=/usr/local/python2.7 --enable-shared && \
./configure --prefix=/usr/local/python2.7 --enable-shared --enable-unicode=ucs4 && \
make all && make install && \
make clean && \
echo 'export PATH=/usr/local/python2.7/bin:$PATH' >> /root/.bashrc && \
......@@ -43,5 +43,6 @@ RUN yum -y install wget && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
yum -y install epel-release && yum -y install patchelf && \
yum clean all
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \
echo "export LANG=en_US.utf8" >> /root/.bashrc
......@@ -20,5 +20,7 @@ RUN yum -y install wget >/dev/null \
&& rm get-pip.py \
&& yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf \
&& yum clean all
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all \
&& localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc
FROM nvidia/cuda:9.0-cudnn7-runtime-centos7
FROM nvidia/cuda:9.0-cudnn7-devel-centos7 as builder
FROM nvidia/cuda:9.0-cudnn7-runtime-centos7
RUN yum -y install wget && \
yum -y install epel-release && yum -y install patchelf && \
yum -y install gcc make python-devel && \
......@@ -13,4 +14,8 @@ RUN yum -y install wget && \
ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/libcublas.so && \
echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> /root/.bashrc && \
ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
echo "export LANG=en_US.utf8" >> /root/.bashrc && \
mkdir -p /usr/local/cuda/extras
COPY --from=builder /usr/local/cuda/extras/CUPTI /usr/local/cuda/extras/CUPTI
......@@ -21,5 +21,6 @@ RUN yum -y install wget >/dev/null \
&& rm get-pip.py \
&& yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf \
&& yum clean all
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
import re
with open("setup.cfg", "w") as f:
line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=manylinux1_x86_64".format(
get_abbr_impl(), get_impl_ver())
f.write(line)
#!/usr/bin/env bash
set -x
function unsetproxy() {
HTTP_PROXY_TEMP=$http_proxy
HTTPS_PROXY_TEMP=$https_proxy
......@@ -331,6 +331,76 @@ function python_test_bert() {
cd ..
}
function python_test_multi_fetch() {
# pwd: /Serving/python/examples
local TYPE=$1
export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
cd bert # pwd: /Serving/python/examples/bert
case $TYPE in
CPU)
#download model (max_seq_len=32)
wget https://paddle-serving.bj.bcebos.com/bert_example/bert_multi_fetch.tar.gz
tar -xzvf bert_multi_fetch.tar.gz
check_cmd "python -m paddle_serving_server.serve --model bert_seq32_model --port 9292 &"
sleep 5
check_cmd "head -n 8 data-c.txt | python test_multi_fetch_client.py"
kill_server_process
echo "bert mutli fetch RPC inference pass"
;;
GPU)
#download model (max_seq_len=32)
wget https://paddle-serving.bj.bcebos.com/bert_example/bert_multi_fetch.tar.gz
tar -xzvf bert_multi_fetch.tar.gz
check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq32_model --port 9292 --gpu_ids 0 &"
sleep 5
check_cmd "head -n 8 data-c.txt | python test_multi_fetch_client.py"
kill_server_process
echo "bert mutli fetch RPC inference pass"
;;
*)
echo "error type"
exit 1
;;
esac
echo "test multi fetch $TYPE finished as expected."
unset SERVING_BIN
cd ..
}
function python_test_multi_process(){
# pwd: /Serving/python/examples
local TYPE=$1
export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
cd fit_a_line # pwd: /Serving/python/examples/fit_a_line
sh get_data.sh
case $TYPE in
CPU)
check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --workdir test9292 &"
check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --port 9293 --workdir test9293 &"
sleep 5
check_cmd "python test_multi_process_client.py"
kill_server_process
echo "bert mutli rpc RPC inference pass"
;;
GPU)
rm -rf ./image  # TODO: this folder is removed here, but no code in this test creates it
check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --workdir test9292 --gpu_ids 0 &"
check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9293 --workdir test9293 --gpu_ids 0 &"
sleep 5
check_cmd "python test_multi_process_client.py"
kill_server_process
echo "bert mutli process RPC inference pass"
;;
*)
echo "error type"
exit 1
;;
esac
echo "test multi process $TYPE finished as expected."
unset SERVING_BIN
cd ..
}
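The multi-process case starts two servers on ports 9292 and 9293 and drives them from one client. A hedged sketch of that client side; `test_multi_process_client.py` itself is not part of this diff, so the config path and feed values are placeholders:

```python
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
# Hand the client both endpoints so requests are spread across the two
# server processes started above.
client.connect(["127.0.0.1:9292", "127.0.0.1:9293"])

x = [0.0] * 13  # placeholder for one normalized uci_housing sample
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
print(fetch_map)
```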
function python_test_imdb() {
# pwd: /Serving/python/examples
local TYPE=$1
......@@ -343,7 +413,7 @@ function python_test_imdb() {
sleep 5
check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
# test batch predict
check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
echo "imdb CPU RPC inference pass"
kill_server_process
rm -rf work_dir1
......@@ -359,7 +429,7 @@ function python_test_imdb() {
exit 1
fi
# test batch predict
check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
setproxy # recover proxy state
kill_server_process
ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill
......@@ -385,15 +455,16 @@ function python_test_lac() {
cd lac # pwd: /Serving/python/examples/lac
case $TYPE in
CPU)
sh get_data.sh
check_cmd "python -m paddle_serving_server.serve --model jieba_server_model/ --port 9292 &"
python -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz
check_cmd "python -m paddle_serving_server.serve --model lac_model/ --port 9292 &"
sleep 5
check_cmd "echo \"我爱北京天安门\" | python lac_client.py jieba_client_conf/serving_client_conf.prototxt lac_dict/"
check_cmd "echo \"我爱北京天安门\" | python lac_client.py lac_client/serving_client_conf.prototxt "
echo "lac CPU RPC inference pass"
kill_server_process
unsetproxy # maybe the proxy is used on iPipe, which makes web-test failed.
check_cmd "python lac_web_service.py jieba_server_model/ lac_workdir 9292 &"
check_cmd "python lac_web_service.py lac_model/ lac_workdir 9292 &"
sleep 5
check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"feed\":[{\"words\": \"我爱北京天安门\"}], \"fetch\":[\"word_seg\"]}' http://127.0.0.1:9292/lac/prediction"
# check http code
......@@ -436,7 +507,9 @@ function python_run_test() {
python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
python_test_bert $TYPE # pwd: /Serving/python/examples
python_test_imdb $TYPE # pwd: /Serving/python/examples
python_test_lac $TYPE
python_test_lac $TYPE # pwd: /Serving/python/examples
python_test_multi_process $TYPE # pwd: /Serving/python/examples
python_test_multi_fetch $TYPE # pwd: /Serving/python/examples
echo "test python $TYPE part finished as expected."
cd ../.. # pwd: /Serving
}
......